In [28]:
#ElasticNet Method
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn import datasets


In [29]:
# Load the cleaned training data, then split it into the dependent variable
# (SalePrice) and the remaining predictor columns.
train = pd.read_csv("./data/cleaned_standardized_fe.csv")
price = train['SalePrice']
features = train.drop(columns = ['SalePrice'])

In [30]:
# Cross-validate the elastic net to select the best alpha (penalty strength)
# and rho (scikit-learn's l1_ratio, the L1/L2 mixing weight).
#
# The `normalize` argument is intentionally not passed: it was deprecated in
# scikit-learn 1.0 and removed in 1.2, where supplying it raises a TypeError.
# Omitting it is equivalent to the former normalize=False default.
# NOTE(review): the input file name suggests the features are already
# standardized upstream -- confirm before relying on that.
alphas = np.linspace(.0001, 1, 40)   # candidate penalty strengths
rhos = np.linspace(0.01, 1, 30)      # candidate l1_ratio values
elastic_cv = ElasticNetCV(l1_ratio = rhos, alphas = alphas, cv = 10, max_iter = 10000)
elastic_cv.fit(features, price)
print('the best alpha is: %.5f' %(elastic_cv.alpha_))
print('the best rho is: %.5f' %(elastic_cv.l1_ratio_))


the best alpha is: 0.07702
the best rho is: 0.11241


In [31]:
# Refit on the complete training set using the cross-validated alpha and
# l1_ratio, print the in-sample R^2 score, and display the coefficients.
#
# The estimator is built once with its final hyper-parameters. The previous
# throw-away ElasticNet() / set_params(normalize=False) pair was overwritten
# immediately and fails on scikit-learn >= 1.2, where `normalize` no longer
# exists. max_iter is taken from the CV estimator so both fits share the same
# iteration budget.
elastic = ElasticNet(
    alpha = elastic_cv.alpha_,
    l1_ratio = elastic_cv.l1_ratio_,
    max_iter = elastic_cv.max_iter,
)
elastic.fit(features, price)
print('the R^2 Score is: %.2f' %(elastic.score(features, price)))
# Last expression of the cell: one coefficient per feature column.
pd.Series(elastic.coef_, index=features.columns)

the R^2 Score is: 0.91


LotFrontage             -0.085803
LotArea                  0.000000
MasVnrArea              -0.000000
HeatingQC               -0.037574
LowQualFinSF            -0.002751
GrLivArea               -0.073816
FullBath                -0.077453
HalfBath                -0.029400
BedroomAbvGr            -0.000000
TotRmsAbvGrd            -0.035103
Functional              -0.043965
GarageCars              -0.085143
MSZoning_Residential    -0.068888
LotShape_Reg             0.000000
LandContour_NotLvl       0.000000
LotConfig_CulDSac       -0.016760
LotConfig_Inside         0.000000
LotConfig_Other          0.006069
LandSlope_Mod           -0.010809
LandSlope_Sev            0.000000
Condition1_Norm         -0.046572
Condition1_Pos          -0.000000
Condition1_RR            0.000000
HouseStyle_1.5Unf        0.000000
HouseStyle_1Story        0.000000
HouseStyle_2.5Fin        0.000000
HouseStyle_2.5Unf       -0.000000
HouseStyle_2Story        0.000000
HouseStyle_SFoyer       -0.000000
HouseStyle_SLv

In [32]:
# Load the test features, the raw (pre-standardization) training data used by
# the later inverse-transform step, and the submission template.
test = pd.read_csv("./data/cleaned_standardized_TEST.csv")
train_pre_stand = pd.read_csv("./data/train.csv")
submission = pd.read_csv("./data/sample_submission.csv")

# Select the test columns by name, in the same order the model was trained on.
# A reordered CSV can then no longer pair coefficients with the wrong columns,
# and a missing feature raises KeyError instead of producing bad predictions.
sale_price_pred = elastic.predict(test[features.columns])

# Predictions are still on the model's (transformed) target scale here.
submission['SalePrice'] = sale_price_pred
submission.head()

Unnamed: 0,Id,SalePrice
0,1461,0.786792
1,1462,0.150369
2,1463,-0.322161
3,1464,-0.583861
4,1465,-0.55104


In [33]:
# Inverse transform: map the model's predictions back to sale prices.
from sklearn.preprocessing import StandardScaler

# Re-fit a scaler on the raw training prices after the SalePrice ** -0.1 power
# transform, so its mean/scale can undo the standardization.
# NOTE(review): presumably this mirrors how the target in the training file
# was produced -- confirm against the preprocessing code.
scaler = StandardScaler()
scaler = scaler.fit(train_pre_stand[['SalePrice']]**-.1)

# inverse_transform requires a 2-D (n_samples, 1) array; a 1-D Series is
# rejected by current scikit-learn. Start from `sale_price_pred` rather than
# the submission column so that re-running this cell does not invert values
# that were already inverted.
standardized_pred = np.asarray(sale_price_pred).reshape(-1, 1)
unscaled_pred = scaler.inverse_transform(standardized_pred).ravel()

# Undo the power transform ((y ** -0.1) ** -10 == y) and round to whole units.
submission['SalePrice'] = (unscaled_pred**-10).round(0)
submission.to_csv("./data/elastic_submission.csv",index = False)

In [34]:
# Preview the first rows of the final SalePrice column.
submission.loc[:, 'SalePrice'].head()

0    121449.0
1    155799.0
2    188218.0
3    209317.0
4    206533.0
Name: SalePrice, dtype: float64