In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Lasso

In [22]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
df_train = pd.read_csv('train_clean.csv', sep='|')
df_test = pd.read_csv('test_clean.csv', sep='|')

In [3]:
#split dataframe
y = df_train['SalePrice'].values
X = df_train.loc[:,'LotFrontage':'2010.8'].values

In [26]:
#root mean square error function 
def rmse(model):
    error = np.sqrt(-cross_val_score(model, X, y, cv=10, scoring='mean_squared_error').mean())
    return(error)

In [27]:
#hyperparameter tuning for Lasso alpha
#alphas = [1, 0.1, 0.001, 0.0005]
alphas = [0.0005,0.0006,0.0007,0.0008,0.0009,0.001,0.002]
for alpha in alphas:
    print('alpha: ' + str(alpha) + '   rmse: ' + str(rmse(Lasso(alpha=alpha))))

#alpha = 0.0007

alpha: 0.0005   rmse: 0.130564264104
alpha: 0.0006   rmse: 0.130376699186
alpha: 0.0007   rmse: 0.130308772781
alpha: 0.0008   rmse: 0.13051688445
alpha: 0.0009   rmse: 0.130902342394
alpha: 0.001   rmse: 0.13143972732
alpha: 0.002   rmse: 0.137872709306


In [10]:
#Lasso regressor
lasso = Lasso(alpha=0.0007, normalize=True)
lasso.fit(X, y)
print(lasso.coef_)

[  0.00000000e+00   3.25871125e-02  -0.00000000e+00  -0.00000000e+00
   0.00000000e+00   1.09645279e-02   2.48926954e-01   0.00000000e+00
  -0.00000000e+00   0.00000000e+00   9.78662072e-03   0.00000000e+00
   1.74890859e-04   0.00000000e+00   0.00000000e+00  -0.00000000e+00
   0.00000000e+00   0.00000000e+00  -0.00000000e+00  -0.00000000e+00
  -0.00000000e+00  -0.00000000e+00   0.00000000e+00   1.01323546e-02
   4.75736117e-03   1.66106476e-06   9.13935654e-02   0.00000000e+00
   1.18071931e-02   1.50674263e-06  -9.90362926e-02   0.00000000e+00
  -0.00000000e+00   0.00000000e+00  -2.89734341e-02  -0.00000000e+00
   0.00000000e+00  -0.00000000e+00   0.00000000e+00   0.00000000e+00
  -0.00000000e+00   0.00000000e+00   0.00000000e+00  -0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00  -0.00000000e+00
  -0.00000000e+00  -0.00000000e+00  -0.00000000e+00  -0.00000000e+00
  -0.00000000e+00  -0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00  -6.93231413e-03

In [11]:
#make predictions
ID = df_test['Id']
df_test = df_test.drop('Id', axis=1)
predictions = 2.718281828459 ** lasso.predict(df_test)

result = pd.DataFrame({'Id':ID, 'SalePrice':predictions})
result.to_csv('lasso_model.csv', index=False)

In [12]:
result.head()

Unnamed: 0,Id,SalePrice
0,1461,113097.689732
1,1462,144283.708558
2,1463,166278.107956
3,1464,187058.423629
4,1465,175634.413717
