In [1]:
import pandas as pd
import numpy as np
from scipy.stats import skew

from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Ridge

In [2]:
df_train = pd.read_csv('train_clean.csv', sep='|')
df_test = pd.read_csv('test_clean.csv', sep='|')

In [4]:
#split dataframe
y = df_train['SalePrice'].values
X = df_train.loc[:,'LotFrontage':'2010.8'].values

In [5]:
#root mean square error function 
def rmse(model):
    error = np.sqrt(-cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=5))
    return(error)

In [6]:
#hyperparameter tuning for Ridge alpha
#alphas = [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50] FOR REFERENCE
alphas = [4,5,6,7,8,9,10,11,12,13,14]
for alpha in alphas:
    print('alpha: ' + str(alpha) + '   rmse: ' + str(rmse(Ridge(alpha=alpha)).mean()))
    
#alpha = 5

alpha: 4   rmse: 0.134106382041
alpha: 5   rmse: 0.133957784497
alpha: 6   rmse: 0.133939530653
alpha: 7   rmse: 0.134002632495
alpha: 8   rmse: 0.134119465037
alpha: 9   rmse: 0.134273056404
alpha: 10   rmse: 0.134452320416
alpha: 11   rmse: 0.134649680985
alpha: 12   rmse: 0.134859781827
alpha: 13   rmse: 0.135078736498
alpha: 14   rmse: 0.135303668127


In [8]:
#Ridge regressor
ridge = Ridge(alpha=5, normalize=True)
ridge.fit(X, y)

ID = df_test['Id']
df_test = df_test.drop('Id', axis=1)
predictions = 2.718281828459 ** ridge.predict(df_test)

result = pd.DataFrame({'Id':ID, 'SalePrice':predictions})
result.to_csv('ridge_model.csv', index=False)

In [9]:
result.head()

Unnamed: 0,Id,SalePrice
0,1461,127220.242783
1,1462,150719.19398
2,1463,179564.669904
3,1464,196954.073401
4,1465,176209.642683
