In [16]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score,make_scorer
from sklearn.model_selection import train_test_split,GridSearchCV,KFold
from sklearn.preprocessing import StandardScaler

In [17]:
# Read the preprocessed table from the working directory and report its
# (rows, columns) shape as a quick sanity check.
df = pd.read_csv('processed.csv')
print('Data shape:', df.shape)

Data shape: (467, 7)


In [19]:
# Every column except the last is used as a feature; the last column is the
# regression target. Both are pulled out as plain NumPy arrays.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
print(X.shape, y.shape, sep='\n')

(467, 6)
(467,)


In [20]:
# Candidate regularisation strengths; train_model() reads this module-level
# dict when it builds its grid search.
param_grid = {'alpha': [1e-2, 1e-1, 1, 10, 100]}

# Hold out 10% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=0
)

# Learn the standardisation statistics from the training rows only, then apply
# the same transform to both splits.
# NOTE(review): because X_train is scaled here, before any cross-validation,
# the CV folds that GridSearchCV later carves out of it share scaler statistics
# computed on their own validation rows. Putting the scaler and the regressor
# in one Pipeline would avoid that.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

def train_model(X_train,y_train):
    """Grid-search Ridge's ``alpha`` with 5-fold CV and return the fitted search.

    The candidate values come from the module-level ``param_grid`` dict and
    each candidate is scored with R^2. The winning parameters and their
    cross-validated score are printed before returning.

    Parameters
    ----------
    X_train : training feature matrix, passed straight to ``GridSearchCV.fit``.
    y_train : training targets aligned with ``X_train``.

    Returns
    -------
    The fitted ``GridSearchCV`` instance.
    """
    r2_scorer = make_scorer(r2_score)
    search = GridSearchCV(
        estimator=Ridge(),
        param_grid=param_grid,
        scoring=r2_scorer,
        cv=5,
        n_jobs=2,
    )
    search.fit(X_train, y_train)

    # Same two lines of output as before: best params, then best CV score.
    for summary in (search.best_params_, search.best_score_):
        print(summary)
    return search

def cv_scores(X,y,model):
    """Print the mean train and test R^2 of ``model`` over shuffled K-fold CV.

    For every fold a fresh ``StandardScaler`` is fitted on that fold's training
    rows only and then applied to the held-out rows, so the raw (unscaled)
    ``X`` should be passed in.

    Parameters
    ----------
    X : feature array that supports indexing with integer index arrays
        (one row per sample).
    y : target array aligned with ``X`` and indexable the same way.
    model : object exposing ``fit(X, y)`` and ``predict(X)``. A copy is fitted
        in every fold, so the object passed in is never refitted or modified
        (e.g. a tuned ``GridSearchCV`` keeps its ``best_params_``).

    Returns
    -------
    None. The two averages are printed as ``Train r2: ...`` / ``Test r2: ...``.
    """
    r2_test=[]
    r2_train=[]
    for train_index, test_index in KFold(shuffle=True,random_state=0).split(X,y):
        # Fold-local names so the notebook-level X_train / X_test are not shadowed.
        X_fold_train, X_fold_test = X[train_index], X[test_index]
        y_fold_train, y_fold_test = y[train_index], y[test_index]

        # Scaling statistics come from the fold's training rows only.
        fold_scaler = StandardScaler()
        X_fold_train = fold_scaler.fit_transform(X_fold_train)
        X_fold_test = fold_scaler.transform(X_fold_test)

        # Fit an unfitted copy instead of the caller's object; safe=False falls
        # back to a deep copy for objects that are not scikit-learn estimators.
        fold_model = clone(model, safe=False)
        fold_model.fit(X_fold_train,y_fold_train)

        r2_test.append(r2_score(y_fold_test,fold_model.predict(X_fold_test)))
        r2_train.append(r2_score(y_fold_train,fold_model.predict(X_fold_train)))
    print('Train r2:',np.mean(r2_train))
    print('Test r2:',np.mean(r2_test))

In [21]:
# Tune alpha on the standardised training split; train_model prints the best
# parameters and the best cross-validated R^2, and returns the fitted search.
model=train_model(X_train,y_train)

{'alpha': 10}
0.3716375198867544


In [22]:
# Cross-validate on the full, unscaled data: cv_scores standardises inside each
# fold. `model` is the GridSearchCV object returned by train_model, so every
# fold's fit re-runs the alpha search on that fold's training rows.
cv_scores(X,y,model)

Train r2: 0.3722104848490727
Test r2: 0.34584033859802277
