In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,make_scorer
from sklearn.model_selection import train_test_split,ParameterGrid,KFold,cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler,PolynomialFeatures

from metrics import partsMetrics,allMetrics

In [2]:
# Read the dataset from the CSV in the working directory and report its
# dimensions as a quick sanity check.
df = pd.read_csv('processed.csv')
print('Data shape:', df.shape)

Data shape: (467, 7)


In [3]:
# Positional split: every column except the last is a feature, and the last
# column is the regression target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
# One shape per line: features first, then target.
print(X.shape, y.shape, sep='\n')

(467, 6)
(467,)


In [32]:
# Hold out 10% of the rows as a test set. Only the training portion is used
# for the model selection below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

# Candidate settings for the polynomial feature expansion.
param_grid = {
    'degree': range(2,11),
    'interaction_only': [True, False],
    'include_bias': [True, False]
}
best_params={}
best_r2=-1e9

# The fold assignment is identical for every candidate (fixed seed), so the
# splitter is built once instead of once per grid point.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

for params in ParameterGrid(param_grid):
    poly_features = PolynomialFeatures(**params)
    reg = LinearRegression()
    # Expansion and scaling live inside the pipeline so that cross_val_score
    # re-fits them on the training part of each fold only. Fitting the scaler
    # on all of X_train up front would let each validation fold's mean and
    # variance leak into the data its model is trained on.
    model = make_pipeline(poly_features, StandardScaler(), reg)
    scores = cross_val_score(model, X_train, y_train, cv=kf, scoring='r2')
    avg_r2 = np.mean(scores)
    # Only a mean R^2 strictly between 0 and 1 is accepted as a candidate;
    # anything outside that range is skipped rather than compared.
    if 0 < avg_r2 < 1 and avg_r2 > best_r2:
        best_r2 = avg_r2
        best_params = params

# If no candidate scored inside (0, 1), these still hold their initial
# sentinel values (-1e9 and {}).
print(best_r2)
print(best_params)

0.6247290821009486
{'degree': 5, 'include_bias': False, 'interaction_only': True}


In [4]:
# Hard-coded copy of the configuration printed by the grid-search cell, so this
# cell can run without repeating the search.
best_params = dict(degree=5, include_bias=False, interaction_only=True)
reg = LinearRegression()
poly_features = PolynomialFeatures(**best_params)
# partsMetrics is a project-local helper; per the recorded output it prints
# rmse, si, r2 and mape for the train, val and test parts.
partsMetrics(df, reg, poly_features)

train rmse: 4.248262177722176
val rmse: 5.733036969822269
test rmse: 6.282212641682788

train si: 0.3899813103180552
val si: 0.4792125874091435
test si: 0.6119968373732352

train r2: 0.8275782563875735
val r2: 0.7525194064645571
test r2: 0.6851477215402815

train mape: 214.75651052265675
val mape: 82.73803294289864
test mape: 135.6418109813557


In [5]:
# Report metrics over the whole dataset using the estimator and feature
# expansion built in the previous cell. allMetrics is a project-local helper;
# per the recorded output it prints "all" rmse, si, r2 and mape.
# NOTE(review): whether it re-fits `reg` internally is not visible here — confirm in metrics.py.
allMetrics(df,reg,poly_features)

all rmse: 4.602986226061733
all si: 0.41759106838728216
all r2: 0.8108986130151101
all mape: 206.34406529775674
