In [None]:
import pandas as pd
from sklearn.metrics import r2_score,make_scorer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from metrics import partsMetrics,allMetrics

In [2]:
# Read the preprocessed dataset from the working directory and report
# its (rows, columns) dimensions.
df = pd.read_csv(filepath_or_buffer='processed.csv')
print('Data shape:', df.shape)

Data shape: (467, 7)


In [3]:
# Positional split of the frame: every column except the last one is a
# feature, the last column is the regression target.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# One shape per line, features first.
print(X.shape, y.shape, sep='\n')

(467, 6)
(467,)


In [4]:
# Search space handed to GridSearchCV by train_model. The order of the
# candidate values inside each list is kept exactly as it was.
param_grid = {
    'criterion': [
        'squared_error',
        'absolute_error',
        'friedman_mse',
        'poisson',
    ],
    'n_estimators': [10, 50, 100, 200, 300],
    'max_features': [None, 1, 2, 3, 4, 5, 'sqrt', 'log2'],
    'max_depth': [None, 2, 4, 5, 6],
    'min_samples_leaf': [1, 3, 4, 5],
    'min_samples_split': [2, 4, 6, 8, 10],
    'bootstrap': [True, False],
    'min_weight_fraction_leaf': [0.0, .1],
}

# Hold out 10% of the rows as a test set; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

def train_model(X_train, y_train, search_space=None, cv=5):
    """Grid-search a random-forest regressor and return the fitted search.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed straight to ``GridSearchCV.fit``.
    search_space : dict or None, optional
        Parameter grid to explore. When ``None`` (the default) the
        notebook-level ``param_grid`` is used, which is what this function
        always did before the parameter existed.
    cv : optional
        Cross-validation setting forwarded to ``GridSearchCV``; defaults to
        5, the previously hard-coded value.

    Returns
    -------
    GridSearchCV
        The fitted search object. Its ``best_params_`` and ``best_score_``
        are printed before returning.
    """
    if search_space is None:
        # Backward-compatible fallback: look the global up at call time so a
        # re-executed grid cell is picked up without redefining this function.
        search_space = param_grid

    # NOTE(review): the forest (n_jobs=-1) and the search (n_jobs=2) both
    # request parallel workers -- confirm this does not oversubscribe the host.
    reg = RandomForestRegressor(random_state=0, n_jobs=-1)
    grid = GridSearchCV(
        reg,
        search_space,
        cv=cv,
        scoring=make_scorer(r2_score),
        n_jobs=2,
    )
    grid.fit(X_train, y_train)

    print(grid.best_params_)
    print(grid.best_score_)
    return grid

In [28]:
# Run the grid search on the training split; train_model prints the best
# parameters and best score, and the fitted search object is kept in `model`.
model = train_model(X_train=X_train, y_train=y_train)



{'bootstrap': False, 'criterion': 'absolute_error', 'max_depth': None, 'max_features': 3, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 300}
0.9038477368718623


In [6]:
# Hyper-parameters written out literally; they match the best_params_
# printed by the grid-search run recorded in this notebook.
best_params = {
    'bootstrap': False,
    'criterion': 'absolute_error',
    'max_depth': None,
    'max_features': 3,
    'min_samples_leaf': 1,
    'min_samples_split': 2,
    'min_weight_fraction_leaf': 0.0,
    'n_estimators': 300,
}

# Fresh estimator configured with those settings and a fixed seed.
reg = RandomForestRegressor(random_state=0, **best_params)

# partsMetrics is imported from the local `metrics` module; the recorded
# output shows rmse / si / r2 / mape for train, val and test partitions.
# NOTE(review): presumably it fits `reg` and splits `df` internally -- confirm.
partsMetrics(df, reg)

train rmse: 0.15521682223509767
val rmse: 3.8702038772073464
test rmse: 4.677578334447881

train si: 0.014248569694232894
val si: 0.32350225954585604
test si: 0.4556775312337819

train r2: 0.9997698309176348
val r2: 0.8872181143259471
test r2: 0.8254484481729941

train mape: 0.015216745060496426
val mape: 9.896447048703768
test mape: 104.01713491166436


In [7]:
# Report the metrics from allMetrics (imported from the local `metrics`
# module) for the same estimator; the recorded output shows a single
# "all" rmse / si / r2 / mape set.
# NOTE(review): presumably this fits and scores on the whole frame, which
# would make these in-sample figures -- confirm against metrics.py.
allMetrics(df,reg)

all rmse: 0.18600672318672734
all si: 0.016874859590709793
all r2: 0.9996912033020663
all mape: 0.014377773061840368
