In [None]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error

In [None]:
# Per-split containers: features and targets for the training and test portion
# of each split. They start empty and are appended to by the loading loop.
x_trains, y_trains = [], []
x_tests, y_tests = [], []

# CSV file names of the five pre-made splits, numbered 1 through 5.
train_files = ["train%d.csv" % k for k in range(1, 6)]
test_files = ["test%d.csv" % k for k in range(1, 6)]

In [None]:
# Load every train/test CSV pair and split each frame into features (all
# columns except the last) and target (the last column). The first CSV column
# is used as the row index.
if len(train_files) != len(test_files):
  raise ValueError("train_files and test_files must list the same number of splits")

# Empty the containers first so that re-running this cell does not append the
# same splits a second time.
for split_store in (x_trains, y_trains, x_tests, y_tests):
  split_store.clear()

for train_path, test_path in zip(train_files, test_files):
  train = pd.read_csv(train_path, index_col=0)
  test = pd.read_csv(test_path, index_col=0)
  x_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]
  x_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]
  x_trains.append(x_train)
  y_trains.append(y_train)
  x_tests.append(x_test)
  y_tests.append(y_test)


In [None]:
# Random forest: for each split, grid-search the hyper-parameters on the
# training data, predict the split's test data with the fitted search object,
# and record three error metrics. Averages over all splits are printed at the end.
rf_mse = []
rf_mae = []
# NOTE: despite the name, this list holds mean_squared_log_error values
# (no square root is taken).
rf_log_rmse = []
# Fixed seed so that a fresh run of the notebook builds the same forests.
rf = RandomForestRegressor(random_state=0)
rf_params = {'n_estimators': list(range(50, 101, 10)), 'max_depth': list(range(6, 11, 2))}
for i, (x_fit, y_fit, x_eval, y_eval) in enumerate(zip(x_trains, y_trains, x_tests, y_tests)):
  grid_rf = GridSearchCV(estimator=rf, param_grid=rf_params, n_jobs=-1).fit(x_fit, y_fit)
  predictions = grid_rf.predict(x_eval)
  # scikit-learn metrics take (y_true, y_pred), in that order.
  mse = mean_squared_error(y_eval, predictions)
  mae = mean_absolute_error(y_eval, predictions)
  log_mse = mean_squared_log_error(y_eval, predictions)
  rf_mse.append(mse)
  rf_mae.append(mae)
  rf_log_rmse.append(log_mse)
  print("testing on " + str(i) + " split is done!")
# Each average is divided by the length of its own list.
print("Average mse is: ", sum(rf_mse)/len(rf_mse))
print("Average mae is: ", sum(rf_mae)/len(rf_mae))
print("Average log_mse is: ", sum(rf_log_rmse)/len(rf_log_rmse))

testing on 0 split is done!
testing on 1 split is done!
testing on 2 split is done!
testing on 3 split is done!
testing on 4 split is done!
Average mse is:  1382.6312140531702
Average mae is:  28.502390514911667
Average log_mse is:  0.13320176253515553


In [None]:
# AdaBoost over depth-10 decision trees: for each split, grid-search the
# hyper-parameters on the training data, predict the split's test data with the
# fitted search object, and record three error metrics. Averages over all
# splits are printed at the end.
ad_mse = []
ad_mae = []
# NOTE: despite the name, this list holds mean_squared_log_error values
# (no square root is taken).
ad_log_rmse = []
# Fixed seeds so that a fresh run of the notebook builds the same ensembles.
ad = AdaBoostRegressor(DecisionTreeRegressor(max_depth=10, random_state=0), random_state=0)
ad_params = {'n_estimators': list(range(50, 101, 10)), 'loss': ['linear', 'exponential'], 'learning_rate': [0.5, 1]}
for i, (x_fit, y_fit, x_eval, y_eval) in enumerate(zip(x_trains, y_trains, x_tests, y_tests)):
  grid_ad = GridSearchCV(estimator=ad, param_grid=ad_params, n_jobs=-1).fit(x_fit, y_fit)
  predictions = grid_ad.predict(x_eval)
  # scikit-learn metrics take (y_true, y_pred), in that order.
  mse = mean_squared_error(y_eval, predictions)
  mae = mean_absolute_error(y_eval, predictions)
  log_mse = mean_squared_log_error(y_eval, predictions)
  ad_mse.append(mse)
  ad_mae.append(mae)
  ad_log_rmse.append(log_mse)
  print("testing on " + str(i) + " split is done!")
# Each average is divided by the length of its own list.
print("Average mse is: ", sum(ad_mse)/len(ad_mse))
print("Average mae is: ", sum(ad_mae)/len(ad_mae))
print("Average log_mse is: ", sum(ad_log_rmse)/len(ad_log_rmse))

testing on 0 split is done!
testing on 1 split is done!
testing on 2 split is done!
testing on 3 split is done!
testing on 4 split is done!
Average mse is:  1554.8025768444868
Average mae is:  32.13536727539103
Average log_mse is:  0.16919188484047182
