## Imports

In [69]:
import time
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import r2_score
from joblib import dump, load

## Load Test Data

In [79]:
# Load the held-out regression test set.
#
# The original cell read the file with header=None and then sliced off row 0
# and column 0 (presumably the header line and a saved row index -- confirm
# against the notebook that exported the CSV). Reading the header line as a
# data row forces every column to `object` dtype, leaving the numeric
# conversion to whatever consumes the frame later.
#
# Skipping that first line at parse time lets pandas infer numeric dtypes
# directly. header=None is kept so the columns retain their positional
# integer labels (1..n after the column slice), exactly as before. Row labels
# now start at 0 instead of 1; the cells in this notebook only index
# positionally, so that difference is not observable here.
df = pd.read_csv(
    'test_data_reg.csv',
    sep=',',
    header=None,
    skiprows=1,
).iloc[:, 1:]

In [80]:
# Positional split of the loaded frame: the first column is the value the
# models are scored against (y), every column after it is a feature (X).
y, X = df.iloc[:, 0], df.iloc[:, 1:]

In [88]:
# Restore the persisted feature scaler from disk. The filename suggests a
# MinMaxScaler, presumably fitted on the training data -- confirm against the
# training notebook.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
SCALER_PATH = 'minmaxscaler.joblib'
scaler = load(SCALER_PATH)

In [89]:
# Scale the feature columns with the pre-fitted scaler.
#
# The features are taken straight from the loaded frame (every column after
# the first, which holds the target) instead of from `X` itself. The previous
# form, `X = scaler.transform(X)`, fed its own output back in, so re-running
# this cell scaled already-scaled data. Reading from `df` makes the cell
# idempotent while keeping the name `X` that the scoring cells rely on.
X = scaler.transform(df.iloc[:, 1:])

## Load Models

### Load Models That Didn't Use Dimensionality Reduction

In [90]:
# Artifacts for the five regressors saved without dimensionality reduction,
# listed in the order they are unpacked below.
# NOTE: joblib.load unpickles each file; only load trusted artifacts.
no_dr_model_paths = (
    'models/linear_no_dr.joblib',
    'models/ridge_no_dr.joblib',
    'models/lasso_no_dr.joblib',
    'models/xgboost_no_dr.joblib',
    'models/adaboost_no_dr.joblib',
)
(
    lin_no_dr_clf,
    ridge_no_dr_clf,
    lasso_no_dr_clf,
    xgboost_no_dr_clf,
    adaboost_no_dr_clf,
) = [load(model_path) for model_path in no_dr_model_paths]

### Load Models That Used Dimensionality Reduction

In [91]:
# Artifacts for the five regressors saved with dimensionality reduction,
# listed in the order they are unpacked below.
# NOTE: joblib.load unpickles each file; only load trusted artifacts.
dr_model_paths = (
    'models/linear_dr.joblib',
    'models/ridge_dr.joblib',
    'models/lasso_dr.joblib',
    'models/xgboost_dr.joblib',
    'models/adaboost_dr.joblib',
)
(
    lin_dr_clf,
    ridge_dr_clf,
    lasso_dr_clf,
    xgboost_dr_clf,
    adaboost_dr_clf,
) = [load(model_path) for model_path in dr_model_paths]

## Print Scores

### Print Scores of Models That Didn't Use Dimensionality Reduction

In [92]:
# Report each no-reduction model's .score(X, y) on the scaled test features,
# one line per model, in a fixed order.
print('R2 Score:')
no_dr_scoreboard = (
    ('Linear: ', lin_no_dr_clf),
    ('Ridge: ', ridge_no_dr_clf),
    ('Lasso: ', lasso_no_dr_clf),
    ('XGBoost: ', xgboost_no_dr_clf),
    ('Adaboost: ', adaboost_no_dr_clf),
)
for prefix, model in no_dr_scoreboard:
    print(prefix + str(model.score(X, y)))

R2 Score:
Linear: 0.23556746768086068
Ridge: 0.23565093125332204
Lasso: -7.165243768914031e-05
XGBoost: 0.2873631468024399
Adaboost: -0.41020066600616834


### Load PCA Data Transformer

In [93]:
# Restore the persisted PCA transformer used to project the scaled features
# for the dimensionality-reduced models.
# NOTE: joblib.load unpickles the file; only load trusted artifacts.
PCA_PATH = 'pca.joblib'
pca = load(PCA_PATH)

### Print Scores of Models That Used Dimensionality Reduction

In [94]:
# Project the scaled test features with the loaded PCA transformer, then
# report each reduced-feature model's .score(x_dr, y), one line per model.
x_dr = pca.transform(X)
print('R2 Score:')
dr_scoreboard = (
    ('Linear: ', lin_dr_clf),
    ('Ridge: ', ridge_dr_clf),
    ('Lasso: ', lasso_dr_clf),
    ('XGBoost: ', xgboost_dr_clf),
    ('Adaboost: ', adaboost_dr_clf),
)
for prefix, model in dr_scoreboard:
    print(prefix + str(model.score(x_dr, y)))

R2 Score:
Linear: 0.2184855789362672
Ridge: 0.2184720772778065
Lasso: -7.165243768914031e-05
XGBoost: 0.2721456960670422
Adaboost: -0.415355531343508
