In [1]:
import numpy    as np
import pandas   as pd
import seaborn  as sb
import matplotlib.pyplot as plt
import sklearn  as skl
import time
from IPython.display import clear_output

from sklearn import pipeline      # Pipeline, make_pipeline
from sklearn import preprocessing # StandardScaler, OrdinalEncoder, LabelEncoder
from sklearn import impute
from sklearn import compose
from sklearn import model_selection # train_test_split, StratifiedKFold, cross_val_score, cross_val_predict
from sklearn import metrics         # accuracy_score, balanced_accuracy_score, plot_confusion_matrix
from sklearn import inspection      # permutation_importance
import scikitplot as skplt

In [2]:
########################################################### CLASSIFIERS

#### MULT
from sklearn.linear_model   import LogisticRegression
from sklearn.linear_model   import RidgeClassifier
from sklearn.svm            import SVC
from sklearn.svm            import NuSVC
from sklearn.svm            import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors      import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes    import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble       import StackingClassifier

#### TREE
from sklearn.tree          import DecisionTreeClassifier
from sklearn.ensemble      import RandomForestClassifier
from sklearn.ensemble      import ExtraTreesClassifier
from sklearn.ensemble      import AdaBoostClassifier
from sklearn.ensemble      import GradientBoostingClassifier
from sklearn.experimental  import enable_hist_gradient_boosting
from sklearn.ensemble      import HistGradientBoostingClassifier
from xgboost               import XGBClassifier, plot_tree
from lightgbm              import LGBMClassifier
from catboost              import CatBoostClassifier
#from ngboost               import NGBClassifier
#from rgf.sklearn           import RGFClassifier, FastRGFClassifier

########################################################### REGRESSORS
from sklearn.linear_model  import ElasticNet, Ridge, Lasso, BayesianRidge, ARDRegression, TweedieRegressor
from sklearn.svm           import LinearSVR, NuSVR, SVR
from sklearn.ensemble      import BaggingRegressor
from sklearn.kernel_ridge  import KernelRidge

In [4]:
# List the directory that holds the train/test CSVs loaded further down.
!ls ../../csv_files/FS_combos/UNI/binary_num/

UNI_test_binary_num.csv  UNI_train_binary_num.csv


In [6]:
# Directory containing the input CSVs. Keep the trailing slash: file names are
# appended directly to this string when the files are read.
path = "../../csv_files/FS_combos/UNI/binary_num/"

In [7]:
# Read the train and test tables from `path`; the CSV column named
# "Unnamed: 0" is used as the DataFrame index in both.
df = pd.read_csv(f"{path}UNI_train_binary_num.csv", index_col="Unnamed: 0")
df_test = pd.read_csv(f"{path}UNI_test_binary_num.csv", index_col="Unnamed: 0")

### Select data for experiment

In [9]:
# Feature matrix: every column of the training table except the label and `id`.
x = df.drop(columns=["target", 'id']) # X DATA (WILL BE TRAIN+VALID DATA)
# NOTE(review): this line used to be annotated "0 = No, 1 = Yes", but the
# regressors fitted below predict values around 7.3-7.6, so `target` looks
# continuous rather than binary -- confirm the label definition.
y = df["target"]

# Features of the separate test table; only the `id` column is removed.
x_test = df_test.drop(columns=['id']) # X_TEST DATA (NEW DATA)

In [10]:
# Hold out 20% of the rows as a validation set; the fixed seed keeps the split repeatable.
x_train, x_val, y_train, y_val = model_selection.train_test_split(x, y, test_size=0.2, random_state=0)

In [11]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline. `fit` returns the estimator itself, so it is
# bound in one step and then echoed as the cell output.
lr = LinearRegression().fit(x_train, y_train)
lr

LinearRegression()

In [12]:
# Predict on the held-out validation features with the linear-regression
# model and echo the resulting array.
y_pred_regr = lr.predict(x_val)
y_pred_regr

array([7.50746531, 7.33089856, 7.48514084, ..., 7.33837483, 7.38716055,
       7.48462291])

In [13]:
from sklearn.metrics import mean_squared_error

In [14]:
# Root-mean-squared error of the current predictions on the validation split.
# Computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept; arguments follow the documented
# (y_true, y_pred) order.
# NOTE: the variable keeps its original name `mse`, but the value is an RMSE.
mse = np.sqrt(mean_squared_error(y_val, y_pred_regr))
mse

0.8805124682408102

In [15]:
# Fit an ARDRegression model with default settings on the training split.
# (No PCA is involved in this cell.)
from sklearn.linear_model import ARDRegression
ar = ARDRegression().fit(x_train, y_train)
ar

ARDRegression()

In [16]:
# Predict on the validation features with the ARD model; this overwrites the
# previous model's predictions stored under the same name.
y_pred_regr = ar.predict(x_val)
y_pred_regr

array([7.5069695 , 7.33213189, 7.48564688, ..., 7.33843498, 7.38670458,
       7.48458301])

In [17]:
# Root-mean-squared error of the current predictions on the validation split.
# Computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept; arguments follow the documented
# (y_true, y_pred) order.
# NOTE: the variable keeps its original name `mse`, but the value is an RMSE.
mse = np.sqrt(mean_squared_error(y_val, y_pred_regr))
mse

0.8805045329524643

In [18]:
from sklearn.linear_model import BayesianRidge
# Bayesian ridge regression with default settings; `fit` returns the estimator,
# which is echoed as the cell output.
br = BayesianRidge().fit(x_train, y_train)
br

BayesianRidge()

In [19]:
# Predict on the validation features with the Bayesian ridge model; this
# overwrites the previous model's predictions stored under the same name.
y_pred_regr = br.predict(x_val)
y_pred_regr

array([7.50730323, 7.33115565, 7.48518143, ..., 7.33840251, 7.38706802,
       7.48455844])

In [20]:
# Root-mean-squared error of the current predictions on the validation split.
# Computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept; arguments follow the documented
# (y_true, y_pred) order.
# NOTE: the variable keeps its original name `mse`, but the value is an RMSE.
mse = np.sqrt(mean_squared_error(y_val, y_pred_regr))
mse

0.8805058444075908

In [21]:
# ElasticNet with scikit-learn's default hyperparameters.
# NOTE(review): the validation predictions printed later in this notebook are
# one repeated value (7.45485702), which suggests the default penalty shrinks
# every coefficient to zero here -- consider tuning `alpha` / `l1_ratio`.
en = ElasticNet()

In [22]:
# Fit the ElasticNet model on the training split; the returned estimator is
# shown as the cell output.
en.fit(x_train, y_train)

ElasticNet()

In [23]:
# Predict on the validation features with the ElasticNet model; this
# overwrites the previous model's predictions stored under the same name.
y_pred_regr = en.predict(x_val)
y_pred_regr

array([7.45485702, 7.45485702, 7.45485702, ..., 7.45485702, 7.45485702,
       7.45485702])

In [24]:
# Root-mean-squared error of the current predictions on the validation split.
# Computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept; arguments follow the documented
# (y_true, y_pred) order.
# NOTE: the variable keeps its original name `mse`, but the value is an RMSE.
mse = np.sqrt(mean_squared_error(y_val, y_pred_regr))
mse

0.8854278096232179

In [25]:
# Ridge regression with scikit-learn's default hyperparameters.
r = Ridge()

In [26]:
# Fit the Ridge model on the training split; the returned estimator is shown
# as the cell output.
r.fit(x_train, y_train)

Ridge()

In [27]:
# Predict on the validation features with the Ridge model; this overwrites
# the previous model's predictions stored under the same name.
y_pred_regr = r.predict(x_val)
y_pred_regr

array([7.50746423, 7.3309081 , 7.48514514, ..., 7.33837733, 7.38716175,
       7.48462378])

In [28]:
# Root-mean-squared error of the current predictions on the validation split.
# Computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept; arguments follow the documented
# (y_true, y_pred) order.
# NOTE: the variable keeps its original name `mse`, but the value is an RMSE.
mse = np.sqrt(mean_squared_error(y_val, y_pred_regr))
mse

0.8805103322874438

In [29]:
# Lasso with default settings; `fit` returns the estimator, which is echoed as
# the cell output.
# NOTE(review): the validation predictions printed later in this notebook are
# one repeated value (7.45485702), which suggests the default penalty zeroes
# out every coefficient here -- consider tuning `alpha`.
l = Lasso().fit(x_train, y_train)
l

Lasso()

In [30]:
# Predict on the validation features with the Lasso model; this overwrites
# the previous model's predictions stored under the same name.
y_pred_regr = l.predict(x_val)
y_pred_regr

array([7.45485702, 7.45485702, 7.45485702, ..., 7.45485702, 7.45485702,
       7.45485702])

In [31]:
# Root-mean-squared error of the current predictions on the validation split.
# Computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept; arguments follow the documented
# (y_true, y_pred) order.
# NOTE: the variable keeps its original name `mse`, but the value is an RMSE.
mse = np.sqrt(mean_squared_error(y_val, y_pred_regr))
mse

0.8854278096232179

In [32]:
# TweedieRegressor (a generalized linear model) with default settings; `fit`
# returns the estimator, which is echoed as the cell output.
tr = TweedieRegressor().fit(x_train, y_train)
tr

TweedieRegressor()

In [33]:
# Predict on the validation features with the Tweedie model; this overwrites
# the previous model's predictions stored under the same name.
y_pred_regr = tr.predict(x_val)
y_pred_regr

array([7.45993663, 7.43488854, 7.45986793, ..., 7.43325353, 7.43671551,
       7.45873975])

In [34]:
# Root-mean-squared error of the current predictions on the validation split.
# Computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept; arguments follow the documented
# (y_true, y_pred) order.
# NOTE: the variable keeps its original name `mse`, but the value is an RMSE.
mse = np.sqrt(mean_squared_error(y_val, y_pred_regr))
mse

0.8841769281687416

In [35]:
# Linear support-vector regressor with default settings; `fit` returns the
# estimator, which is echoed as the cell output.
# NOTE(review): no `random_state` is passed, so the fitted model (and the score
# computed from it) may differ slightly between runs -- consider seeding it.
lsvr = LinearSVR().fit(x_train, y_train)
lsvr

LinearSVR()

In [36]:
# Predict on the validation features with the LinearSVR model; this
# overwrites the previous model's predictions stored under the same name.
y_pred_regr = lsvr.predict(x_val)
y_pred_regr

array([7.56619849, 7.31418591, 7.5365932 , ..., 7.3807981 , 7.33224228,
       7.57443134])

In [37]:
# Root-mean-squared error of the current predictions on the validation split.
# Computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept; arguments follow the documented
# (y_true, y_pred) order.
# NOTE: the variable keeps its original name `mse`, but the value is an RMSE.
mse = np.sqrt(mean_squared_error(y_val, y_pred_regr))
mse

0.8849741313392324