In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import time
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import Lasso,Ridge, ElasticNet, BayesianRidge, LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from xgboost import XGBRegressor
import warnings
warnings.filterwarnings('ignore')

  from numpy.core.umath_tests import inner1d


In [11]:
# Read the combined table; the first CSV column becomes the row index.
data = pd.read_csv('person_csvs/all_people.csv', index_col=0)

# Every column whose name contains 'RESP', except ' RESP' itself
# (note the leading space in that column name).
resp = [col for col in data.columns if col != ' RESP' and 'RESP' in col]

# Remove those columns together with 'Time [s]' and 'sec'.
data = data.drop(resp + ['Time [s]', 'sec'], axis='columns')

In [12]:
# Show the first rows of `data` as the cell output.
data.head()

Unnamed: 0,RESP,V,AVR,II,HR,PULSE,V_Min,AVR_Min,II_Min,V_Max,...,II_Max,V_Mean,AVR_Mean,II_Mean,V_Kurt,AVR_Kurt,II_Kurt,V_Skw,AVR_Skw,II_Skw
0,0.23088,0.55518,0.70528,0.060059,96.0,102.0,0.49023,-0.34066,-0.4248,1.3696,...,1.5,0.610446,0.598002,0.197546,14.546472,11.03829,10.266315,3.618766,-2.722894,2.806052
1,0.23675,0.5498,0.70039,0.064941,96.0,102.0,0.49023,-0.34066,-0.4248,1.3696,...,1.5,0.610446,0.598002,0.197546,14.546472,11.03829,10.266315,3.618766,-2.722894,2.806052
2,0.24114,0.49512,0.70528,0.055176,96.0,102.0,0.49023,-0.34066,-0.4248,1.3696,...,1.5,0.610446,0.598002,0.197546,14.546472,11.03829,10.266315,3.618766,-2.722894,2.806052
3,0.24579,0.5249,0.52493,0.3999,96.0,102.0,0.49023,-0.34066,-0.4248,1.3696,...,1.5,0.610446,0.598002,0.197546,14.546472,11.03829,10.266315,3.618766,-2.722894,2.806052
4,0.24921,0.62988,0.069892,1.145,96.0,102.0,0.49023,-0.34066,-0.4248,1.3696,...,1.5,0.610446,0.598002,0.197546,14.546472,11.03829,10.266315,3.618766,-2.722894,2.806052


In [13]:
# Target vector: the ' RESP' column as a NumPy array.
y = data[' RESP'].values

# Feature matrix: every remaining column, standardised with StandardScaler.
# NOTE(review): the scaler is fit on all rows here, before model_performance
# makes its train/test split, so held-out rows contribute to the scaling
# statistics. Consider fitting the scaler on the training split only.
SS = StandardScaler()
X = SS.fit_transform(data.drop(' RESP', axis='columns'))

In [16]:
# Candidate regressors, keyed by the label used in the results table.
# Only the random forest is currently enabled; uncomment an entry to add it
# to the comparison run by model_performance.
models = dict(
    # OLS=LinearRegression(),
    # ElasticNet=ElasticNet(),
    # BayesianRidge=BayesianRidge(),
    # Lasso=Lasso(),
    # Ridge=Ridge(),
    # KNN=KNeighborsRegressor(),
    RFF=RandomForestRegressor(),
    # Ada=AdaBoostRegressor(),
    # XGB=XGBRegressor(),
)

def model_performance(X, y, estimators=None, test_size=0.3, random_state=10):
    """Fit each candidate regressor on one hold-out split and tabulate its scores.

    Parameters
    ----------
    X : array-like
        Feature matrix; passed unchanged to ``train_test_split``.
    y : array-like
        Regression target aligned row-for-row with ``X``.
    estimators : dict, optional
        Mapping of display label -> regressor exposing ``fit``/``predict``.
        When omitted, the module-level ``models`` dict is used, so the
        original call ``model_performance(X, y)`` behaves as before.
    test_size : float, optional
        Fraction of rows held out for evaluation (default 0.3).
    random_state : int, optional
        Seed for the train/test split (default 10).

    Returns
    -------
    pandas.DataFrame
        One row per estimator with the columns ``model``, ``RMSE``, ``MAE``,
        ``R2 score`` and ``time`` (fit time plus predict time, in seconds).

    Notes
    -----
    The estimators are fitted in place, so the objects in ``estimators``
    (or in the global ``models``) are left trained on the training split.
    """
    if estimators is None:
        # Fall back to the notebook-level registry.
        estimators = models

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    labels = []
    rmse_values = []
    mae_values = []
    r2_values = []
    elapsed = []

    for label, model in tqdm(estimators.items()):
        # Time fitting and prediction separately so that metric computation
        # is not counted in the reported duration.
        fit_start = time.time()
        model.fit(X_train, y_train)
        fit_seconds = time.time() - fit_start

        predict_start = time.time()
        pred = model.predict(X_test)
        predict_seconds = time.time() - predict_start

        labels.append(label)
        # Store the root of the MSE directly so the 'RMSE' column never
        # holds un-rooted values.
        rmse_values.append(np.sqrt(mean_squared_error(y_test, pred)))
        mae_values.append(mean_absolute_error(y_test, pred))
        r2_values.append(r2_score(y_test, pred))
        elapsed.append(fit_seconds + predict_seconds)

    return pd.DataFrame({
        'model': labels,
        'RMSE': rmse_values,
        'MAE': mae_values,
        'R2 score': r2_values,
        'time': elapsed,
    })

In [17]:
model_performance(X, y)

100%|██████████| 9/9 [15:47<00:00, 182.15s/it]


Unnamed: 0,MAE,R2 score,RMSE,model,time
0,0.293627,0.1187999,0.359239,OLS,1.279344
1,0.310653,-1.33168e-09,0.382689,ElasticNet,1.032249
2,0.293627,0.1188001,0.359239,BayesianRidge,2.062691
3,0.310653,-1.33168e-09,0.382689,Lasso,0.987775
4,0.293627,0.1187999,0.359239,Ridge,0.613171
5,0.097136,0.8014152,0.170537,KNN,162.766203
6,0.087372,0.820581,0.162099,RFF,253.116708
7,0.320699,0.08913112,0.365236,Ada,328.040219
8,0.245525,0.3747992,0.302591,XGB,196.523845
