In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.model_selection import LeaveOneOut,cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import seaborn as sns
from sklearn.kernel_approximation import PolynomialCountSketch
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

In [8]:
def get_parameters(X,y,model):
    """Score ``model`` with leave-one-out cross-validation.

    For every leave-one-out split the estimator is refitted on the training
    rows and asked to predict the single held-out row; per-fold errors are
    collected and then summarised.

    Parameters
    ----------
    X : array-like
        Feature table; converted with ``np.array`` before splitting.
    y : array-like
        Targets aligned row-by-row with ``X``; converted with ``np.array``.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refitted in
        place on every fold, so on return it holds the fit of the last fold.

    Returns
    -------
    tuple
        ``(mean_ab_error, mean_sq_error, median_error, MAE, MSE, Median, SD, r2)``
        where the first three items are per-fold lists (absolute, squared and
        median-absolute error), ``MAE``/``MSE`` are the means of the first two
        lists, ``Median`` is the median of the third list, ``SD`` is the
        standard deviation of the held-out predictions (not of the errors),
        and ``r2`` is ``r2_score`` of the pooled held-out predictions against
        the pooled true values.
    """
    features = np.array(X)
    targets = np.array(y)
    loo = LeaveOneOut()

    mean_ab_error = []
    mean_sq_error = []
    median_error = []
    pred = []
    y_true = []

    for train_index, test_index in loo.split(features):
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = targets[train_index], targets[test_index]

        # Use the estimator that was passed in. Relying on a notebook-level
        # global here would score the same estimator for every caller.
        model.fit(X_train, y_train)
        predict_data = model.predict(X_test)

        pred.append(predict_data)
        y_true.append(y_test)
        mean_ab_error.append(mean_absolute_error(y_test, predict_data))
        mean_sq_error.append(mean_squared_error(y_test, predict_data))
        median_error.append(median_absolute_error(y_test, predict_data))

    MAE = np.mean(mean_ab_error)
    MSE = np.mean(mean_sq_error)
    Median = np.median(median_error)
    SD = np.std(pred)

    # Flatten the per-fold arrays so r2_score sees one pooled 1-D vector each.
    y_new = np.array(y_true).reshape(-1)
    pred_new = np.array(pred).reshape(-1)
    r2 = r2_score(y_new, pred_new)

    return mean_ab_error,mean_sq_error,median_error,MAE,MSE,Median,SD,r2

In [24]:
def plot_combination(e1,e2,e3):
    """Draw three side-by-side histograms (with KDE overlay) and show them.

    ``e1``, ``e2`` and ``e3`` are plotted left to right in a 1x3 grid, titled
    "MAE", "MSE" and "MedianSE" respectively, each with an "Error" x-label.
    """
    # (values, line colour, panel title) for each column of the grid.
    panels = (
        (e1, 'maroon', "MAE"),
        (e2, 'darkgreen', "MSE"),
        (e3, 'darkblue', "MedianSE"),
    )

    # Create all three axes on the current figure before drawing anything.
    axes = [plt.subplot(1, 3, position) for position in (1, 2, 3)]

    for axis, (values, colour, heading) in zip(axes, panels):
        sns.histplot(values, ax=axis, color=colour, kde=True, fill=False)
        axis.set_title(heading)
        axis.set_xlabel("Error")

    plt.tight_layout()
    plt.show()

In [5]:
# Load the raw table, encode the two categorical columns as integers
# (Sex: M -> 1, F -> 0; Side: L -> 0, R -> 1), keep only the modelling
# columns and drop every row that has a missing value in any of them.
data = pd.read_csv('new_data.csv')
data['Sex'] = data['Sex'].replace('M', 1).replace('F', 0)
data['Side'] = data['Side'].replace('L', 0).replace('R', 1)
X = data[[
    'Age',
    'Sex',
    'BMI',
    'GX AT VO2 (mL/kg/min)',
    'GX AT VE/VCO2',
    'GX VO2 Max VO2 (mL/kg/min)',
    'GX VO2 Max VO2/Pred (%)',
    'GX VO2 Max VO2/HR (mL/beat)',
    'GX Predicted VO2/HR (mL/beat)',
    'GX Work Max Work (Watts)',
    'GX Predicted Work (Watts)',
    'PF Pre FEV1/FVC (%)',
    'PF Pre FEV1 (L)',
    'Side',
    'pred mort',
    'LOS',
]].dropna(axis=0)

In [6]:
# Split the filtered table: 'LOS' is the target (kept as a one-column frame),
# the remaining fifteen columns are the predictors.
y = X[['LOS']]
X1 = X[[
    'Age',
    'Sex',
    'BMI',
    'GX AT VO2 (mL/kg/min)',
    'GX AT VE/VCO2',
    'GX VO2 Max VO2 (mL/kg/min)',
    'GX VO2 Max VO2/Pred (%)',
    'GX VO2 Max VO2/HR (mL/beat)',
    'GX Predicted VO2/HR (mL/beat)',
    'GX Work Max Work (Watts)',
    'GX Predicted Work (Watts)',
    'PF Pre FEV1/FVC (%)',
    'PF Pre FEV1 (L)',
    'Side',
    'pred mort',
]]

In [18]:
# Build a LinearRegression estimator, hand it to get_parameters and keep the
# eight returned values under the lg_ prefix.
lg = LinearRegression()
(
    lg_mean_ab_error,
    lg_mean_sq_error,
    lg_median_error,
    lg_MAE,
    lg_MSE,
    lg_Median,
    lg_SD,
    lg_r2,
) = get_parameters(X1, y, lg)

In [19]:
# Build a Ridge estimator (alpha=1.0), hand it to get_parameters and keep the
# eight returned values under the rg_ prefix.
ridge = linear_model.Ridge(alpha=1.0)
(
    rg_mean_ab_error,
    rg_mean_sq_error,
    rg_median_error,
    rg_MAE,
    rg_MSE,
    rg_Median,
    rg_SD,
    rg_r2,
) = get_parameters(X1, y, ridge)

In [21]:
# Build a Lasso estimator (alpha=1.0), hand it to get_parameters and keep the
# eight returned values under the ls_ prefix.
lasso = Lasso(alpha=1.0)
(
    ls_mean_ab_error,
    ls_mean_sq_error,
    ls_median_error,
    ls_MAE,
    ls_MSE,
    ls_Median,
    ls_SD,
    ls_r2,
) = get_parameters(X1, y, lasso)

In [77]:
# Build an ElasticNet estimator (alpha=1.0), hand it to get_parameters and
# keep the eight returned values under the els_ prefix.
els = ElasticNet(alpha=1.0)
(
    els_mean_ab_error,
    els_mean_sq_error,
    els_median_error,
    els_MAE,
    els_MSE,
    els_Median,
    els_SD,
    els_r2,
) = get_parameters(X1, y, els)