## 於血液透析前施打EPO，預測2週後貧血改善幅度

In [10]:
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
import math
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import LinearRegression
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, max_error, r2_score
import joblib

In [11]:
def display(arr: np.ndarray):
    """Summarize an array as a small dictionary.

    Args:
        arr: array to describe; its ``dtype`` and ``shape`` attributes are read.

    Returns:
        dict with the keys ``'min'`` and ``'max'`` (smallest / largest element),
        ``'dtype'`` (the array's dtype) and ``'size'`` (the array's shape tuple).

    NOTE(review): inside a notebook this definition shadows IPython's own
    ``display`` helper — consider renaming once callers can be updated.
    """
    lowest = np.min(arr)
    highest = np.max(arr)
    return dict(min=lowest, max=highest, dtype=arr.dtype, size=arr.shape)

讀檔

In [12]:
# Load the dataset. The source is a CSV file (read with pd.read_csv),
# not an Excel workbook.
df = pd.read_csv(
    './data/dialysis/Regression_2weeks_data_v2.csv',
)

分割訓練集及測試集

In [13]:
# Split df 70/30 into a training set and a test set; the fixed random_state
# keeps the partition the same on every run.
train_df, test_df = train_test_split(
    df,
    test_size=0.3,
    random_state=0,
)

正規化及標準化

In [14]:
# Separate features (every column except the last) from the label (last column).
X_train, y_train = train_df.iloc[:, :-1], train_df.iloc[:, -1]
X_test, y_test = test_df.iloc[:, :-1], test_df.iloc[:, -1]

# Fit the standardizer on the training features only, then apply the same
# transformation to both partitions.
scaler_Std = preprocessing.StandardScaler().fit(X_train)
X_train_Std = scaler_Std.transform(X_train)
X_test_Std = scaler_Std.transform(X_test)

# Standardized features and labels for the complete dataset, using the
# scaler that was fitted on the training partition.
X_Std = scaler_Std.transform(df.iloc[:, :-1])
y = df.iloc[:, -1]

In [15]:
# Persist the fitted scaler so the same standardization can be re-applied later;
# joblib.dump's return value (the list of written file names) is the cell output.
joblib.dump(scaler_Std, "./hw2/dialysis-regression-14-std.bin")

['./hw2/dialysis-regression-14-std.bin']

In [16]:
def regression_model_evaluation_result(X, y, model):
    '''Evaluate an already-fitted regression model on (X, y).

    No training happens here: the function only calls ``model.predict(X)``
    and compares the predictions against ``y``.

    Args:
        X: 2-D feature matrix; ``X.shape`` must unpack into
            (number of samples, number of features).
        y: true target values, one per row of X.
        model: fitted estimator exposing a ``predict`` method.
    Returns:
        results: (dict) evaluation indices with the keys "Max error", "MAE",
            "MSE", "RMSE", "R2" and "Adjusted R2". "Adjusted R2" is NaN when
            the number of samples does not exceed the number of features
            plus one, because the adjustment is undefined in that case.
    '''
    pred = model.predict(X)

    size, var_num = X.shape
    R2 = r2_score(y, pred)

    # Adjusted R2 divides by the residual degrees of freedom (n - p - 1).
    # Guard against zero / negative values instead of raising
    # ZeroDivisionError or returning a meaningless number.
    dof = size - var_num - 1
    if dof > 0:
        adj_R2 = 1 - (1 - R2) * (size - 1) / dof
    else:
        adj_R2 = float("nan")

    MAE = mean_absolute_error(y, pred)
    MSE = mean_squared_error(y, pred)
    RMSE = MSE ** 0.5
    Max_error = max_error(y, pred)

    # Summary
    results = {"Max error": Max_error, "MAE": MAE, "MSE": MSE, "RMSE": RMSE, "R2": R2, "Adjusted R2": adj_R2}

    return results
    

SVR model

In [17]:
# RBF-kernel support vector regressor with fixed hyper-parameters, fitted on
# the standardized features X_Std and labels y.
svr_model = SVR(kernel='rbf', C=4, gamma=0.125).fit(X_Std, y)

# NOTE(review): the model is scored on the same (X_Std, y) it was fitted on,
# so these are in-sample metrics, not a held-out estimate.
results = regression_model_evaluation_result(X_Std, y, svr_model)

In [18]:
# Show the evaluation metrics dictionary currently held in `results`.
print(results)

{'Max error': 9.356970367995356, 'MAE': 0.1569248050824759, 'MSE': 0.4860670718426748, 'RMSE': 0.6971851058669245, 'R2': 0.7180764379925229, 'Adjusted R2': 0.6981240915385579}


In [19]:
# Persist the fitted SVR model; the returned list of written file names is the cell output.
joblib.dump(svr_model, "./hw2/dialysis-regression-14-svr.model")

['./hw2/dialysis-regression-14-svr.model']

In [20]:
# 5-fold cross-validated search for the ElasticNet regularization strength
# (alpha) at a fixed l1_ratio of 0.1, fitted on X_Std / y.
elastic_cv = linear_model.ElasticNetCV(
    l1_ratio=0.1,
    cv=5,
    random_state=0,
).fit(X_Std, y)

# Display the alpha chosen by cross-validation.
elastic_cv.alpha_

0.5075211448048668

In [21]:
# Build the final ElasticNet with the alpha selected by the cross-validated
# search (elastic_cv) instead of a literal pasted from that cell's output, so
# the value cannot go stale when the data or the CV settings change.
elastic_model = linear_model.ElasticNet(alpha=elastic_cv.alpha_, l1_ratio=0.1)
elastic_model.fit(X_Std, y)

# NOTE(review): the model is scored on the same (X_Std, y) it was fitted on,
# so these are in-sample metrics, not a held-out estimate.
results = regression_model_evaluation_result(X_Std, y, elastic_model)

In [22]:
# Show the evaluation metrics dictionary currently held in `results`.
print(results)

{'Max error': 12.660579516173716, 'MAE': 0.6203328776634416, 'MSE': 1.6013632962169917, 'RMSE': 1.2654498394709257, 'R2': 0.0711939345612499, 'Adjusted R2': 0.005460299975267868}


In [23]:
# Persist the fitted ElasticNet model; the returned list of written file names is the cell output.
joblib.dump(elastic_model, "./hw2/dialysis-regression-14-elastic.model")

['./hw2/dialysis-regression-14-elastic.model']