# set up

In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare (major, minor) as integers. Comparing the raw version strings is
# lexicographic, so e.g. "0.9" >= "0.20" would wrongly pass the check.
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None, sklearn.__version__
assert tuple(int(part) for part in _sklearn_version.groups()) >= (0, 20)

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "ensembles"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure into ``IMAGES_PATH``.

    Parameters
    ----------
    fig_id : str
        Base file name (without extension); also echoed in the progress message.
    tight_layout : bool
        When true, ``plt.tight_layout()`` is called before saving.
    fig_extension : str
        File extension, also passed to ``savefig`` as the output format.
    resolution : int
        Passed to ``savefig`` as ``dpi``.
    """
    file_name = "{}.{}".format(fig_id, fig_extension)
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

# import dataset

In [2]:
import os
import pandas as pd

# Location of the AHI workbook. The default is the original author's local
# path; set the AHI_DATA_PATH environment variable to run the notebook on
# another machine without editing this cell.
AHI_DATA_PATH = os.environ.get(
    "AHI_DATA_PATH",
    r'C:\Users\ASUS\Desktop\machine learning\MLfinalReport\AHIdata.xlsx',
)
data = pd.read_excel(AHI_DATA_PATH)

In [3]:
# Select the target column by label so that a missing or misspelled name
# raises KeyError. pd.DataFrame(data, columns=[...]) would instead reindex
# and silently produce an all-NaN column.
df = data[['rescored AHI']]
print(type(df))  # still a pandas DataFrame, not a NumPy array
df_2D = df.to_numpy()  # 2-D NumPy array holding the single target column

<class 'pandas.core.frame.DataFrame'>


In [4]:
# Collapse the single-column 2-D target array into an independent 1-D vector
y_stack = df_2D.reshape(-1).copy()

# extract inputs from excel

In [5]:
def _column_arrays(frame, column):
    """Return one column of ``frame`` in the three forms used by this notebook.

    Returns a tuple ``(column_frame, array_2d, array_1d)``: the one-column
    DataFrame, its 2-D NumPy conversion, and the flattened 1-D copy.

    NOTE: ``pd.DataFrame(frame, columns=[column])`` reindexes, so a name that
    is absent from ``frame`` yields NaN values rather than an error.
    """
    column_frame = pd.DataFrame(frame, columns=[column])
    array_2d = column_frame.to_numpy()  # convert into numpy array
    return column_frame, array_2d, array_2d.flatten()  # flatten into 1d array

#BMI
x1, x1_2D, x_bmi = _column_arrays(data, 'BMI')
#Age
x2, x2_2D, x_age = _column_arrays(data, 'Age')
#neck
x3, x3_2D, x_neck = _column_arrays(data, 'neck')
#desaturation index
x4, x4_2D, x_dsi = _column_arrays(data, 'desaturation index')
#CVHR-OR_CEI
x5, x5_2D, x_cvhrorcei = _column_arrays(data, 'Aligned CVHR-OR-CEI')
#CVHRI
x6, x6_2D, x_cvhri = _column_arrays(data, 'CVHRI')
#CEI
x7, x7_2D, x_cei = _column_arrays(data, 'CEI')

In [6]:
# Feature matrix: one row per sample, columns are (CVHRI, CEI)
feature_columns = [x_cvhri, x_cei]
X_stack = np.stack(feature_columns, axis=1)

In [7]:
from sklearn.model_selection import train_test_split

# Fractions for the two consecutive splits. Both use shuffle=False, so the
# rows keep their original order and each held-out part is taken from the end.
TEST_FRACTION = 0.15   # share of all rows held out for testing
VAL_FRACTION = 0.4     # share of the remaining rows held out for validation

X_trainval, X_test, y_trainval, y_test = train_test_split(
    X_stack, y_stack, test_size=TEST_FRACTION, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=VAL_FRACTION, shuffle=False)

### code from hands on ML

In [8]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Base regressors combined by the voting ensemble below
tree_reg = DecisionTreeRegressor(random_state=42, max_depth=1)
rnd_reg = RandomForestRegressor(
    n_estimators=1200,
    max_depth=2,
    random_state=42,
)
svm_reg = SVR(C=1000, gamma=0.1, epsilon=10, kernel='rbf')

# (name, estimator) pairs; the labels are kept exactly as originally chosen
base_estimators = [('tree', tree_reg), ('rf', rnd_reg), ('svc', svm_reg)]
voting_reg = VotingRegressor(estimators=base_estimators)

In [9]:
# Fit the voting ensemble on the training split; the value returned by fit() is the cell's output.
voting_reg.fit(X_train, y_train)

VotingRegressor(estimators=[('tree',
                             DecisionTreeRegressor(max_depth=1,
                                                   random_state=42)),
                            ('rf',
                             RandomForestRegressor(max_depth=2,
                                                   n_estimators=1200,
                                                   random_state=42)),
                            ('svc', SVR(C=1000, epsilon=10, gamma=0.1))])

In [10]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error as mae

# Fit every regressor on the training split and print its MAE on the test split
models = (tree_reg, rnd_reg, svm_reg, voting_reg)
for reg in models:
    y_pred = reg.fit(X_train, y_train).predict(X_test)
    print(type(reg).__name__, mae(y_test, y_pred))

DecisionTreeRegressor 17.968794649642806
RandomForestRegressor 16.575325138390248
SVR 25.67287094472156
VotingRegressor 19.750216900312466


# testing data evaluation

## MAE

In [11]:
from sklearn.metrics import mean_absolute_error as mae

# Predict on the test split with the voting ensemble and report the mean absolute error
y_test_expect = voting_reg.predict(X_test)
test_mae = mae(y_test, y_test_expect)
print(test_mae)

19.750216900312466


## RMSE

In [12]:
from sklearn.metrics import mean_squared_error

# Take the square root explicitly instead of passing squared=False. That
# keyword is not accepted by every scikit-learn release allowed by the
# notebook's >=0.20 requirement (it was deprecated and later removed in newer
# releases), whereas sqrt(MSE) works on all of them.
print(np.sqrt(mean_squared_error(y_test, y_test_expect)))

23.52436787814062
