# set up

In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare the version numerically: as plain strings "0.9" >= "0.20" is True,
# so a lexicographic check would accept releases that are older than 0.20.
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None, sklearn.__version__
assert tuple(int(part) for part in _sklearn_version.groups()) >= (0, 20)
# NOTE(review): the 'neg_root_mean_squared_error' scorer used by the grid
# search appears to need Scikit-Learn ≥0.22 — consider raising this floor.

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "ensembles"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure into IMAGES_PATH.

    Args:
        fig_id: Base name of the output file (without extension).
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to ``savefig`` as the format.
        resolution: Value passed to ``savefig`` as ``dpi``.
    """
    filename = ".".join((fig_id, fig_extension))
    path = os.path.join(IMAGES_PATH, filename)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

# import dataset

In [2]:
import pandas as pd

# Location of the AHI spreadsheet. The original absolute path is kept as the
# default so existing runs are unaffected; set the AHI_DATA_PATH environment
# variable to point at the file on any other machine.
AHI_DATA_PATH = os.environ.get(
    "AHI_DATA_PATH",
    r'C:\Users\ASUS\Desktop\machine learning\MLfinalReport\AHIdata.xlsx',
)
data = pd.read_excel(AHI_DATA_PATH)

In [3]:
# Select the target column. Indexing with a list keeps the result a DataFrame
# and raises KeyError when the column is absent, whereas
# pd.DataFrame(data, columns=[...]) reindexes and would silently hand back an
# all-NaN column for a missing or misspelled name.
df = data[['rescored AHI']]
print(type(df))  # a pandas DataFrame, not a NumPy array
df_2D = df.to_numpy()  # 2-D NumPy array with a single column

<class 'pandas.core.frame.DataFrame'>


In [4]:
# Collapse the single-column 2-D target array into an independent 1-D copy
y_stack = df_2D.reshape(-1).copy()

# extract inputs from excel

In [5]:
def _column_values(frame, column):
    """Return one column of ``frame`` as a new 1-D NumPy array.

    Indexing with ``frame[column]`` raises ``KeyError`` when the column is
    missing; building ``pd.DataFrame(frame, columns=[column])`` instead would
    silently produce an all-NaN column for a missing or misspelled name.
    """
    return frame[column].to_numpy().flatten()

#BMI
x_bmi = _column_values(data, 'BMI')
#Age
x_age = _column_values(data, 'Age')
#neck
x_neck = _column_values(data, 'neck')
#desaturation index
x_dsi = _column_values(data, 'desaturation index')
#CVHR-OR-CEI
x_cvhrorcei = _column_values(data, 'Aligned CVHR-OR-CEI')
#CVHRI
x_cvhri = _column_values(data, 'CVHRI')
#CEI
x_cei = _column_values(data, 'CEI')

## decide your inputs

In [6]:
# Feature matrix: one row per sample, columns = (CVHR-OR-CEI, neck)
X_stack = np.column_stack((x_cvhrorcei, x_neck))

## train test validation split

In [7]:
from sklearn.model_selection import train_test_split

# First split: hold out 10% of the samples as the test set, without shuffling.
_outer_split = train_test_split(X_stack, y_stack, test_size=0.1, shuffle=False)
X_trainval, X_test, y_trainval, y_test = _outer_split

# Second split: 40% of the remaining samples become the validation set,
# again without shuffling.
_inner_split = train_test_split(X_trainval, y_trainval, test_size=0.4, shuffle=False)
X_train, X_val, y_train, y_val = _inner_split

## decide n_estimators and max_depth

In [8]:
from sklearn.model_selection import GridSearchCV
# Hyperparameter grid for the random-forest search:
# forest sizes 500, 600, ..., 1400 and tree depths 1 through 19.
param_dist = dict(
    n_estimators=range(500, 1500, 100),
    max_depth=range(1, 20, 1),
)


In [9]:
from sklearn.ensemble import RandomForestRegressor

# 3-fold cross-validated grid search over param_dist, scored with the negated
# RMSE; n_jobs=-1 lets the search run in parallel.
rnd_rg = RandomForestRegressor(random_state=42)
grid = GridSearchCV(
    estimator=rnd_rg,
    param_grid=param_dist,
    cv=3,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
)
# NOTE(review): the search is fitted on the validation split only, so the
# training split plays no part in choosing hyperparameters — confirm this
# is intended.
grid.fit(X_val, y_val)

GridSearchCV(cv=3, estimator=RandomForestRegressor(random_state=42), n_jobs=-1,
             param_grid={'max_depth': range(1, 20),
                         'n_estimators': range(500, 1500, 100)},
             scoring='neg_root_mean_squared_error')

In [10]:
# Display the hyperparameter combination selected by the grid search.
grid.best_params_

{'max_depth': 1, 'n_estimators': 500}

In [11]:
# Train a fresh forest on the training split with the tuned hyperparameters.
# Unpacking best_params_ forwards every parameter that was searched, so this
# cell stays in sync with the grid instead of naming each key by hand (and
# silently passing None if a key were ever missing).
rnd_rg = RandomForestRegressor(**grid.best_params_, random_state=42)
rnd_rg.fit(X_train, y_train)
# Predictions on the held-out test split, reused by the metric cells.
y_pred_rf = rnd_rg.predict(X_test)

## MAE

In [12]:
from sklearn.metrics import mean_absolute_error as mae

# Mean absolute error of the fitted forest on the test split.
# NOTE(review): this recomputes the same prediction that is already stored
# in y_pred_rf.
y_test_expect = rnd_rg.predict(X_test)
print(mae(y_true=y_test, y_pred=y_test_expect))

18.08780342803769


## RMSE

In [13]:
from sklearn.metrics import mean_squared_error

# Root mean squared error on the test split. The square root is taken
# explicitly because the `squared=False` argument was deprecated in
# scikit-learn 1.4 and removed in 1.6; this form works on every version.
print(float(np.sqrt(mean_squared_error(y_test, y_pred_rf))))

20.404303130757484
