# Pre-Processing Dataset

In [1]:
# pandas, min max scaler, train test split, random forest, confusion matrix, accuracy
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
df = pd.read_csv("Application/SaYoPillow.csv")
df.head()

Unnamed: 0,snr,rr,t,lm,bo,rem,slr,hr,sl
0,93.8,25.68,91.84,16.6,89.84,99.6,1.84,74.2,3
1,91.64,25.104,91.552,15.88,89.552,98.88,1.552,72.76,3
2,60.0,20.0,96.0,10.0,95.0,85.0,7.0,60.0,1
3,85.76,23.536,90.768,13.92,88.768,96.92,0.768,68.84,3
4,48.12,17.248,97.872,6.496,96.248,72.48,8.248,53.12,0


In [3]:
df.isnull().sum() #Tidak ada yang Null

snr    0
rr     0
t      0
lm     0
bo     0
rem    0
slr    0
hr     0
sl     0
dtype: int64

In [4]:
df.duplicated()

0      False
1      False
2      False
3      False
4      False
       ...  
615    False
616    False
617    False
618    False
619    False
Length: 620, dtype: bool

In [5]:
# snr: snoring rate; rr: respiration rate; t: body temperature; lm: limb movement; bo: blood oxygen;
# rem: eye movement; slr: sleeping hours; hr: heart rate; sl: stress level
# 0-low, 1–medium low, 2-medium, 3-medium high, 4-high
df.columns = ["TingkatMendengkur", "TingkatPernapasan", "SuhuTubuh", "GerakanKaki", 
"OksigenDarah", "GerakanMata", "JamTidur", "DetakJantung", "TingkatStres"]
df.head()

Unnamed: 0,TingkatMendengkur,TingkatPernapasan,SuhuTubuh,GerakanKaki,OksigenDarah,GerakanMata,JamTidur,DetakJantung,TingkatStres
0,93.8,25.68,91.84,16.6,89.84,99.6,1.84,74.2,3
1,91.64,25.104,91.552,15.88,89.552,98.88,1.552,72.76,3
2,60.0,20.0,96.0,10.0,95.0,85.0,7.0,60.0,1
3,85.76,23.536,90.768,13.92,88.768,96.92,0.768,68.84,3
4,48.12,17.248,97.872,6.496,96.248,72.48,8.248,53.12,0


In [6]:
X = df.drop("TingkatStres", axis="columns")
Y = df["TingkatStres"]

In [7]:
Xmin = X.min()
Xmax = X.max()

In [8]:
X = MinMaxScaler().fit_transform(X)
X

array([[0.88727273, 0.69142857, 0.48857143, ..., 0.88      , 0.20444444,
        0.69142857],
       [0.848     , 0.65028571, 0.468     , ..., 0.864     , 0.17244444,
        0.65028571],
       [0.27272727, 0.28571429, 0.78571429, ..., 0.55555556, 0.77777778,
        0.28571429],
       ...,
       [0.95461818, 0.82171429, 0.13428571, ..., 0.93066667, 0.        ,
        0.82171429],
       [0.248     , 0.26628571, 0.76628571, ..., 0.54044444, 0.74755556,
        0.26628571],
       [0.52581818, 0.38514286, 0.59942857, ..., 0.71022222, 0.45422222,
        0.38514286]])

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, random_state=33)
X_train.shape, X_test.shape # train:496 test:124

((496, 8), (124, 8))

# Membuat Model Random Forest

In [10]:
rfc = RandomForestClassifier(n_estimators=99).fit(X_train, Y_train)

In [11]:
prediction = rfc.predict(X_test)
prediction

array([1, 0, 2, 3, 1, 0, 0, 3, 3, 0, 0, 1, 2, 4, 1, 4, 1, 0, 0, 4, 2, 0,
       2, 1, 2, 2, 1, 2, 1, 0, 2, 1, 2, 4, 0, 2, 1, 1, 4, 4, 3, 1, 1, 3,
       4, 0, 0, 2, 0, 4, 3, 1, 0, 3, 2, 4, 4, 4, 4, 0, 2, 2, 0, 2, 0, 2,
       2, 4, 1, 0, 3, 1, 1, 0, 0, 3, 0, 3, 1, 4, 1, 3, 3, 3, 4, 0, 4, 0,
       1, 4, 3, 0, 1, 2, 0, 4, 4, 3, 3, 0, 0, 4, 1, 0, 4, 3, 4, 1, 2, 1,
       0, 3, 3, 2, 0, 0, 2, 1, 0, 2, 4, 3, 2, 3], dtype=int64)

In [12]:
confusion_matrix = confusion_matrix(Y_test, prediction)
confusion_matrix

array([[32,  0,  0,  0,  0],
       [ 0, 25,  0,  0,  0],
       [ 0,  0, 23,  0,  0],
       [ 0,  0,  0, 21,  0],
       [ 0,  0,  0,  0, 23]], dtype=int64)

In [13]:
accuracy_score = accuracy_score(Y_test, prediction)
accuracy_score

1.0