# Create model

In [27]:
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [28]:
# Read the wearable sleep-quality CSV into a DataFrame.
# The path is relative to the notebook's working directory; point it at
# your own copy of the file if it lives somewhere else.
dataset_path = "../data/wearable_tech_sleep_quality.csv"
df = pd.read_csv(dataset_path)


In [29]:
# Keep only the rows whose 'Sleep Stage' label is something other than 'Awake'.
not_awake = df["Sleep Stage"].ne("Awake")
df = df.loc[not_awake]


In [30]:
# Remove the 'Body Temperature' column so it is not part of the feature matrix.
# NOTE: re-running this cell without reloading the data raises KeyError,
# because the column is already gone.
df = df.drop("Body Temperature", axis=1)


In [31]:
# Separate the prediction target from the input features:
# y is the 'Sleep Stage' column, X is every remaining column.
target_column = "Sleep Stage"
y = df[target_column]
X = df.drop(columns=[target_column])


In [32]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2001,
)


In [33]:
# Inspect the training features produced by the split
# (a bare expression is rendered as the cell's output).
X_train

Unnamed: 0,heartrate,temperature,humidity
2290,68,23,68
2594,80,15,44
4734,56,16,63
2364,95,20,70
465,80,20,76
...,...,...,...
1356,74,16,30
3689,62,26,48
2776,56,25,45
3278,62,29,68


In [34]:
# Fit a random forest on the training split.
# The hyperparameters live in one mapping so they can be read and tuned at a glance.
forest_params = {
    "n_estimators": 100,
    "max_depth": 10,
    "min_samples_split": 4,
    "min_samples_leaf": 2,
    "criterion": "entropy",
    "n_jobs": -1,  # -1 asks scikit-learn to use all available processors
    "random_state": 1992,  # fixed seed for repeatable training
}
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)


In [35]:
# Make predictions
# Ask the fitted classifier for a label for every row of the held-out
# test features; the result is compared against y_test further down.
y_pred = clf.predict(X_test)


In [36]:
# Inspect the held-out test features that the predictions were made on
# (a bare expression is rendered as the cell's output).
X_test

Unnamed: 0,heartrate,temperature,humidity
4459,89,24,68
3982,62,17,77
2211,53,20,58
2017,105,20,77
4327,95,29,65
...,...,...,...
3495,62,15,53
1263,106,24,60
3958,81,21,43
2763,68,28,52


In [37]:
# Show the labels the classifier predicted for X_test.
# NOTE(review): this prints the whole array; a slice such as y_pred[:10]
# would keep the notebook output short.
y_pred

array(['Light', 'Deep', 'REM', 'Light', 'REM', 'Deep', 'REM', 'REM',
       'REM', 'REM', 'REM', 'Light', 'REM', 'Deep', 'REM', 'Light',
       'Light', 'Deep', 'Deep', 'REM', 'Light', 'REM', 'REM', 'REM',
       'REM', 'REM', 'Deep', 'Deep', 'Light', 'REM', 'REM', 'Deep', 'REM',
       'Light', 'Light', 'REM', 'REM', 'Light', 'Light', 'REM', 'REM',
       'Deep', 'Light', 'REM', 'Light', 'REM', 'REM', 'REM', 'Light',
       'Deep', 'Light', 'REM', 'Deep', 'REM', 'REM', 'Deep', 'REM', 'REM',
       'Light', 'Deep', 'Light', 'Light', 'Light', 'Light', 'Light',
       'Light', 'Light', 'REM', 'Light', 'Light', 'REM', 'REM', 'REM',
       'REM', 'REM', 'Deep', 'Light', 'Light', 'Light', 'Light', 'Light',
       'REM', 'REM', 'Deep', 'REM', 'Light', 'Deep', 'REM', 'Light',
       'REM', 'REM', 'Deep', 'REM', 'Light', 'REM', 'Deep', 'Light',
       'Light', 'Light', 'Light', 'Deep', 'Light', 'REM', 'Deep', 'REM',
       'Light', 'REM', 'REM', 'REM', 'Deep', 'Deep', 'Deep', 'Light',
       '

In [38]:
# Score the predictions: accuracy is the fraction of test rows whose
# predicted label matches the true label in y_test.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.3467202141900937


# Save model

In [39]:
# Persist the trained sleep-stage classifier (clf) to disk as a pickle file.
# `pickle` is imported in the notebook's import cell at the top.
# NOTE: only unpickle this file from a trusted source -- pickle.load can
# execute arbitrary code embedded in the file.
model_pkl_file = "sleep_stage_model.pkl"

# Binary write mode is required for pickle; the context manager closes the
# handle even if dumping fails.
with open(model_pkl_file, "wb") as model_file:
    pickle.dump(clf, model_file)