In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline

In [2]:
# Load the dataset (relative path, resolved against the kernel's working directory)
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='new_dataset.csv')
df.head(n=5)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,BP High,BP Low,Sick
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,77,4200,No Disorder,126,83,0
1,2,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,No Disorder,125,80,0
2,3,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,No Disorder,125,80,0
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90,1
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90,1


In [3]:
# Remove the columns that are used neither as features nor as the target below.
# Rebinding `df` (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: running it a second time no longer raises KeyError because the
# columns have already been removed.
df = df.drop(columns=['Person ID', 'Sick'], errors='ignore')


In [4]:
# Collect the names of all numeric columns, then take out the target
# ('Stress Level') so that only numeric *input* features remain.
numeric_columns = df.select_dtypes(include=[np.number]).columns
numerical_features = [*numeric_columns]
numerical_features.remove('Stress Level')
numerical_features


['Age',
 'Sleep Duration',
 'Quality of Sleep',
 'Physical Activity Level',
 'Heart Rate',
 'Daily Steps',
 'BP High',
 'BP Low']

In [5]:
# Split the frame into the regression target and the feature matrix.
# `drop` returns a new frame, so later edits to X leave `df` untouched.
target = 'Stress Level'
y = df[target]
X = df.drop(columns=[target])

In [6]:
from sklearn.preprocessing import StandardScaler

# Standardise the numeric feature columns of X.
# The scaler is fitted on the raw values held in `df`, not on `X`: this cell
# overwrites X's numeric columns, so fitting on X would, on a second run, learn the
# statistics of already-scaled data and silently break scaler.transform() for new
# inputs (and the scaler that gets pickled later). On the first run the result is
# identical because X's numeric columns still equal df's at this point.
scaler = StandardScaler()
X[numerical_features] = scaler.fit_transform(df[numerical_features])


In [7]:
from sklearn.preprocessing import LabelEncoder

# One independent encoder per categorical column; each is fitted separately later
# and stored under its own name in the pickled bundle.
gender_le, occupation_le, bmiCategory_le, sleepDisorder_le = (
    LabelEncoder() for _ in range(4)
)

In [8]:
# Replace each categorical column of X with integer codes.
# Every encoder is fitted on the original string column in `df` rather than on X:
# this cell overwrites X's columns with integers, so fitting on X would, on a
# re-run, make the encoders learn the integer codes as their classes and later fail
# to transform string inputs such as 'Male'. First-run results are unchanged.
X['Gender'] = gender_le.fit_transform(df['Gender'])
X['Occupation'] = occupation_le.fit_transform(df['Occupation'])
X['BMI Category'] = bmiCategory_le.fit_transform(df['BMI Category'])
X['Sleep Disorder'] = sleepDisorder_le.fit_transform(df['Sleep Disorder'])



In [9]:
from sklearn.ensemble import RandomForestRegressor

# Train a random forest on the full preprocessed feature matrix.
# random_state=0 pins the seed so repeated runs build the same forest.
# `fit` returns the estimator itself, so the trailing expression still displays it.
regressor = RandomForestRegressor(random_state=0).fit(X, y)
regressor

In [32]:
# A single hand-written example to run through the trained model.
# Keys are listed in the same order as the feature columns the model was fitted on.
sample_input = {
    'Gender': 'Male',
    'Age': 35,
    'Occupation': 'Teacher',
    'Sleep Duration': 8,
    'Quality of Sleep': 4,
    'Physical Activity Level': 50,
    'BMI Category': 'Obese',
    'Heart Rate': 80,
    'Daily Steps': 5000,
    'Sleep Disorder': 'No Disorder',
    'BP High': 120,
    'BP Low': 90,
}

# Scalar values need an explicit index to become a one-row DataFrame.
prediction = pd.DataFrame(sample_input, index=[0])
prediction
 

Unnamed: 0,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,BP High,BP Low
0,Male,35,Teacher,8,4,50,Obese,80,5000,No Disorder,120,90


In [33]:
# Apply the already-fitted preprocessing to the sample row: integer-encode the
# categorical columns, then standardise the numeric ones with the training scaler.
# Note: this overwrites `prediction` in place, so the cell is meant to run once per
# freshly built sample.
label_encoders = {
    'Gender': gender_le,
    'Occupation': occupation_le,
    'BMI Category': bmiCategory_le,
    'Sleep Disorder': sleepDisorder_le,
}
for column_name, encoder in label_encoders.items():
    prediction[column_name] = encoder.transform(prediction[column_name])

scaled_values = scaler.transform(prediction[numerical_features])
prediction[numerical_features] = scaled_values
prediction


Unnamed: 0,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,BP High,BP Low
0,1,-0.829471,10,1.092276,-2.771424,-0.440857,1,2.381086,-1.124458,1,-1.105421,0.869486


In [34]:
# Predict the stress level for the preprocessed sample; `predict` returns one value
# per input row, so the first element is the answer for our single row.
y_pred = regressor.predict(X=prediction)
y_pred[0]

6.62

In [37]:
import pickle

# Bundle the model with every fitted preprocessing object it needs, so a consumer
# can reproduce the exact encoding and scaling used during training.
data = {
    "model": regressor,
    "scaler": scaler,
    "gender_le": gender_le,
    "occupation_le": occupation_le,
    "bmiCategory_le": bmiCategory_le,
    "sleepDisorder_le": sleepDisorder_le,
}

In [38]:
# Serialise the bundle to disk with the newest pickle protocol available.
# NOTE(review): the extension is "pk1" (digit one) — presumably "pkl" was intended;
# left unchanged because other code may load this exact path.
with open("model.pk1", mode="wb") as model_file:
    pickle.dump(data, model_file, protocol=pickle.HIGHEST_PROTOCOL)

In [39]:
# Reload the bundle from disk to check that it round-trips.
# NOTE: pickle.load can execute arbitrary code while deserialising — only load
# files that you created yourself or otherwise trust.
with open("model.pk1", "rb") as file:
    data = pickle.load(file)

# Unpack every artefact stored in the bundle. The scaler is restored as well, so
# that the numeric preprocessing comes from the same file as the model and the
# encoders instead of relying on a `scaler` left over in the kernel.
regressor = data["model"]
scaler = data['scaler']
gender_le = data['gender_le']
occupation_le = data['occupation_le']
bmiCategory_le = data['bmiCategory_le']
sleepDisorder_le = data['sleepDisorder_le']

In [40]:
# Observed value range of the column in the raw (unscaled) frame.
col = 'Quality of Sleep'
column_values = df[col]

(column_values.min(), column_values.max())

(4, 9)

In [41]:
# Observed value range of the target column in the raw frame.
col = 'Stress Level'
column_values = df[col]

(column_values.min(), column_values.max())

(3, 8)

In [42]:
# Area chart of stress level against sleep quality, coloured by gender.
# The column names double as axis titles and are combined into the chart title.
x_column, y_column = 'Quality of Sleep', 'Stress Level'

fig = px.area(df, x=x_column, y=y_column, color='Gender')
layout_options = {
    'title': f'{x_column} vs {y_column}',
    'xaxis_title': x_column,
    'yaxis_title': y_column,
    'font': {'size': 12},
}
fig.update_layout(**layout_options)
fig.show()

In [19]:
# Area chart of stress level against sleep duration, coloured by gender.
# The column names double as axis titles and are combined into the chart title.
x_column, y_column = 'Sleep Duration', 'Stress Level'

fig = px.area(df, x=x_column, y=y_column, color='Gender')
layout_options = {
    'title': f'{x_column} vs {y_column}',
    'xaxis_title': x_column,
    'yaxis_title': y_column,
    'font': {'size': 12},
}
fig.update_layout(**layout_options)
fig.show()