In [78]:
# Importing essential libraries
import pandas as pd

# Load the dataset
df = pd.read_csv('sleep_data.csv')

# Display the first few rows
df.head()


Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [79]:
# 1. Split Blood Pressure into two columns: Systolic and Diastolic
df[['Systolic', 'Diastolic']] = df['Blood Pressure'].str.split('/', expand=True)
df['Systolic'] = df['Systolic'].astype(float)
df['Diastolic'] = df['Diastolic'].astype(float)

# 2. Drop unnecessary columns
df.drop(['Person ID', 'Blood Pressure', 'Sleep Disorder'], axis=1, inplace=True)

# 3. Convert categorical columns to numeric using one-hot encoding
df = pd.get_dummies(df, columns=['Gender', 'Occupation', 'BMI Category'], drop_first=True)

# 4. Check for missing values
print("Missing values:\n", df.isnull().sum())

# View cleaned data
df.head()


Missing values:
 Age                                0
Sleep Duration                     0
Quality of Sleep                   0
Physical Activity Level            0
Stress Level                       0
Heart Rate                         0
Daily Steps                        0
Systolic                           0
Diastolic                          0
Gender_Male                        0
Occupation_Doctor                  0
Occupation_Engineer                0
Occupation_Lawyer                  0
Occupation_Manager                 0
Occupation_Nurse                   0
Occupation_Sales Representative    0
Occupation_Salesperson             0
Occupation_Scientist               0
Occupation_Software Engineer       0
Occupation_Teacher                 0
BMI Category_Normal Weight         0
BMI Category_Obese                 0
BMI Category_Overweight            0
dtype: int64


Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,Heart Rate,Daily Steps,Systolic,Diastolic,Gender_Male,...,Occupation_Manager,Occupation_Nurse,Occupation_Sales Representative,Occupation_Salesperson,Occupation_Scientist,Occupation_Software Engineer,Occupation_Teacher,BMI Category_Normal Weight,BMI Category_Obese,BMI Category_Overweight
0,27,6.1,6,42,6,77,4200,126.0,83.0,True,...,False,False,False,False,False,True,False,False,False,True
1,28,6.2,6,60,8,75,10000,125.0,80.0,True,...,False,False,False,False,False,False,False,False,False,False
2,28,6.2,6,60,8,75,10000,125.0,80.0,True,...,False,False,False,False,False,False,False,False,False,False
3,28,5.9,4,30,8,85,3000,140.0,90.0,True,...,False,False,True,False,False,False,False,False,True,False
4,28,5.9,4,30,8,85,3000,140.0,90.0,True,...,False,False,True,False,False,False,False,False,True,False


In [80]:
# Define the target variable (label)
y = df['Quality of Sleep']

# Define the feature variables
X = df.drop('Quality of Sleep', axis=1)

# Display the shape of X and y
print("Feature shape:", X.shape)
print("Target shape:", y.shape)


Feature shape: (374, 22)
Target shape: (374,)


In [81]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1. Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Train Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# 3. Predict on test set
y_pred_lr = lr_model.predict(X_test)

# 4. Evaluate the model
print("Linear Regression Performance:")
print("MAE:", mean_absolute_error(y_test, y_pred_lr))
print("MSE:", mean_squared_error(y_test, y_pred_lr))
print("R² Score:", r2_score(y_test, y_pred_lr))


Linear Regression Performance:
MAE: 0.11620901130092283
MSE: 0.049974863026046305
R² Score: 0.9668738387318512


In [82]:
from sklearn.ensemble import RandomForestRegressor

# Train Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict
y_pred_rf = rf_model.predict(X_test)

# Evaluate
print("Random Forest Regressor Performance:")
print("MAE:", mean_absolute_error(y_test, y_pred_rf))
print("MSE:", mean_squared_error(y_test, y_pred_rf))
print("R² Score:", r2_score(y_test, y_pred_rf))


Random Forest Regressor Performance:
MAE: 0.046666666666666676
MSE: 0.026712000000000014
R² Score: 0.9822937779872731


In [83]:
from sklearn.neighbors import KNeighborsRegressor

# Train KNN model (you can tune n_neighbors later)
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)

# Predict
y_pred_knn = knn_model.predict(X_test)

# Evaluate
print("KNN Regressor Performance:")
print("MAE:", mean_absolute_error(y_test, y_pred_knn))
print("MSE:", mean_squared_error(y_test, y_pred_knn))
print("R² Score:", r2_score(y_test, y_pred_knn))


KNN Regressor Performance:
MAE: 0.09866666666666667
MSE: 0.19573333333333331
R² Score: 0.8702568937072827


In [85]:
import numpy as np

# Example new user data (replace with actual values or input interface)
new_user = {
    'Age': 25,
    'Sleep Duration': 6.5,
    'Physical Activity Level': 50,
    'Stress Level': 5,
    'Heart Rate': 75,
    'Daily Steps': 5000,
    'Systolic': 120,
    'Diastolic': 80,
    # One-hot encoded fields: adjust based on your actual dummies
    'Gender_Male': 1,
    'Occupation_Doctor': 0,
    'Occupation_Engineer': 0,
    'Occupation_Lawyer': 0,
    'Occupation_Manager': 0,
    'Occupation_Nurse': 0,
    'Occupation_Salesperson': 0,
    'Occupation_Scientist': 0,
    'Occupation_Software Engineer': 1,
    'Occupation_Teacher': 0,
    'BMI Category_Normal': 0,
    'BMI Category_Overweight': 1
}


input_df = pd.DataFrame([new_user], columns=X.columns)
# Predict
predicted_quality = rf_model.predict(input_df)
print("Predicted Quality of Sleep:", predicted_quality[0])


Predicted Quality of Sleep: 6.78


In [86]:
def predict_sleep_quality():
    print("Please enter the following information:")

    # Basic numeric inputs
    age = int(input("Age: "))
    sleep_duration = float(input("Sleep Duration (in hours): "))
    physical_activity = int(input("Physical Activity Level (0–100): "))
    stress_level = int(input("Stress Level (0–10): "))
    heart_rate = int(input("Heart Rate: "))
    daily_steps = int(input("Daily Steps: "))
    systolic = int(input("Systolic BP: "))
    diastolic = int(input("Diastolic BP: "))

    # One-hot categories
    gender = input("Gender (Male/Female): ").strip()
    occupation = input("Occupation (e.g., Software Engineer, Teacher, Doctor): ").strip()
    bmi_category = input("BMI Category (e.g., Normal, Overweight): ").strip()

    # Create initial input dictionary
    input_dict = {
        'Age': age,
        'Sleep Duration': sleep_duration,
        'Physical Activity Level': physical_activity,
        'Stress Level': stress_level,
        'Heart Rate': heart_rate,
        'Daily Steps': daily_steps,
        'Systolic': systolic,
        'Diastolic': diastolic,
    }

    # Add one-hot encoded fields
    for col in X.columns:
        if col.startswith('Gender_'):
            input_dict[col] = 1 if col == f'Gender_{gender}' else 0
        elif col.startswith('Occupation_'):
            input_dict[col] = 1 if col == f'Occupation_{occupation}' else 0
        elif col.startswith('BMI Category_'):
            input_dict[col] = 1 if col == f'BMI Category_{bmi_category}' else 0
        elif col not in input_dict:
            input_dict[col] = 0  # default for all other features

    # Convert to DataFrame
    input_df = pd.DataFrame([input_dict], columns=X.columns)

    # Predict
    prediction = rf_model.predict(input_df)[0]
    print(f"\n✅ Predicted Quality of Sleep: {prediction:.2f}")

# Call the function
predict_sleep_quality()


Please enter the following information:


Age:  23
Sleep Duration (in hours):  7
Physical Activity Level (0–100):  35
Stress Level (0–10):  6
Heart Rate:  80
Daily Steps:  7000
Systolic BP:  120
Diastolic BP:  80
Gender (Male/Female):  Female
Occupation (e.g., Software Engineer, Teacher, Doctor):  Software Engineer
BMI Category (e.g., Normal, Overweight):  Normal



✅ Predicted Quality of Sleep: 5.68
