In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load your dataset from a CSV file
df = pd.read_csv('C:\\Users\\admin\\Documents\\stress_analysis\\sleep\\Sleep_health_and_lifestyle_dataset.csv')

# Step 2: Data Preprocessing
# Convert categorical variables to numeric using Label Encoding
label_encoder_gender = LabelEncoder()
label_encoder_occupation = LabelEncoder()
label_encoder_bmi = LabelEncoder()
label_encoder_sleep_disorder = LabelEncoder()

df['Gender'] = label_encoder_gender.fit_transform(df['Gender'])
df['Occupation'] = label_encoder_occupation.fit_transform(df['Occupation'])
df['BMI Category'] = label_encoder_bmi.fit_transform(df['BMI Category'])
df['Sleep Disorder'] = label_encoder_sleep_disorder.fit_transform(df['Sleep Disorder'])

# Blood Pressure: Split into two columns
df[['Systolic', 'Diastolic']] = df['Blood Pressure'].str.split('/', expand=True).astype(int)

# Drop unnecessary columns
df.drop(columns=['Person ID', 'Blood Pressure'], inplace=True)

# Step 3: Split the data into features and target variable
X = df.drop('Stress Level', axis=1)
y = df['Stress Level']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train the Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 5: Predict and Evaluate
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse:.2f}')
print(f'R^2 Score: {r2:.2f}')

# Example usage for prediction
example_data = pd.DataFrame({
    "Gender": ["Male"],
    "Age": [30],
    "Occupation": ["Engineer"],
    "Sleep Duration": [7.0],
    "Quality of Sleep": [7],
    "Physical Activity Level": [50],
    "BMI Category": ["Normal"],
    "Heart Rate": [70],
    "Daily Steps": [8000],
    "Sleep Disorder": ["None"],  # Change this to a known value if needed
    "Systolic": [120],
    "Diastolic": [80],
})

# Function to safely encode categorical variables
def safe_transform(label_encoder, value):
    try:
        return label_encoder.transform([value])[0]
    except ValueError:
        print(f"Warning: '{value}' not found in the encoder. Assigning a default value of -1.")
        return -1  # Assign a default value for unknown labels

# Encode categorical variables in the example data using the original encoders
example_data['Gender'] = safe_transform(label_encoder_gender, example_data['Gender'].iloc[0])
example_data['Occupation'] = safe_transform(label_encoder_occupation, example_data['Occupation'].iloc[0])
example_data['BMI Category'] = safe_transform(label_encoder_bmi, example_data['BMI Category'].iloc[0])
example_data['Sleep Disorder'] = safe_transform(label_encoder_sleep_disorder, example_data['Sleep Disorder'].iloc[0])

# Predict stress level
predicted_stress_level = model.predict(example_data)
print(f'Predicted Stress Level: {predicted_stress_level[0]:.2f}')


Mean Squared Error: 0.03
R^2 Score: 0.99
Predicted Stress Level: 5.35


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# -------- 1. Load the Dataset --------
dataset_path = r"C:\\Users\\admin\\Documents\\stress_analysis\\sleep\\Sleep_health_and_lifestyle_dataset.csv"
data = pd.read_csv(dataset_path)

# Display the first few rows to verify correct loading
print(data.head())

# -------- 2. Preprocessing the Data --------
# One-hot encode categorical variables
data_encoded = pd.get_dummies(
    data, 
    columns=['Gender', 'Occupation', 'BMI Category', 'Blood Pressure', 'Sleep Disorder']
)

# Separate features (X) and target (y)
X = data_encoded.drop(['Stress Level'], axis=1)
y = data_encoded['Stress Level']

# -------- 3. Train-Test Split --------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -------- 4. Train the Model --------
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# -------- 5. Evaluate the Model --------
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")

# Save the trained model
model_path = "stress_level_model.pkl"
joblib.dump(model, model_path)
print(f"Model saved to {model_path}")

# -------- 6. Load Model and Make Predictions --------
loaded_model = joblib.load(model_path)

# Example input aligned with feature columns
sample_data = pd.DataFrame(columns=X_train.columns)

# Example input values (aligned with feature columns used in training)
sample_input = [
    1,  # Person ID (example)
    28,  # Age
    7.0,  # Sleep Duration
    7,  # Quality of Sleep
    50,  # Physical Activity Level
    75,  # Heart Rate
    8000,  # Daily Steps
    1,  # Gender_Male
    0,  # Gender_Female
    1,  # Occupation_Software Engineer
    0,  # Occupation_Doctor
    0,  # Occupation_Teacher
    0,  # BMI Category_Overweight
    0,  # BMI Category_Normal Weight
    1,  # BMI Category_Obese
    0,  # Blood Pressure_120/80
    1,  # Blood Pressure_130/85
    0,  # Blood Pressure_140/90
    1,  # Sleep Disorder_Sleep Apnea
    0   # Sleep Disorder_None
] + [0] * (len(X_train.columns) - 20)  # Fill any remaining columns with 0

# Insert the input values into the DataFrame
sample_data.loc[0] = sample_input

# Ensure all missing columns are filled with 0
sample_data = sample_data.reindex(columns=X_train.columns, fill_value=0)

# Make a prediction
predicted_stress = loaded_model.predict(sample_data)
print(f"Predicted Stress Level: {predicted_stress[0]:.2f}")


   Person ID Gender  Age            Occupation  Sleep Duration  \
0          1   Male   27     Software Engineer             6.1   
1          2   Male   28                Doctor             6.2   
2          3   Male   28                Doctor             6.2   
3          4   Male   28  Sales Representative             5.9   
4          5   Male   28  Sales Representative             5.9   

   Quality of Sleep  Physical Activity Level  Stress Level BMI Category  \
0                 6                       42             6   Overweight   
1                 6                       60             8       Normal   
2                 6                       60             8       Normal   
3                 4                       30             8        Obese   
4                 4                       30             8        Obese   

  Blood Pressure  Heart Rate  Daily Steps Sleep Disorder  
0         126/83          77         4200            NaN  
1         125/80          75      