In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load your combined dataset
combined_data = pd.read_csv("combined_dataset.csv")  # Path to your combined dataset

# Encode categorical variables if necessary
le = LabelEncoder()
combined_data['Gender'] = le.fit_transform(combined_data['Gender'])
combined_data['Support_Systems_Access'] = le.fit_transform(combined_data['Support_Systems_Access'])
combined_data['Work_Environment_Impact'] = le.fit_transform(combined_data['Work_Environment_Impact'])
combined_data['Online_Support_Usage'] = le.fit_transform(combined_data['Online_Support_Usage'])

# Standardize numeric features
scaler = StandardScaler()
numeric_features = ['Age', 'Technology_Usage_Hours', 'Social_Media_Usage_Hours', 'Gaming_Hours',
                    'Screen_Time_Hours', 'Sleep_Hours', 'Physical_Activity_Hours', 'Humidity',
                    'Temperature', 'Step_count']
combined_data[numeric_features] = scaler.fit_transform(combined_data[numeric_features])

# Split data into training and testing sets
X = combined_data.drop('Stress_Level', axis=1)
y = combined_data['Stress_Level']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
from sklearn.linear_model import LogisticRegression

# Use lifestyle features for initial stress prediction
lifestyle_features = ['Age', 'Sleep_Hours', 'Physical_Activity_Hours', 'Work_Environment_Impact']
X_train_lifestyle = X_train[lifestyle_features]
X_test_lifestyle = X_test[lifestyle_features]

lifestyle_model = LogisticRegression()
lifestyle_model.fit(X_train_lifestyle, y_train)
lifestyle_stress_score = lifestyle_model.predict(X_train_lifestyle)


In [4]:
# Combine lifestyle score with environmental data for next model
X_train_environment = X_train[['Humidity', 'Temperature']]
X_train_environment['Lifestyle_Stress_Score'] = lifestyle_stress_score

# Train environmental model
from sklearn.tree import DecisionTreeClassifier
environment_model = DecisionTreeClassifier()
environment_model.fit(X_train_environment, y_train)
environment_stress_score = environment_model.predict(X_train_environment)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train_environment['Lifestyle_Stress_Score'] = lifestyle_stress_score


In [5]:
# Combine environmental score with physical activity data for next model
X_train_activity = X_train[['Step_count']]
X_train_activity['Environment_Stress_Score'] = environment_stress_score

# Train physical activity model
activity_model = LogisticRegression()
activity_model.fit(X_train_activity, y_train)
activity_stress_score = activity_model.predict(X_train_activity)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train_activity['Environment_Stress_Score'] = environment_stress_score


In [6]:
# Combine activity score with social/technology usage data for next model
X_train_tech = X_train[['Technology_Usage_Hours', 'Social_Media_Usage_Hours', 'Gaming_Hours', 'Screen_Time_Hours']]
X_train_tech['Activity_Stress_Score'] = activity_stress_score

# Train technology/social model
tech_model = LogisticRegression()
tech_model.fit(X_train_tech, y_train)
tech_stress_score = tech_model.predict(X_train_tech)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train_tech['Activity_Stress_Score'] = activity_stress_score


In [7]:
import numpy as np

# Combine all previous model outputs to make a final aggregated prediction
X_train_aggregated = np.column_stack((lifestyle_stress_score, environment_stress_score, activity_stress_score, tech_stress_score))

# Train an ensemble model for final stress prediction
from sklearn.ensemble import RandomForestClassifier
final_model = RandomForestClassifier()
final_model.fit(X_train_aggregated, y_train)
final_stress_prediction = final_model.predict(X_train_aggregated)


In [8]:
# Analyze feature importance from final model
importances = final_model.feature_importances_
feature_names = ['Lifestyle', 'Environment', 'Physical Activity', 'Technology Usage']
feature_importance = pd.DataFrame({'Feature': feature_names, 'Importance': importances})

print("Final Stress Prediction:")
print(final_stress_prediction)
print("\nFeature Importance:")
print(feature_importance)


Final Stress Prediction:
[1 1 2 ... 1 2 1]

Feature Importance:
             Feature  Importance
0          Lifestyle        0.00
1        Environment        0.29
2  Physical Activity        0.38
3   Technology Usage        0.33


In [9]:
import joblib


In [10]:
# Save Lifestyle Model
joblib.dump(lifestyle_model, 'lifestyle_model.joblib')

# Save Environmental Model
joblib.dump(environment_model, 'environment_model.joblib')

# Save Physical Activity Model
joblib.dump(activity_model, 'activity_model.joblib')

# Save Technology/Social Model
joblib.dump(tech_model, 'tech_model.joblib')

# Save Final Aggregated Model
joblib.dump(final_model, 'final_model.joblib')


['final_model.joblib']

In [12]:
import joblib

# Load each saved model
lifestyle_model = joblib.load('lifestyle_model.joblib')
environment_model = joblib.load('environment_model.joblib')
activity_model = joblib.load('activity_model.joblib')
tech_model = joblib.load('tech_model.joblib')
final_model = joblib.load('final_model.joblib')


In [13]:
# Test Lifestyle Model
X_test_lifestyle = X_test[['Age', 'Sleep_Hours', 'Physical_Activity_Hours', 'Work_Environment_Impact']]
lifestyle_stress_score_test = lifestyle_model.predict(X_test_lifestyle)

# Test Environmental Model
X_test_environment = X_test[['Humidity', 'Temperature']]
X_test_environment['Lifestyle_Stress_Score'] = lifestyle_stress_score_test
environment_stress_score_test = environment_model.predict(X_test_environment)

# Test Physical Activity Model
X_test_activity = X_test[['Step_count']]
X_test_activity['Environment_Stress_Score'] = environment_stress_score_test
activity_stress_score_test = activity_model.predict(X_test_activity)

# Test Technology/Social Model
X_test_tech = X_test[['Technology_Usage_Hours', 'Social_Media_Usage_Hours', 'Gaming_Hours', 'Screen_Time_Hours']]
X_test_tech['Activity_Stress_Score'] = activity_stress_score_test
tech_stress_score_test = tech_model.predict(X_test_tech)

# Test Final Aggregated Model
import numpy as np
X_test_aggregated = np.column_stack((lifestyle_stress_score_test, environment_stress_score_test, activity_stress_score_test, tech_stress_score_test))
final_stress_prediction = final_model.predict(X_test_aggregated)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test_environment['Lifestyle_Stress_Score'] = lifestyle_stress_score_test
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test_activity['Environment_Stress_Score'] = environment_stress_score_test
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test_tech['Activity_Stress_Score'] = activity_stress

In [14]:
from sklearn.metrics import accuracy_score, classification_report

# Evaluate each model individually
print("Lifestyle Model Accuracy:", accuracy_score(y_test, lifestyle_stress_score_test))
print("Environmental Model Accuracy:", accuracy_score(y_test, environment_stress_score_test))
print("Activity Model Accuracy:", accuracy_score(y_test, activity_stress_score_test))
print("Tech Model Accuracy:", accuracy_score(y_test, tech_stress_score_test))

# Evaluate the final model
print("Final Model Accuracy:", accuracy_score(y_test, final_stress_prediction))

# Detailed classification report for the final model
print("\nFinal Model Classification Report:\n", classification_report(y_test, final_stress_prediction))


Lifestyle Model Accuracy: 0.49775112443778113
Environmental Model Accuracy: 1.0
Activity Model Accuracy: 1.0
Tech Model Accuracy: 1.0
Final Model Accuracy: 1.0

Final Model Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00       685
           2       1.00      1.00      1.00       649

    accuracy                           1.00      1334
   macro avg       1.00      1.00      1.00      1334
weighted avg       1.00      1.00      1.00      1334



In [15]:
pip install reportlab


Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [29]:
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib import colors
from datetime import datetime

def generate_pdf_report(inputs, predictions, filename='Stress_Report.pdf'):
    c = canvas.Canvas(filename, pagesize=A4)
    width, height = A4

    # Title Section
    c.setFont("Helvetica-Bold", 20)
    c.drawString(100, height - 60, "Comprehensive Stress Analysis Report")

    # Subtitle
    c.setFont("Helvetica", 12)
    c.setFillColor(colors.grey)
    c.drawString(100, height - 85, "A detailed report on stress indicators and model-based predictions")
    
    # Date
    c.setFont("Helvetica", 10)
    c.setFillColor(colors.black)
    c.drawString(100, height - 105, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    
    # Line separator
    c.line(100, height - 115, width - 100, height - 115)

    # Summary Section
    c.setFont("Helvetica-Bold", 14)
    c.drawString(100, height - 135, "Summary")
    c.setFont("Helvetica", 10)
    summary_text = ("This report provides an analysis of stress levels based on lifestyle, environmental, "
                    "physical activity, and technology usage factors. Each model calculates an individual "
                    "stress score based on these inputs, leading to a final aggregated stress level.")
    c.drawString(100, height - 155, summary_text)

    # Input Data Section
    c.setFont("Helvetica-Bold", 12)
    c.drawString(100, height - 190, "1. Input Data")
    c.setFont("Helvetica", 10)
    y = height - 210
    for key, value in inputs.items():
        c.drawString(100, y, f"{key}: {value}")
        y -= 15

    # Prediction Results Section
    c.setFont("Helvetica-Bold", 12)
    y -= 20
    c.drawString(100, y, "2. Model-Based Stress Predictions")
    y -= 15
    c.setFont("Helvetica", 10)
    for key, value in predictions.items():
        if key != "Final Stress Level":
            c.drawString(100, y, f"{key}: {value}")
            y -= 15

    # Final Stress Level Section
    y -= 20
    c.setFont("Helvetica-Bold", 12)
    c.drawString(100, y, "3. Final Aggregated Stress Level")
    c.setFont("Helvetica", 10)
    y -= 15
    c.drawString(100, y, f"Overall Stress Level: {predictions['Final Stress Level']}")

    # Conclusion Section
    y -= 40
    c.setFont("Helvetica-Bold", 12)
    c.drawString(100, y, "Conclusion and Recommendations")
    c.setFont("Helvetica", 10)
    y -= 15
    conclusion_text = ("This final stress level assessment combines multiple dimensions of stress. "
                       "A higher score indicates greater stress, which may benefit from targeted "
                       "interventions, such as reducing screen time or improving sleep hygiene. "
                       "Regular monitoring is advised for optimal stress management.")
    for line in conclusion_text.split('. '):
        c.drawString(100, y, line.strip() + ".")
        y -= 15

    c.save()
    print(f"Stress report saved as {filename}")

# Example usage:
random_inputs = generate_random_inputs()
predictions = get_stress_prediction(random_inputs)
generate_pdf_report(random_inputs, predictions, filename='Professional_Stress_Report.pdf')


Stress report saved as Professional_Stress_Report.pdf


