In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [45]:
# Load the dataset
df = pd.read_csv('combined_dataset.csv')


In [46]:
# Encode categorical variables
df['Gender'] = df['Gender'].map({'Female': 0, 'Male': 1, 'Other': 2})
df['Mental_Health_Status'] = df['Mental_Health_Status'].map({'Poor': 0, 'Fair': 1, 'Good': 2, 'Excellent': 3})
df['Support_Systems_Access'] = df['Support_Systems_Access'].map({'No': 0, 'Yes': 1})
df['Work_Environment_Impact'] = df['Work_Environment_Impact'].map({'Negative': 0, 'Neutral': 1, 'Positive': 2})
df['Online_Support_Usage'] = df['Online_Support_Usage'].map({'No': 0, 'Yes': 1})

# Define features and target
X = df[['Age', 'Gender', 'Technology_Usage_Hours', 'Social_Media_Usage_Hours', 'Gaming_Hours',
        'Screen_Time_Hours', 'Mental_Health_Status', 'Sleep_Hours', 'Physical_Activity_Hours',
        'Support_Systems_Access', 'Work_Environment_Impact', 'Online_Support_Usage',
        'Humidity', 'Temperature', 'Step_count']]
y = df['Stress_Level'] > 1  # 1 for stressed, 0 for not stressed

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [47]:
# Initialize and train the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)


In [48]:
# Predict and evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')


Accuracy: 1.00


In [40]:
pip install joblib h5py


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [49]:
import h5py

# Save the model as .h5 file
with h5py.File('model.h5', 'w') as f:
    f.create_dataset('coefficients', data=model.coef_)
    f.create_dataset('intercept', data=model.intercept_)


In [50]:
with h5py.File('model.h5', 'r') as f:
    coef = f['coefficients'][:]
    intercept = f['intercept'][:]
    loaded_model = LogisticRegression()
    loaded_model.coef_ = coef
    loaded_model.intercept_ = intercept


In [59]:
import joblib

# Save the model as .pkl file
joblib.dump(model, 'model.pkl')


['model.pkl']

In [60]:
# Load the model from .pkl file
loaded_model = joblib.load('model.pkl')


In [51]:
sample_input = {
    'Age': 28,
    'Gender': 0,  # Female
    'Technology_Usage_Hours': 5.0,
    'Social_Media_Usage_Hours': 4.0,
    'Gaming_Hours': 1.0,
    'Screen_Time_Hours': 10.0,
    'Mental_Health_Status': 2,  # Good
    'Sleep_Hours': 7.0,
    'Physical_Activity_Hours': 3.0,
    'Support_Systems_Access': 1,  # Yes
    'Work_Environment_Impact': 0,  # Negative
    'Online_Support_Usage': 1,  # Yes
    'Humidity': 0.0,
    'Temperature': 0.0,
    'Step_count': 150
}

# Convert the sample input to DataFrame
sample_df = pd.DataFrame([sample_input])


In [61]:
#Load the saved model (if not already loaded)
model = joblib.load('model.pkl')

# Predict stress level
prediction = model.predict(sample_df)
is_stressed = prediction[0]  # 1 for stressed, 0 for not stressed

# Output result
stress_status = 'Stressed' if is_stressed else 'Not Stressed'
print(f'Stress Prediction: {stress_status}')


Stress Prediction: Stressed


In [62]:
sample_input = {
    'Age': 30,
    'Gender': 1,  # Male
    'Technology_Usage_Hours': 2.0,
    'Social_Media_Usage_Hours': 1.0,
    'Gaming_Hours': 0.5,
    'Screen_Time_Hours': 5.5,
    'Mental_Health_Status': 3,  # Excellent
    'Sleep_Hours': 8.5,
    'Physical_Activity_Hours': 7.0,
    'Support_Systems_Access': 1,  # Yes
    'Work_Environment_Impact': 2,  # Positive
    'Online_Support_Usage': 1,  # Yes
    'Humidity': 30.0,
    'Temperature': 70.0,  # Adjusted Temperature
    'Step_count': 10000  # Increased Step Count
}

# Convert the sample input to DataFrame
sample_df = pd.DataFrame([sample_input])


In [63]:
# Load the saved model (if not already loaded)
model = joblib.load('model.pkl')

# Predict stress level
prediction = model.predict(sample_df)
is_stressed = prediction[0]  # 1 for stressed, 0 for not stressed

# Output result
stress_status = 'Stressed' if is_stressed else 'Not Stressed'
print(f'Stress Prediction: {stress_status}')


Stress Prediction: Stressed


In [55]:
import numpy as np

# Get feature importance
importance = np.abs(model.coef_[0])
feature_importance = pd.DataFrame({'Feature': X.columns, 'Importance': importance})
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
print(feature_importance)


                     Feature  Importance
13               Temperature    0.591876
14                Step_count    0.372493
12                  Humidity    0.059721
5          Screen_Time_Hours    0.048630
7                Sleep_Hours    0.046989
2     Technology_Usage_Hours    0.041400
0                        Age    0.031278
8    Physical_Activity_Hours    0.029929
3   Social_Media_Usage_Hours    0.026653
4               Gaming_Hours    0.018133
6       Mental_Health_Status    0.011154
1                     Gender    0.007594
10   Work_Environment_Impact    0.006824
11      Online_Support_Usage    0.004021
9     Support_Systems_Access    0.003448


In [64]:


# Define the adjusted sample input
sample_input = {
    'Age': 30,
    'Gender': 1,  # Male
    'Technology_Usage_Hours': 1.0,
    'Social_Media_Usage_Hours': 0.5,
    'Gaming_Hours': 0.2,
    'Screen_Time_Hours': 4.0,
    'Mental_Health_Status': 3,  # Excellent
    'Sleep_Hours': 9.0,
    'Physical_Activity_Hours': 8.0,
    'Support_Systems_Access': 1,  # Yes
    'Work_Environment_Impact': 2,  # Positive
    'Online_Support_Usage': 1,  # Yes
    'Humidity': 35.0,
    'Temperature': 65.0,  # Lowering Temperature
    'Step_count': 12000  # Further Increased Step Count
}

# Convert the sample input to DataFrame
sample_df = pd.DataFrame([sample_input])



In [65]:
# Load the saved model (if not already loaded)
model = joblib.load('model.pkl')

# Predict stress level
prediction = model.predict(sample_df)
is_stressed = prediction[0]  # 1 for stressed, 0 for not stressed

# Output result
stress_status = 'Stressed' if is_stressed else 'Not Stressed'
print(f'Stress Prediction: {stress_status}')


Stress Prediction: Stressed


In [66]:
# Check for any discrepancies in data processing
print(sample_df)


   Age  Gender  Technology_Usage_Hours  Social_Media_Usage_Hours  \
0   30       1                     1.0                       0.5   

   Gaming_Hours  Screen_Time_Hours  Mental_Health_Status  Sleep_Hours  \
0           0.2                4.0                     3          9.0   

   Physical_Activity_Hours  Support_Systems_Access  Work_Environment_Impact  \
0                      8.0                       1                        2   

   Online_Support_Usage  Humidity  Temperature  Step_count  
0                     1      35.0         65.0       12000  


In [67]:
from sklearn.metrics import classification_report

# Evaluate the model's predictions
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

print("Train Set Performance:")
print(classification_report(y_train, y_train_pred))

print("Test Set Performance:")
print(classification_report(y_test, y_test_pred))


Train Set Performance:
              precision    recall  f1-score   support

       False       1.00      1.00      1.00      2314
        True       1.00      1.00      1.00      2355

    accuracy                           1.00      4669
   macro avg       1.00      1.00      1.00      4669
weighted avg       1.00      1.00      1.00      4669

Test Set Performance:
              precision    recall  f1-score   support

       False       1.00      1.00      1.00      1018
        True       1.00      1.00      1.00       983

    accuracy                           1.00      2001
   macro avg       1.00      1.00      1.00      2001
weighted avg       1.00      1.00      1.00      2001



In [68]:
# Check class distribution
print(df['Stress_Level'].value_counts())


Stress_Level
2    3338
1    3332
Name: count, dtype: int64


In [73]:
from sklearn.preprocessing import StandardScaler

# Assuming X_train was scaled during training
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
sample_array_scaled = scaler.transform(sample_array)




In [74]:
# For Logistic Regression (if using .pkl model)
is_stressed = (prediction[0] > 0.5)  # Binary output, threshold at 0.5


In [77]:
import pandas as pd
import joblib
import numpy as np
from keras.models import load_model
from sklearn.preprocessing import StandardScaler

# Define the correct feature names
feature_names = ['Age', 'Gender', 'Technology_Usage_Hours', 'Social_Media_Usage_Hours', 'Gaming_Hours',
                 'Screen_Time_Hours', 'Mental_Health_Status', 'Sleep_Hours', 'Physical_Activity_Hours',
                 'Support_Systems_Access', 'Work_Environment_Impact', 'Online_Support_Usage',
                 'Humidity', 'Temperature', 'Step_count']

# Define the adjusted sample input with correct feature names
sample_input = {
    'Age': 30,
    'Gender': 1,  # Male
    'Technology_Usage_Hours': 1.0,
    'Social_Media_Usage_Hours': 0.5,
    'Gaming_Hours': 0.2,
    'Screen_Time_Hours': 4.0,
    'Mental_Health_Status': 3,  # Excellent
    'Sleep_Hours': 9.0,
    'Physical_Activity_Hours': 8.0,
    'Support_Systems_Access': 1,  # Yes
    'Work_Environment_Impact': 2,  # Positive
    'Online_Support_Usage': 1,  # Yes
    'Humidity': 35.0,
    'Temperature': 65.0,  # Lowering Temperature
    'Step_count': 12000  # Further Increased Step Count
}

# Convert the sample input to DataFrame with correct feature names
sample_df = pd.DataFrame([sample_input], columns=feature_names)
sample_array = sample_df.values


In [82]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler

# Define the correct feature names
feature_names = ['Age', 'Gender', 'Technology_Usage_Hours', 'Social_Media_Usage_Hours', 'Gaming_Hours',
                 'Screen_Time_Hours', 'Mental_Health_Status', 'Sleep_Hours', 'Physical_Activity_Hours',
                 'Support_Systems_Access', 'Work_Environment_Impact', 'Online_Support_Usage',
                 'Humidity', 'Temperature', 'Step_count']

# Define the adjusted sample input with correct feature names
sample_input = {
    'Age': 30,
    'Gender': 1,  # Male
    'Technology_Usage_Hours': 1.0,
    'Social_Media_Usage_Hours': 0.5,
    'Gaming_Hours': 0.2,
    'Screen_Time_Hours': 4.0,
    'Mental_Health_Status': 3,  # Excellent
    'Sleep_Hours': 9.0,
    'Physical_Activity_Hours': 8.0,
    'Support_Systems_Access': 1,  # Yes
    'Work_Environment_Impact': 2,  # Positive
    'Online_Support_Usage': 1,  # Yes
    'Humidity': 35.0,
    'Temperature': 65.0,  # Lowering Temperature
    'Step_count': 12000  # Further Increased Step Count
}

# Convert the sample input to DataFrame with correct feature names
sample_df = pd.DataFrame([sample_input], columns=feature_names)

# Load the model from .pkl file
model = joblib.load('model.pkl')

# Feature scaling using StandardScaler fitted with the same feature names
scaler = StandardScaler().fit(X_train)  # Ensure X_train has the correct feature names
sample_array_scaled = scaler.transform(sample_df)

# Predict stress level
prediction = model.predict(sample_array_scaled)
is_stressed = (prediction[0] > 0.5)  # Binary output, threshold at 0.5

# Output result
stress_status = 'Stressed' if is_stressed else 'Not Stressed'
print(f'Stress Prediction: {stress_status}')


Stress Prediction: Stressed




In [84]:
from keras.models import Sequential
from keras.layers import Dense

# Example model creation
model = Sequential()
model.add(Dense(10, input_dim=15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with some dummy data
import numpy as np

X_train_dummy = np.random.rand(100, 15)
y_train_dummy = np.random.randint(2, size=100)
model.fit(X_train_dummy, y_train_dummy, epochs=5, batch_size=10, verbose=1)

# Save the model
model.save('model_correct.h5')


Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5732 - loss: 0.6724 
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5001 - loss: 0.6934
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5282 - loss: 0.6808
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5315 - loss: 0.6814
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6104 - loss: 0.6711




In [88]:
from keras.models import load_model

# Load the correctly saved model
model = load_model('model_correct.h5')




In [86]:
# Define the adjusted sample input with correct feature names
sample_input = {
    'Age': 30,
    'Gender': 1,  # Male
    'Technology_Usage_Hours': 1.0,
    'Social_Media_Usage_Hours': 0.5,
    'Gaming_Hours': 0.2,
    'Screen_Time_Hours': 4.0,
    'Mental_Health_Status': 3,  # Excellent
    'Sleep_Hours': 9.0,
    'Physical_Activity_Hours': 8.0,
    'Support_Systems_Access': 1,  # Yes
    'Work_Environment_Impact': 2,  # Positive
    'Online_Support_Usage': 1,  # Yes
    'Humidity': 35.0,
    'Temperature': 65.0,  # Lowering Temperature
    'Step_count': 12000  # Further Increased Step Count
}

# Convert the sample input to DataFrame with correct feature names
sample_df = pd.DataFrame([sample_input], columns=feature_names)
sample_array = sample_df.values

# Feature scaling using StandardScaler fitted with the same feature names
scaler = StandardScaler().fit(X_train)  # Ensure X_train has the correct feature names
sample_array_scaled = scaler.transform(sample_df)

# Predict stress level
prediction = model.predict(sample_array_scaled)
is_stressed = np.argmax(prediction, axis=1)[0]

# Output result
stress_status = 'Stressed' if is_stressed else 'Not Stressed'
print(f'Stress Prediction: {stress_status}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step
Stress Prediction: Not Stressed


In [89]:
# Define the adjusted sample input with correct feature names
sample_input_stressed = {
    'Age': 30,
    'Gender': 1,  # Male
    'Technology_Usage_Hours': 10.0,  # High technology usage
    'Social_Media_Usage_Hours': 8.0,  # High social media usage
    'Gaming_Hours': 3.0,
    'Screen_Time_Hours': 12.0,  # High screen time
    'Mental_Health_Status': 1,  # Poor
    'Sleep_Hours': 4.0,  # Low sleep hours
    'Physical_Activity_Hours': 1.0,  # Low physical activity
    'Support_Systems_Access': 0,  # No support systems
    'Work_Environment_Impact': 0,  # Negative work environment impact
    'Online_Support_Usage': 0,  # No online support
    'Humidity': 80.0,  # High humidity
    'Temperature': 95.0,  # High temperature
    'Step_count': 2000  # Low step count
}

# Convert the sample input to DataFrame with correct feature names
sample_df_stressed = pd.DataFrame([sample_input_stressed], columns=feature_names)

sample_array = sample_df.values

# Feature scaling using StandardScaler fitted with the same feature names
scaler = StandardScaler().fit(X_train)  # Ensure X_train has the correct feature names
sample_array_scaled = scaler.transform(sample_df)

# Predict stress level
prediction = model.predict(sample_array_scaled)
is_stressed = np.argmax(prediction, axis=1)[0]

# Output result
stress_status = 'Stressed' if is_stressed else 'Not Stressed'
print(f'Stress Prediction: {stress_status}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Stress Prediction: Not Stressed


In [91]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
import numpy as np
from keras.models import load_model

# Feature names
feature_names = ['Age', 'Gender', 'Technology_Usage_Hours', 'Social_Media_Usage_Hours', 'Gaming_Hours',
                 'Screen_Time_Hours', 'Mental_Health_Status', 'Sleep_Hours', 'Physical_Activity_Hours',
                 'Support_Systems_Access', 'Work_Environment_Impact', 'Online_Support_Usage',
                 'Humidity', 'Temperature', 'Step_count']

# Define stressed input
sample_input_stressed = {
    'Age': 30,
    'Gender': 1,  # Male
    'Technology_Usage_Hours': 10.0,  # High technology usage
    'Social_Media_Usage_Hours': 8.0,  # High social media usage
    'Gaming_Hours': 3.0,
    'Screen_Time_Hours': 12.0,  # High screen time
    'Mental_Health_Status': 1,  # Poor
    'Sleep_Hours': 4.0,  # Low sleep hours
    'Physical_Activity_Hours': 1.0,  # Low physical activity
    'Support_Systems_Access': 0,  # No support systems
    'Work_Environment_Impact': 0,  # Negative work environment impact
    'Online_Support_Usage': 0,  # No online support
    'Humidity': 80.0,  # High humidity
    'Temperature': 95.0,  # High temperature
    'Step_count': 2000  # Low step count
}

# Convert input to DataFrame
sample_df_stressed = pd.DataFrame([sample_input_stressed], columns=feature_names)

# Check the DataFrame
print("Sample DataFrame:")
print(sample_df_stressed)

# Load model and scale data
try:
    # For the .pkl model
    model_pkl = joblib.load('model.pkl')

    # Scale features
    scaler = StandardScaler().fit(X_train)
    sample_array_scaled_stressed = scaler.transform(sample_df_stressed)

    # Predict stress level
    prediction_pkl = model_pkl.predict(sample_array_scaled_stressed)
    is_stressed_pkl = prediction_pkl[0] > 0.5  # Binary output
    stress_status_pkl = 'Stressed' if is_stressed_pkl else 'Not Stressed'
    print(f'Stress Prediction (PKL): {stress_status_pkl}')
except Exception as e:
    print(f"Error with .pkl model: {e}")

try:
    # For the .h5 model
    model_h5 = load_model('model_correct.h5')

    # Scale features
    sample_array_scaled_stressed = scaler.transform(sample_df_stressed)

    # Predict stress level
    prediction_h5 = model_h5.predict(sample_array_scaled_stressed)
    is_stressed_h5 = np.argmax(prediction_h5, axis=1)[0]
    stress_status_h5 = 'Stressed' if is_stressed_h5 else 'Not Stressed'
    print(f'Stress Prediction (H5): {stress_status_h5}')
except Exception as e:
    print(f"Error with .h5 model: {e}")




Sample DataFrame:
   Age  Gender  Technology_Usage_Hours  Social_Media_Usage_Hours  \
0   30       1                    10.0                       8.0   

   Gaming_Hours  Screen_Time_Hours  Mental_Health_Status  Sleep_Hours  \
0           3.0               12.0                     1          4.0   

   Physical_Activity_Hours  Support_Systems_Access  Work_Environment_Impact  \
0                      1.0                       0                        0   

   Online_Support_Usage  Humidity  Temperature  Step_count  
0                     0      80.0         95.0        2000  
Stress Prediction (PKL): Stressed
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
Stress Prediction (H5): Not Stressed


In [92]:
# Probability prediction for .pkl model
prediction_proba_pkl = model_pkl.predict_proba(sample_array_scaled_stressed)
print("Prediction probabilities (PKL):", prediction_proba_pkl)

# Check the decision boundary
is_stressed_pkl = prediction_proba_pkl[0][1] > 0.5  # Adjust the threshold if needed
stress_status_pkl = 'Stressed' if is_stressed_pkl else 'Not Stressed'
print(f'Final Stress Prediction (PKL): {stress_status_pkl}')


Prediction probabilities (PKL): [[3.11793326e-08 9.99999969e-01]]
Final Stress Prediction (PKL): Stressed




In [93]:
# Check the probabilities from the .h5 model
print("Prediction probabilities (H5):", prediction_h5)

# Ensure proper evaluation of the output
is_stressed_h5 = np.argmax(prediction_h5, axis=1)[0]
stress_status_h5 = 'Stressed' if is_stressed_h5 else 'Not Stressed'
print(f'Final Stress Prediction (H5): {stress_status_h5}')


Prediction probabilities (H5): [[0.99858]]
Final Stress Prediction (H5): Not Stressed
