In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

In [5]:
df=pd.read_csv(r"C:\Users\kandu\Downloads\mental_health_workplace_survey.csv")##loading the data file

In [6]:
##Encoding.
text_columns = df.select_dtypes(include=['object']).columns
for col in text_columns:##converting all text-columns into numbers format.
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

In [12]:
##Feature Selection
correlation_matrix = df.corr()['StressLevel']
final_features = correlation_matrix[abs(correlation_matrix) > 0.01].index.drop('StressLevel').tolist() ##selcting the features which are greater than 0.01 of correlation.
print(len(final_features))

19


In [15]:
df['Stress_x_WorkHours'] = df['StressLevel'] * df['WorkHoursPerWeek']
df['Sleep_to_Stress_Ratio'] = df['SleepHours'] / (df['StressLevel'] + 1e-5)
final_features.extend(['Stress_x_WorkHours', 'Sleep_to_Stress_Ratio'])

In [16]:
X = df[final_features]##defining the data.
y = df['StressLevel']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)##splitting the data.
scaler = StandardScaler()##standaizing the features.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [20]:
##testing 3 types of linear regression models to see which is best.
models_to_test = { 'Standard Linear': LinearRegression(),
                   'Ridge Regularization': Ridge(),'Lasso Regularization': Lasso() }
for name, model in models_to_test.items():##loop for each model,trains it and evaluates its performance.
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)
    ##For MSE the lower is the better model and for R**2 the higher is better.
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    print(f"  - {name}:")
    print(f"    Mean Squared Error (MSE): {mse:.2f}")
    print(f"    R-squared (R²) Score: {r2:.2f}")

  - Standard Linear:
    Mean Squared Error (MSE): 0.52
    R-squared (R²) Score: 0.93
  - Ridge Regularization:
    Mean Squared Error (MSE): 0.83
    R-squared (R²) Score: 0.88
  - Lasso Regularization:
    Mean Squared Error (MSE): 2.17
    R-squared (R²) Score: 0.70
