In [3]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

import numpy as np
import pandas as pd
import kagglehub
import os
import warnings
import time
# Install a global filter that ignores every warning category from here on.
# NOTE(review): this blanket filter also hides any warnings the estimators
# fitted later in the notebook may raise (e.g. convergence or target-shape
# warnings, if they occur) — confirm that suppressing them is intended.
warnings.filterwarnings('ignore')

In [4]:
# Pin the dataset version so that re-running the notebook always downloads the
# same data; an unversioned handle resolves to whatever the latest upload is.
# Version 2 is the version this notebook was originally executed against.
path = kagglehub.dataset_download(
    "abhishekdave9/digital-habits-vs-mental-health-dataset/versions/2"
)
print(path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/abhishekdave9/digital-habits-vs-mental-health-dataset?dataset_version_number=2...


100%|██████████| 546k/546k [00:00<00:00, 86.2MB/s]

Extracting files...
/root/.cache/kagglehub/datasets/abhishekdave9/digital-habits-vs-mental-health-dataset/versions/2





In [5]:
# Inspect the contents of the downloaded dataset directory.
os.listdir(path)

['digital_habits_vs_mental_health.csv']

In [6]:
# Select the CSV explicitly instead of taking os.listdir(path)[0]: listdir
# returns entries in arbitrary order, so blind indexing could load the wrong
# file if the directory ever holds more than one entry.
csv_names = sorted(
    entry for entry in os.listdir(path) if entry.lower().endswith(".csv")
)
if not csv_names:
    raise FileNotFoundError(f"No CSV file found in {path!r}")
df = pd.read_csv(os.path.join(path, csv_names[0]))

In [7]:
# Print the frame's column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 6 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   screen_time_hours            100000 non-null  float64
 1   social_media_platforms_used  100000 non-null  int64  
 2   hours_on_TikTok              100000 non-null  float64
 3   sleep_hours                  100000 non-null  float64
 4   stress_level                 100000 non-null  int64  
 5   mood_score                   100000 non-null  int64  
dtypes: float64(3), int64(3)
memory usage: 4.6 MB


In [8]:
# Predictor columns and the first regression target (stress level).
x_cols = [
    'screen_time_hours',
    'social_media_platforms_used',
    'hours_on_TikTok',
    'sleep_hours',
]
y1_cols = ['stress_level']

# Selecting with a list of labels keeps both the features and the target as
# DataFrames (the target is a single-column frame, not a Series).
X = df.loc[:, x_cols]
y1 = df.loc[:, y1_cols]

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# y1 is a single-column DataFrame; flatten the targets to 1-D so every
# estimator receives the shape it expects and predictions come back 1-D too.
y_train_1d = y_train.to_numpy().ravel()
y_test_1d = y_test.to_numpy().ravel()

# Seed the stochastic learners so a fresh "Restart & Run All" reproduces the
# same metrics.
models = {
    "Linear Regression": LinearRegression(),
    "XGBoost": XGBRegressor(random_state=42),
    "MLP Regressor": MLPRegressor(max_iter=500, random_state=42)
}

results_for_y1 = []

for name, model in models.items():
    print(f"Running: {name}")

    # Use scaled data only for MLP and linear models
    use_scaled = name in ["Linear Regression", "MLP Regressor"]
    X_tr = X_train_scaled if use_scaled else X_train
    X_te = X_test_scaled if use_scaled else X_test

    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # short durations; time.time() can jump if the system clock is adjusted.
    start_train = time.perf_counter()
    model.fit(X_tr, y_train_1d)
    train_time = time.perf_counter() - start_train

    start_pred = time.perf_counter()
    y_pred = model.predict(X_te)
    pred_time = time.perf_counter() - start_pred

    # Score the held-out predictions.
    r2 = r2_score(y_test_1d, y_pred)
    mae = mean_absolute_error(y_test_1d, y_pred)
    mse = mean_squared_error(y_test_1d, y_pred)
    rmse = np.sqrt(mse)

    results_for_y1.append({
        "Model": name,
        "R2 Score": round(r2, 3),
        "MAE": round(mae, 3),
        "MSE": round(mse, 3),
        "RMSE": round(rmse, 3),
        "Train Time (s)": round(train_time, 4),
        "Predict Time (s)": round(pred_time, 4)
    })


Running: Linear Regression
Running: XGBoost
Running: MLP Regressor


In [10]:
# Tabulate the per-model metrics for the stress-level target, best R2 first.
df_results_for_y1 = (
    pd.DataFrame(results_for_y1)
    .sort_values(by="R2 Score", ascending=False)
)
df_results_for_y1

Unnamed: 0,Model,R2 Score,MAE,MSE,RMSE,Train Time (s),Predict Time (s)
2,MLP Regressor,0.751,0.815,1.046,1.023,7.9191,0.015
0,Linear Regression,0.748,0.823,1.056,1.028,0.0352,0.0005
1,XGBoost,0.746,0.823,1.069,1.034,0.4289,0.0401


In [11]:
# Second regression target (mood score), kept as a single-column DataFrame.
y2_cols = ['mood_score']
y2 = df.loc[:, y2_cols]

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y2, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# y2 is a single-column DataFrame; flatten the targets to 1-D so every
# estimator receives the shape it expects and predictions come back 1-D too.
y_train_1d = y_train.to_numpy().ravel()
y_test_1d = y_test.to_numpy().ravel()

# Seed the stochastic learners so a fresh "Restart & Run All" reproduces the
# same metrics.
models = {
    "Linear Regression": LinearRegression(),
    "XGBoost": XGBRegressor(random_state=42),
    "MLP Regressor": MLPRegressor(max_iter=500, random_state=42)
}


results_for_y2 = []

for name, model in models.items():
    print(f"Running: {name}")

    # Use scaled data only for MLP and linear models
    use_scaled = name in ["Linear Regression", "MLP Regressor"]
    X_tr = X_train_scaled if use_scaled else X_train
    X_te = X_test_scaled if use_scaled else X_test

    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # short durations; time.time() can jump if the system clock is adjusted.
    start_train = time.perf_counter()
    model.fit(X_tr, y_train_1d)
    train_time = time.perf_counter() - start_train

    start_pred = time.perf_counter()
    y_pred = model.predict(X_te)
    pred_time = time.perf_counter() - start_pred

    # Score the held-out predictions.
    r2 = r2_score(y_test_1d, y_pred)
    mae = mean_absolute_error(y_test_1d, y_pred)
    mse = mean_squared_error(y_test_1d, y_pred)
    rmse = np.sqrt(mse)

    results_for_y2.append({
        "Model": name,
        "R2 Score": round(r2, 3),
        "MAE": round(mae, 3),
        "MSE": round(mse, 3),
        "RMSE": round(rmse, 3),
        "Train Time (s)": round(train_time, 4),
        "Predict Time (s)": round(pred_time, 4)
    })


Running: Linear Regression
Running: XGBoost
Running: MLP Regressor


In [13]:
# Tabulate the per-model metrics for the mood-score target, best R2 first.
df_results_for_y2 = (
    pd.DataFrame(results_for_y2)
    .sort_values(by="R2 Score", ascending=False)
)
df_results_for_y2

Unnamed: 0,Model,R2 Score,MAE,MSE,RMSE,Train Time (s),Predict Time (s)
2,MLP Regressor,0.662,0.518,0.549,0.741,7.4827,0.0127
1,XGBoost,0.653,0.511,0.563,0.75,0.3452,0.0424
0,Linear Regression,0.567,0.66,0.702,0.838,0.0118,0.0005
