# CELL 1: Imports & Config

In [68]:
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from lightgbm import LGBMRegressor
import joblib

# Seed NumPy's legacy global random number generator for this session.
np.random.seed(42)

# CELL 2: Load Dataset

In [69]:
# Read the whole dataset into memory; the train/validation/test splits are
# later taken from `df`.
df = pd.read_csv("lifesync_dataset.csv")

# Report (rows, columns), then let the notebook render the first five rows.
print("Dataset shape:", df.shape)
df.head()

Dataset shape: (20000, 46)


Unnamed: 0,sleep_hours,sleep_time_variance,exercise_days_per_week,diet_ratio,water_intake_liters,steps_count,distance_walked_km,calories_burned,sedentary_hours,illness_days_monthly,...,expense_exceeding_ratio,expense_tracking_score,savings_ratio,emergency_fund_score,debt_pressure_score,health_score,mind_score,productivity_score,finance_score,life_score
0,7.596057,0.889389,2,77.107913,4.09476,7849,6.037692,362,9.875853,0,...,0.0,69.306956,32.136969,78.358482,66.291848,69.431468,75.741581,71.121145,65.240281,70.908684
1,6.369853,2.530217,1,60.996192,3.757952,10673,8.21,492,10.36444,0,...,0.0,100.0,41.782982,56.587151,82.843585,33.957384,81.67197,87.690518,66.50728,68.215023
2,7.55298,1.068739,3,53.041715,3.107543,13146,10.112308,606,9.887748,2,...,0.0,85.684164,15.993557,34.628106,88.999829,41.587946,76.522953,80.62448,53.695255,64.249043
3,5.34314,0.775355,7,48.718743,3.833993,17310,13.315385,798,5.019,0,...,19.214208,52.768039,10.085622,43.54538,96.828712,51.713348,71.085748,69.167663,41.679849,59.881947
4,5.380732,0.184742,5,30.172359,3.276414,11813,9.086923,545,6.710863,2,...,2.537427,99.606571,0.0,100.0,100.0,42.097091,78.680726,88.739597,59.179757,68.149341


# CELL 3: Feature Maps (Authoritative)

In [70]:
# Input columns for each domain model, keyed by domain name.
# List order is significant: each list is used as-is to select (and therefore
# order) the columns handed to that domain's model.
FEATURE_MAP = dict(
    health=[
        "sleep_hours",
        "sleep_time_variance",
        "exercise_days_per_week",
        "diet_ratio",
        "water_intake_liters",
        "steps_count",
        "distance_walked_km",
        "calories_burned",
        "sedentary_hours",
        "illness_days_monthly",
    ],
    mind=[
        "depression_score",
        "anxiety_score",
        "stress_score",  # also fed to the finance model as an extra input
        "childhood_trauma_score",
        "mood_stability_score",
        "meditation_days_per_week",
        "meditation_completion_ratio",
        "breathing_days_per_week",
        "breathing_completion_ratio",
        "distraction_ratio",
        "screen_time_non_work_hours",
        "family_support_ratio",
        "friends_support_ratio",
    ],
    productivity=[
        "tasks_assigned",
        "tasks_completed",
        "task_completion_ratio",
        "planned_task_hours",
        "actual_task_hours",
        "time_efficiency_ratio",
        "priority_task_completion_ratio",
        "focus_level",
        "productivity_gap",
        "daily_energy_level",
    ],
    finance=[
        "budget_limit",
        "total_expense",
        "budget_adherence_ratio",
        "expense_exceeding_ratio",
        "expense_tracking_score",
        "savings_ratio",
        "emergency_fund_score",
        "debt_pressure_score",
    ],
)


# CELL 4: Target Columns

In [71]:
# Names of the five score columns: one per domain plus the overall life score.
TARGETS = [
    "health_score", "mind_score", "productivity_score", "finance_score",
    "life_score",
]

# CELL 5: Train–Test Split (Single Split for All)

In [72]:
# Step 1: keep 70 % of the rows for training; the other 30 % is held out.
train_df, temp_df = train_test_split(df, random_state=42, test_size=0.3)

# Step 2: cut the held-out rows in half -> validation and test (15 % each).
val_df, test_df = train_test_split(temp_df, random_state=42, test_size=0.5)

# Sanity check on the resulting (rows, columns) of each split.
print("Train:", train_df.shape)
print("Validation:", val_df.shape)
print("Test:", test_df.shape)

Train: (14000, 46)
Validation: (3000, 46)
Test: (3000, 46)


# CELL 6: Scaling (Shared Scaler)

In [73]:
# Every input column used by any domain model, in FEATURE_MAP order with
# duplicates dropped. dict.fromkeys keeps first-seen order, so the list (and
# the column order the scaler is fitted on) is identical in every interpreter
# session. A set of strings has no stable order across sessions, which would
# make a pickled scaler unreliable to pair with a FEATURE_COLUMNS list rebuilt
# in another process.
FEATURE_COLUMNS = list(
    dict.fromkeys(
        column
        for domain_columns in FEATURE_MAP.values()
        for column in domain_columns
    )
)

scaler = MinMaxScaler()

# Fit on the training split only, then reuse the fitted ranges for validation
# and test so no information from those splits reaches the scaler.
# NOTE: this cell overwrites the feature columns in place; re-running it
# without re-running the split refits the scaler on already-scaled data.
train_df[FEATURE_COLUMNS] = scaler.fit_transform(train_df[FEATURE_COLUMNS])
val_df[FEATURE_COLUMNS] = scaler.transform(val_df[FEATURE_COLUMNS])
test_df[FEATURE_COLUMNS] = scaler.transform(test_df[FEATURE_COLUMNS])

# CELL 7: Utility – Evaluation Function

In [74]:
def evaluate_model(y_true, y_pred, label, dataset_name):
    """Print a short regression report for one set of predictions.

    Args:
        y_true: Ground-truth target values, passed to the sklearn metrics.
        y_pred: Predicted values, passed to the sklearn metrics.
        label: Name of the score being evaluated (shown in the header line).
        dataset_name: Name of the split being evaluated (shown in the header).

    Returns:
        None. The report is written to stdout only.

    Note:
        The "Accuracy" line is simply the R2 score multiplied by 100; it is
        not a classification accuracy.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    absolute_error = mean_absolute_error(y_true, y_pred)
    r_squared = r2_score(y_true, y_pred)

    # Build every line first so nothing is printed if a metric fails.
    report_lines = [
        f"\nðŸ“Š {label} â€“ {dataset_name}",
        f"RMSE     : {np.sqrt(squared_error):.3f}",
        f"MAE      : {absolute_error:.3f}",
        f"R2 Score : {r_squared:.3f}",
        f"Accuracy : {r_squared * 100:.2f}%",
    ]
    for report_line in report_lines:
        print(report_line)

# CELL 8: Train Health Model (Stage 1)

In [75]:
# Stage 1 of the cascade: the health model uses only the health feature columns.
health_model = LGBMRegressor(
    random_state=42,
    max_depth=6,
    learning_rate=0.05,
    n_estimators=300,
)

# Inputs and target for each split.
X_h_train, y_h_train = train_df[FEATURE_MAP["health"]], train_df["health_score"]
X_h_val, y_h_val = val_df[FEATURE_MAP["health"]], val_df["health_score"]
X_h_test, y_h_test = test_df[FEATURE_MAP["health"]], test_df["health_score"]

health_model.fit(X_h_train, y_h_train)

# Store the predictions as a column on every split; the later stages read
# "health_pred" as an input. The train-split values are in-sample predictions.
train_df["health_pred"] = health_model.predict(X_h_train)
val_df["health_pred"] = health_model.predict(X_h_val)
test_df["health_pred"] = health_model.predict(X_h_test)

# Report the metrics per split.
evaluate_model(y_h_train, train_df["health_pred"], "Health Score", "Train")
evaluate_model(y_h_val, val_df["health_pred"], "Health Score", "Validation")
evaluate_model(y_h_test, test_df["health_pred"], "Health Score", "Test")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000586 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2055
[LightGBM] [Info] Number of data points in the train set: 14000, number of used features: 10
[LightGBM] [Info] Start training from score 51.937732

ðŸ“Š Health Score â€“ Train
RMSE     : 0.677
MAE      : 0.317
R2 Score : 0.998
Accuracy : 99.84%

ðŸ“Š Health Score â€“ Validation
RMSE     : 0.741
MAE      : 0.391
R2 Score : 0.998
Accuracy : 99.81%

ðŸ“Š Health Score â€“ Test
RMSE     : 0.755
MAE      : 0.392
R2 Score : 0.998
Accuracy : 99.80%


# CELL 9: Train Mind Model (Stage 2 – Cascaded)

In [76]:
# Stage 2 of the cascade: mind features plus the stage-1 health prediction.
mind_model = LGBMRegressor(
    random_state=42,
    max_depth=6,
    learning_rate=0.05,
    n_estimators=300,
)

# Inputs and target for each split.
X_m_train, y_m_train = (
    train_df[[*FEATURE_MAP["mind"], "health_pred"]],
    train_df["mind_score"],
)
X_m_val, y_m_val = (
    val_df[[*FEATURE_MAP["mind"], "health_pred"]],
    val_df["mind_score"],
)
X_m_test, y_m_test = (
    test_df[[*FEATURE_MAP["mind"], "health_pred"]],
    test_df["mind_score"],
)

mind_model.fit(X_m_train, y_m_train)

# Store the predictions on every split; the productivity and life stages read
# "mind_pred" as an input.
train_df["mind_pred"] = mind_model.predict(X_m_train)
val_df["mind_pred"] = mind_model.predict(X_m_val)
test_df["mind_pred"] = mind_model.predict(X_m_test)

# Report the metrics per split.
evaluate_model(y_m_train, train_df["mind_pred"], "Mind Score", "Train")
evaluate_model(y_m_val, val_df["mind_pred"], "Mind Score", "Validation")
evaluate_model(y_m_test, test_df["mind_pred"], "Mind Score", "Test")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000499 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2582
[LightGBM] [Info] Number of data points in the train set: 14000, number of used features: 14
[LightGBM] [Info] Start training from score 71.341625

ðŸ“Š Mind Score â€“ Train
RMSE     : 2.158
MAE      : 1.699
R2 Score : 0.934
Accuracy : 93.45%

ðŸ“Š Mind Score â€“ Validation
RMSE     : 2.634
MAE      : 2.079
R2 Score : 0.901
Accuracy : 90.06%

ðŸ“Š Mind Score â€“ Test
RMSE     : 2.649
MAE      : 2.097
R2 Score : 0.902
Accuracy : 90.19%


# CELL 10: Train Productivity Model (Stage 3)

In [77]:
# Stage 3 of the cascade: productivity features plus the health and mind
# predictions produced by the two earlier stages.
productivity_model = LGBMRegressor(
    random_state=42,
    max_depth=7,
    learning_rate=0.05,
    n_estimators=350,
)

# Inputs and target for each split.
X_p_train, y_p_train = (
    train_df[[*FEATURE_MAP["productivity"], "health_pred", "mind_pred"]],
    train_df["productivity_score"],
)
X_p_val, y_p_val = (
    val_df[[*FEATURE_MAP["productivity"], "health_pred", "mind_pred"]],
    val_df["productivity_score"],
)
X_p_test, y_p_test = (
    test_df[[*FEATURE_MAP["productivity"], "health_pred", "mind_pred"]],
    test_df["productivity_score"],
)

productivity_model.fit(X_p_train, y_p_train)

# Store the predictions on every split; the finance and life stages read
# "productivity_pred" as an input.
train_df["productivity_pred"] = productivity_model.predict(X_p_train)
val_df["productivity_pred"] = productivity_model.predict(X_p_val)
test_df["productivity_pred"] = productivity_model.predict(X_p_test)

# Report the metrics per split.
evaluate_model(y_p_train, train_df["productivity_pred"], "Productivity Score", "Train")
evaluate_model(y_p_val, val_df["productivity_pred"], "Productivity Score", "Validation")
evaluate_model(y_p_test, test_df["productivity_pred"], "Productivity Score", "Test")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000533 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2255
[LightGBM] [Info] Number of data points in the train set: 14000, number of used features: 12
[LightGBM] [Info] Start training from score 74.004947

ðŸ“Š Productivity Score â€“ Train
RMSE     : 0.284
MAE      : 0.221
R2 Score : 0.999
Accuracy : 99.93%

ðŸ“Š Productivity Score â€“ Validation
RMSE     : 0.411
MAE      : 0.303
R2 Score : 0.999
Accuracy : 99.86%

ðŸ“Š Productivity Score â€“ Test
RMSE     : 0.406
MAE      : 0.303
R2 Score : 0.999
Accuracy : 99.86%


# CELL 11: Train Finance Model (Stage 4)

In [78]:
# Stage 4 of the cascade: finance features plus the productivity prediction
# and the (scaled) stress_score column borrowed from the mind feature group.
finance_model = LGBMRegressor(
    random_state=42,
    max_depth=6,
    learning_rate=0.05,
    n_estimators=250,
)

# Inputs and target for each split.
X_f_train, y_f_train = (
    train_df[[*FEATURE_MAP["finance"], "productivity_pred", "stress_score"]],
    train_df["finance_score"],
)
X_f_val, y_f_val = (
    val_df[[*FEATURE_MAP["finance"], "productivity_pred", "stress_score"]],
    val_df["finance_score"],
)
X_f_test, y_f_test = (
    test_df[[*FEATURE_MAP["finance"], "productivity_pred", "stress_score"]],
    test_df["finance_score"],
)

finance_model.fit(X_f_train, y_f_train)

# Store the predictions on every split; the life meta-model reads
# "finance_pred" as an input.
train_df["finance_pred"] = finance_model.predict(X_f_train)
val_df["finance_pred"] = finance_model.predict(X_f_val)
test_df["finance_pred"] = finance_model.predict(X_f_test)

# Report the metrics per split.
evaluate_model(y_f_train, train_df["finance_pred"], "Finance Score", "Train")
evaluate_model(y_f_val, val_df["finance_pred"], "Finance Score", "Validation")
evaluate_model(y_f_test, test_df["finance_pred"], "Finance Score", "Test")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000609 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 14000, number of used features: 10
[LightGBM] [Info] Start training from score 55.441264

ðŸ“Š Finance Score â€“ Train
RMSE     : 0.374
MAE      : 0.291
R2 Score : 0.998
Accuracy : 99.84%

ðŸ“Š Finance Score â€“ Validation
RMSE     : 0.501
MAE      : 0.380
R2 Score : 0.997
Accuracy : 99.72%

ðŸ“Š Finance Score â€“ Test
RMSE     : 0.502
MAE      : 0.378
R2 Score : 0.997
Accuracy : 99.72%


# CELL 12: Train Life Meta Model (OPTION B)

In [79]:
# Meta-model: predicts life_score from the four domain predictions only;
# no raw feature column is used at this stage.
life_model = LGBMRegressor(
    random_state=42,
    max_depth=5,
    learning_rate=0.05,
    n_estimators=200,
)

# Inputs and target for each split.
X_l_train, y_l_train = (
    train_df[["health_pred", "mind_pred", "productivity_pred", "finance_pred"]],
    train_df["life_score"],
)
X_l_val, y_l_val = (
    val_df[["health_pred", "mind_pred", "productivity_pred", "finance_pred"]],
    val_df["life_score"],
)
X_l_test, y_l_test = (
    test_df[["health_pred", "mind_pred", "productivity_pred", "finance_pred"]],
    test_df["life_score"],
)

life_model.fit(X_l_train, y_l_train)

# Only the validation and test predictions are stored as columns; the
# train-split prediction is computed on the fly for the report below.
val_df["life_pred"] = life_model.predict(X_l_val)
test_df["life_pred"] = life_model.predict(X_l_test)

# Report the metrics per split.
evaluate_model(y_l_train, life_model.predict(X_l_train), "Life Score", "Train")
evaluate_model(y_l_val, val_df["life_pred"], "Life Score", "Validation")
evaluate_model(y_l_test, test_df["life_pred"], "Life Score", "Test")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1020
[LightGBM] [Info] Number of data points in the train set: 14000, number of used features: 4
[LightGBM] [Info] Start training from score 63.976410

ðŸ“Š Life Score â€“ Train
RMSE     : 0.683
MAE      : 0.534
R2 Score : 0.991
Accuracy : 99.09%

ðŸ“Š Life Score â€“ Validation
RMSE     : 0.903
MAE      : 0.710
R2 Score : 0.984
Accuracy : 98.39%

ðŸ“Š Life Score â€“ Test
RMSE     : 0.898
MAE      : 0.704
R2 Score : 0.984
Accuracy : 98.41%


# CELL 13: Save Models & Scaler

In [80]:
# Create the output folder first: joblib.dump opens the target path directly
# and fails if "Model/" does not exist yet (e.g. on a fresh checkout).
os.makedirs("Model", exist_ok=True)

# One pickle per cascade stage, plus the scaler that inference needs in order
# to transform raw inputs the same way as during training.
joblib.dump(health_model, "Model/health_model.pkl")
joblib.dump(mind_model, "Model/mind_model.pkl")
joblib.dump(productivity_model, "Model/productivity_model.pkl")
joblib.dump(finance_model, "Model/finance_model.pkl")
joblib.dump(life_model, "Model/life_model.pkl")
joblib.dump(scaler, "Model/feature_scaler.pkl")

print("âœ… All models saved successfully")

âœ… All models saved successfully


# CELL 14: Inference Function (Production-Ready)

In [81]:
def predict_life_scores(input_df):
    """Run the full model cascade on unscaled feature rows.

    The stages run in the same order as in training: health -> mind ->
    productivity -> finance -> life, each later stage consuming the
    predictions of the earlier ones.

    Args:
        input_df: DataFrame containing every column in FEATURE_COLUMNS, with
            unscaled values. It is NOT modified: scaling and the intermediate
            prediction columns live in a separate working frame, so the same
            frame can be passed again and yields the same result.

    Returns:
        A DataFrame with the columns "health_pred", "mind_pred",
        "productivity_pred", "finance_pred" and "life_pred", indexed like
        `input_df`.

    Raises:
        KeyError: if `input_df` lacks one of the FEATURE_COLUMNS.
    """
    # Scale into a fresh frame. Writing the scaled values back into the
    # caller's frame would destroy their raw data and make a second call on
    # the same frame scale already-scaled values.
    work = pd.DataFrame(
        scaler.transform(input_df[FEATURE_COLUMNS]),
        columns=FEATURE_COLUMNS,
        index=input_df.index,
    )

    work["health_pred"] = health_model.predict(work[FEATURE_MAP["health"]])
    work["mind_pred"] = mind_model.predict(
        work[FEATURE_MAP["mind"] + ["health_pred"]]
    )
    work["productivity_pred"] = productivity_model.predict(
        work[FEATURE_MAP["productivity"] + ["health_pred", "mind_pred"]]
    )
    work["finance_pred"] = finance_model.predict(
        work[FEATURE_MAP["finance"] + ["productivity_pred", "stress_score"]]
    )

    # The meta-model sees only the four domain predictions.
    work["life_pred"] = life_model.predict(
        work[["health_pred", "mind_pred", "productivity_pred", "finance_pred"]]
    )

    return work[[
        "health_pred", "mind_pred",
        "productivity_pred", "finance_pred", "life_pred"
    ]]