In [None]:
# -------------------------------
# STEP: Feature Engineering + Save CSV
# -------------------------------
import pandas as pd
import numpy as np
from google.colab import files

# Names of the engineered columns actually created by this cell; each feature
# step appends to it and the summary printout below reads it.
added = []

# Coerce the raw inputs of the engineered features to a numeric dtype.
# errors="coerce" turns every value that cannot be parsed into NaN instead of
# raising, so bad entries show up later as NaN in the derived features.
# NOTE(review): if a column (e.g. "Sleep Duration") stores text categories
# rather than numbers, the whole column becomes NaN here -- confirm how the
# column is encoded upstream.
num_needed = [
    "Academic Pressure",
    "Work Pressure",
    "Financial Stress",
    "Sleep Duration",
    "Work/Study Hours",
]
for col in num_needed:
    if col not in df.columns:
        # Absent columns are skipped silently here; the feature steps warn
        # about them when they matter.
        continue
    df[col] = pd.to_numeric(df[col], errors="coerce")

# 1) Stress_Index: row-wise sum of the academic, work and financial stress
#    scores. The feature is only built when all three inputs exist; otherwise
#    a warning lists the absent columns and the frame is left untouched.
#    `+` propagates NaN, so a row missing any one input gets a NaN index.
needed_stress = ["Academic Pressure", "Work Pressure", "Financial Stress"]
if any(name not in df.columns for name in needed_stress):
    missing = [name for name in needed_stress if name not in df.columns]
    print(f"[WARN] Skipped Stress_Index (missing columns: {missing})")
else:
    academic, work, financial = (df[name] for name in needed_stress)
    df["Stress_Index"] = academic + work + financial
    added.append("Stress_Index")

# 2) Sleep_Quality: row-wise sum of sleep duration and work/study hours.
#    Built only when both inputs exist; otherwise a warning lists the absent
#    columns. `+` propagates NaN, so a row missing either input gets NaN.
#    NOTE(review): the feature is a plain sum of the two columns -- confirm
#    that this is the intended definition of "quality".
needed_sleep = ["Sleep Duration", "Work/Study Hours"]
if any(name not in df.columns for name in needed_sleep):
    missing = [name for name in needed_sleep if name not in df.columns]
    print(f"[WARN] Skipped Sleep_Quality (missing columns: {missing})")
else:
    sleep_hours, busy_hours = (df[name] for name in needed_sleep)
    df["Sleep_Quality"] = sleep_hours + busy_hours
    added.append("Sleep_Quality")

# Report which engineered features were created and summarize them.
print(f"Added engineered features: {added}")

# Only describe columns that really exist in the frame. When every feature was
# skipped the selection is empty, and DataFrame.describe() raises ValueError
# on a frame without columns -- so report that case instead of crashing.
summary_cols = [c for c in added if c in df.columns]
if summary_cols:
    print(df[summary_cols].describe())
else:
    print("[WARN] No engineered features were added; nothing to describe.")

# -------------------------------
# SAVE & DOWNLOAD
# -------------------------------
# Convert all boolean columns to 0 and 1 *before* writing the file, so the
# saved CSV matches the in-memory frame used by later cells. (Previously the
# conversion ran after to_csv, leaving True/False in the file.)
bool_cols = [col for col in df.columns if df[col].dtype == 'bool']
df = df.astype({col: int for col in bool_cols})

# Write the full frame (original + engineered columns) without the index.
out_path = "student_depression_features.csv"
df.to_csv(out_path, index=False)
print(f"\nSaved to: {out_path}  |  Shape: {df.shape}")

# NOTE(review): the header mentions a download and `files` is imported from
# google.colab, but files.download(out_path) is never called -- add it here if
# a browser download is actually wanted.

# Double-check conversion
print(df.dtypes.head(20))



In [None]:
# Pairwise scatter/distribution grid for age, CGPA and the three stress
# inputs. All five columns must be present in df, otherwise the selection
# raises KeyError.
pairplot_cols = [
    "Age",
    "CGPA",
    "Academic Pressure",
    "Work Pressure",
    "Financial Stress",
]
sns.pairplot(df[pairplot_cols])
plt.show()