In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib

# ---------------------------------------------------------------------------
# Train a logistic-regression "depression risk" classifier and persist the
# fitted model, scaler and label encoders with joblib (for the Flask app).
# ---------------------------------------------------------------------------
SEED = 42

# 1) Load the survey CSV. The URL is a Google Drive direct-download link;
#    pd.read_csv fetches it over HTTP (a local path works the same way).
data = pd.read_csv('https://drive.google.com/uc?id=1uTH3KaGybY6KAIQPww4nK0_ZMlytmdZW')

# 2) Derive the binary target. A row is labelled 1 when ANY of the five
#    answers below matches exactly (case- and punctuation-sensitive), else 0.
#    Column names must match the dataset headers character for character.
poor_sleep = data['Sleep Pattern?'] == 'Poor'
feels_isolated = data["Do you feel isolated or lonely even when you're surrounded by people"] == 'Yes'
sedentary = data['Physical Activity'] == 'Sedentary'
regular_drinker = data['Alcohol Consumption?'] == 'Regularly'
heavy_workload = data['Work Load'] == 'High'
any_risk_factor = poor_sleep | feels_isolated | sedentary | regular_drinker | heavy_workload
data['Depression_Risk'] = np.where(any_risk_factor, 1, 0)

print("\nClass Distribution:")
print(data['Depression_Risk'].value_counts())

# 3) Separate target and predictors. 'Name' and 'ID' are removed when present
#    (errors='ignore' keeps this from failing if they are absent).
# NOTE(review): the five columns used to build Depression_Risk are NOT dropped
# here, so the model is trained on the very inputs its label was computed
# from -- confirm this is intended before trusting the reported scores.
y = data['Depression_Risk']
X = data.drop(columns=['Depression_Risk', 'Name', 'ID'], errors='ignore')

# 4) Integer-encode every object-dtype column, keeping one fitted LabelEncoder
#    per column (keyed by column name) so inference can reuse the mapping.
#    The encoders are fit on the full frame, i.e. before the train/test split.
categorical_columns = X.select_dtypes(include=['object']).columns
label_encoders = {column: LabelEncoder() for column in categorical_columns}
for column, le in label_encoders.items():
    X[column] = le.fit_transform(X[column])

# 5) Stratified 80/20 hold-out split with a fixed seed.
split_options = dict(test_size=0.2, random_state=SEED, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# 6) Standardise features. The scaler learns its statistics from the training
#    rows only and is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 7) Fit the classifier (class_weight='balanced' reweights the two classes)
#    and predict on the held-out rows.
model = LogisticRegression(random_state=SEED, class_weight='balanced', max_iter=200).fit(
    X_train_scaled, y_train
)
y_pred = model.predict(X_test_scaled)

print("\nModel Results:")
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")

# 8) Persist the three artefacts the Flask service loads at inference time.
#    The final call is left as the cell's last expression, so its return
#    value (the list of written file names) is what the notebook displays.
joblib.dump(model, 'depression_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')



Class Distribution:
Depression_Risk
1    70
0    33
Name: count, dtype: int64

Model Results:
              precision    recall  f1-score   support

           0       0.80      0.57      0.67         7
           1       0.81      0.93      0.87        14

    accuracy                           0.81        21
   macro avg       0.81      0.75      0.77        21
weighted avg       0.81      0.81      0.80        21

Accuracy: 0.81


  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights


['label_encoders.pkl']