In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import category_encoders as ce
from sklearn.preprocessing import MinMaxScaler


# Preprocessing objects shared by the steps that follow in this cell.
scaler = MinMaxScaler()
label_encoder = LabelEncoder()
encoder = ce.TargetEncoder(cols=['City'])

# Load the raw data and discard the 'id' column.
# Rebinding (rather than inplace=True) keeps this step a plain expression.
df = pd.read_csv("Mental health.csv")
df = df.drop(columns=['id'])

# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
df.info()


# Gender -> integer codes from the shared LabelEncoder.
df['Gender'] = label_encoder.fit_transform(df['Gender'])

# City -> target encoding fitted against the Depression column.
df['City'] = encoder.fit_transform(df['City'], df['Depression'])

# Profession -> two classes: every value other than 'Student' is relabelled
# 'Non-Student' before the column is turned into integer codes.
profession = df['Profession']
df['Profession'] = label_encoder.fit_transform(
    profession.mask(profession != 'Student', 'Non-Student')
)

# Sleep Duration, then Dietary Habits: remove the rows answered 'Others' and
# one-hot encode what is left. The columns are handled one after the other,
# so the dummy columns are appended in this order.
for column_name in ("Sleep Duration", "Dietary Habits"):
    others_index = df.loc[df[column_name] == 'Others'].index
    df = pd.get_dummies(df.drop(index=others_index), columns=[column_name], dtype=int)

# Collapse individual degree names into broad education levels.
degree_mapping = {
    'Class 12': 'High_School',
    'B.Ed': 'Bachelors', 'B.Com': 'Bachelors', 'B.Arch': 'Bachelors',
    'BCA': 'Bachelors', 'B.Tech': 'Bachelors', 'BHM': 'Bachelors',
    'BSc': 'Bachelors', 'B.Pharm': 'Bachelors', 'BBA': 'Bachelors',
    'BA': 'Bachelors', 'BE': 'Bachelors',
    'MSc': 'Masters', 'MCA': 'Masters', 'M.Tech': 'Masters', 'M.Ed': 'Masters',
    'M.Com': 'Masters', 'M.Pharm': 'Masters', 'MA': 'Masters', 'ME': 'Masters',
    'MHM': 'Masters',
    'MBBS': 'Professional', 'MD': 'Professional', 'MBA': 'Professional',
    'LLB': 'Professional', 'LLM': 'Professional', 'PhD': 'Doctoral',
    'Others': 'Other'
}

# Series.map() turns every value that is not a key of the dict into NaN, and
# get_dummies() (dummy_na=False) then encodes NaN as all-zero edu_* columns.
# Report such values instead of letting them disappear silently; the data
# produced below is unchanged.
# NOTE(review): the raw values may carry extra quoting (the Sleep Duration
# dummies do) - check this report for entries such as a quoted 'Class 12'.
unmapped_degrees = sorted(
    set(df['Degree'].dropna().unique()) - set(degree_mapping), key=str
)
if unmapped_degrees:
    unmapped_rows = int(df['Degree'].isin(unmapped_degrees).sum())
    print(
        f"Warning: {unmapped_rows} row(s) have a Degree that is missing from "
        f"degree_mapping and will get all-zero edu_* columns: {unmapped_degrees}"
    )

df['Degree'] = df['Degree'].map(degree_mapping)

# One-hot encode the education level as edu_<level> integer columns.
df = pd.get_dummies(df, columns=['Degree'], prefix='edu',dtype=int)

# Suicidal-thoughts answer -> integer codes (fitted before any rows are removed below).
suicidal_column = "Have you ever had suicidal thoughts ?"
df[suicidal_column] = label_encoder.fit_transform(df[suicidal_column])

# Financial Stress: rows holding the '?' placeholder are removed, after which
# the column can be converted to a numeric dtype.
placeholder_index = df.loc[df["Financial Stress"] == '?'].index
df = df.drop(index=placeholder_index)
df['Financial Stress'] = pd.to_numeric(df['Financial Stress'])

# Family history answer -> integer codes (fitted on the rows that remain).
family_column = "Family History of Mental Illness"
df[family_column] = label_encoder.fit_transform(df[family_column])

# Keep only rows whose Age lies within 1.5 * IQR of the quartiles
# (both bounds inclusive).
age = df['Age']
Q1, Q3 = age.quantile(0.25), age.quantile(0.75)
IQR = Q3 - Q1
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
df = df[age.between(lower_bound, upper_bound)]

# Columns left out of the model inputs: the target plus the features that are
# not used for training.
excluded_columns = [
    'Depression',
    'Profession',
    'edu_Other',
    'Gender',
    'Job Satisfaction',
    'Work Pressure',
    'CGPA',
    'edu_Doctoral',
    "Sleep Duration_'7-8 hours'",
    "Sleep Duration_'5-6 hours'",
    'City',
    'edu_Professional',
    'edu_Bachelors',
    'Dietary Habits_Moderate',
    'Family History of Mental Illness',
]

# Target vector and feature matrix.
y = df['Depression']
X_selected = df.drop(columns=excluded_columns)

# Fit the min-max scaler on the selected features and scale them.
X_scaled = scaler.fit_transform(X_selected)



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27901 entries, 0 to 27900
Data columns (total 17 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Gender                                 27901 non-null  object 
 1   Age                                    27901 non-null  float64
 2   City                                   27901 non-null  object 
 3   Profession                             27901 non-null  object 
 4   Academic Pressure                      27901 non-null  float64
 5   Work Pressure                          27901 non-null  float64
 6   CGPA                                   27901 non-null  float64
 7   Study Satisfaction                     27901 non-null  float64
 8   Job Satisfaction                       27901 non-null  float64
 9   Sleep Duration                         27901 non-null  object 
 10  Dietary Habits                         27901 non-null  object 
 11  De

In [2]:
from sklearn.linear_model import LogisticRegression

# Build the classifier and fit it on the scaled features in one expression
# (fit() returns the estimator itself).
LogisticRegression_model = LogisticRegression(solver='liblinear', C=1.0).fit(X_scaled, y)

# Write the fully preprocessed frame to disk, without the index column.
cleaned_csv_path = "mental_health_cleaned_deployment.csv"
df.to_csv(cleaned_csv_path, index=False)

In [None]:
import tkinter as tk
from tkinter import messagebox
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
import joblib

# =========================
# Persist the fitted model and scaler, then read them back from disk
# =========================
MODEL_PATH = "lr_LogisticRegression_model.pkl"
SCALER_PATH = "scaler.pkl"

# Write both fitted objects out with joblib.
joblib.dump(LogisticRegression_model, MODEL_PATH)
joblib.dump(scaler, SCALER_PATH)

# Rebind the names to the copies loaded from the files just written.
LogisticRegression_model, scaler = joblib.load(MODEL_PATH), joblib.load(SCALER_PATH)

# =========================
# GUI
# =========================
def predict():
    """Read the form, run the model and show the result in a dialog.

    The text of every widget in the module-level ``entries`` list is parsed as
    a float. The resulting row is scaled with ``scaler`` and passed to
    ``LogisticRegression_model``; the predicted label and the second column of
    ``predict_proba`` are shown with ``messagebox.showinfo``.

    Text that is not a finite number is reported as an input error. A
    ``ValueError`` raised while scaling or predicting (for example when the
    number of fields does not match what the scaler expects) is reported with
    its own message instead of being blamed on the user's input.

    Returns:
        None. All results are delivered through message boxes.
    """
    # Step 1: parse. Only float() failures are handled here, so errors from
    # the model can no longer be mistaken for a typing mistake.
    try:
        inputs = [float(entry.get()) for entry in entries]
    except ValueError:
        messagebox.showerror("Error", "Please enter valid numeric values!")
        return

    # float() accepts "nan" and "inf", which are not usable answers.
    if not np.isfinite(inputs).all():
        messagebox.showerror("Error", "Please enter valid numeric values!")
        return

    # Step 2: scale and predict. Failures here are configuration problems
    # (model / scaler / form out of sync), so show the underlying message.
    try:
        inputs_scaled = scaler.transform([inputs])
        pred = LogisticRegression_model.predict(inputs_scaled)[0]
        prob = LogisticRegression_model.predict_proba(inputs_scaled)[0][1]
    except ValueError as exc:
        messagebox.showerror("Error", f"Prediction failed: {exc}")
        return

    # Step 3: display the label and the probability from column 1.
    messagebox.showinfo("Prediction", f"Depression Status: {pred}\nProbability: {prob:.2f}")

# Top-level window of the form.
root = tk.Tk()
root.title("Depression Prediction (Logistic Regression)")

# Model inputs, one form row per name. predict() hands the typed values to the
# scaler positionally, so this order presumably has to match the columns the
# scaler was fitted on - verify whenever the feature selection changes.
features = [
    'Age',
    'Academic Pressure',
    'Study Satisfaction',
    'Have you ever had suicidal thoughts ?',
    'Work/Study Hours',
    'Financial Stress',
    "Sleep Duration_'Less than 5 hours'",
    "Sleep Duration_'More than 8 hours'",
    'Dietary Habits_Healthy',
    'Dietary Habits_Unhealthy',
    'edu_Masters',
]

# Build one "label | text box" row per feature. The text boxes are collected
# in `entries`, which predict() reads in this same order.
entries = []
cell_padding = {"padx": 10, "pady": 5}
for row_index, feature_name in enumerate(features):
    caption = tk.Label(root, text=feature_name)
    caption.grid(row=row_index, column=0, sticky="w", **cell_padding)

    field = tk.Entry(root)
    field.grid(row=row_index, column=1, **cell_padding)
    entries.append(field)

# The button sits below the last feature row and spans both columns.
predict_button = tk.Button(root, text="Predict", command=predict)
predict_button.grid(row=len(features), column=0, columnspan=2, pady=10)

# Start the Tk event loop.
root.mainloop()
