In [None]:
# -----------------------------------------
# Mental Health Dataset - Full EDA Notebook
# -----------------------------------------

# Basic Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Display settings: never truncate the column list when a frame is rendered
pd.options.display.max_columns = None

# ----------------------------
# 1. Load Dataset
# ----------------------------
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv("MentalHealthSurvey.csv")

# Show the first five rows as a quick sanity check of the load
df.head(n=5)


In [None]:
# ----------------------------
# 2. Basic Info
# ----------------------------
# Report the frame's dimensions, column labels, per-column dtypes and
# per-column count of missing values.
missing_per_column = df.isna().sum()

print("Shape:", df.shape)
print("\nColumn Names:\n", df.columns)
# sep="\n" reproduces the blank line a separate print() call would emit.
print("\nData Types:\n", df.dtypes, sep="\n")
print("\nMissing Values:\n", missing_per_column, sep="\n")


In [None]:
# ----------------------------
# 3. Describe Numeric Columns
# ----------------------------
# describe() with its default arguments; the bare name on the last line makes
# the summary table the cell's displayed output.
numeric_summary = df.describe()
numeric_summary


In [None]:
# ----------------------------
# 4. Value Counts for Categorical Columns
# ----------------------------
# Columns stored with the generic object dtype are treated as categorical.
categorical_cols = df.select_dtypes(include="object").columns

# Print a frequency table for each of those columns under its own banner.
for col, values in df[categorical_cols].items():
    print(f"\n===== {col} =====")
    print(values.value_counts())


In [None]:
# ----------------------------
# 5. Clean Column Names
# ----------------------------
# Normalise headers to snake_case: trim surrounding whitespace, lower-case,
# then turn inner spaces into underscores. Later cells address columns by
# these normalised names.
trimmed_names = df.columns.str.strip()
lowered_names = trimmed_names.str.lower()
df.columns = lowered_names.str.replace(" ", "_")
df.columns


In [None]:
# ----------------------------
# 6. Univariate Visualization
# ----------------------------
sns.set(style="whitegrid")

# One entry per count plot: (column, title, figure size, rotate x labels?)
count_plot_specs = [
    ("gender", "Gender Distribution", (6, 4), False),
    ("university", "University Distribution", (10, 4), True),
]

for column, title, size, rotate_labels in count_plot_specs:
    fig, ax = plt.subplots(figsize=size)
    sns.countplot(data=df, x=column, ax=ax)
    ax.set_title(title)
    if rotate_labels:
        # Tilt the category labels by 45 degrees.
        ax.tick_params(axis="x", labelrotation=45)
    plt.show()


In [None]:
# ----------------------------
# 7. Mental Health Score Distributions
# ----------------------------
# Score columns analysed together; the correlation cell reuses this list.
mental_cols = [
    'depression',
    'anxiety',
    'isolation',
    'future_insecurity',
    'study_satisfaction',
    'academic_workload',
    'academic_pressure',
    'financial_concerns',
    'social_relationships',
]

# One histogram panel per score column, five bins each.
df[mental_cols].hist(bins=5, figsize=(12, 10))
# hist() created the current figure, so the overall title attaches to it.
plt.gcf().suptitle("Mental Health Score Distributions", fontsize=16)
plt.show()


In [None]:
# ----------------------------
# 8. Correlation Heatmap
# ----------------------------
# Pairwise correlations (DataFrame.corr defaults) between the score columns.
corr = df[mental_cols].corr()

fig, ax = plt.subplots(figsize=(10, 8))
# annot=True writes each coefficient into its cell.
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Correlation Matrix")
plt.show()


In [None]:
# ----------------------------
# 9. Relationship: Academic Pressure vs Depression/Anxiety
# ----------------------------
# Same scatter plot twice, once per outcome column: (y column, title)
pressure_plots = [
    ('depression', "Academic Pressure vs Depression"),
    ('anxiety', "Academic Pressure vs Anxiety"),
]

for outcome, title in pressure_plots:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.scatterplot(data=df, x='academic_pressure', y=outcome, ax=ax)
    ax.set_title(title)
    plt.show()


In [None]:
# ----------------------------
# 10. Sleep vs Mental Health
# ----------------------------
# Convert the sleep column to numeric hours.
#
# A plain pd.to_numeric(..., errors='coerce') maps every answer that is not a
# bare number to NaN. If the column holds text such as ranges ("4-6 hrs"),
# that wipes out the whole column and leaves the scatter plot empty.
# NOTE(review): confirm the actual answer format against the value counts
# printed in section 4.
#
# Instead, pull every number out of each answer and average them:
#   * "4-6 hrs"  -> 5.0 (midpoint of the range)
#   * "7", 7, 7.5 -> unchanged (already numeric values pass straight through)
#   * no digits / missing -> NaN
# Parsed values parse to themselves, so re-running this cell is harmless.
# Assumes df has a unique index (true for a frame straight from read_csv).
sleep_numbers = (
    df['average_sleep']
    .astype(str)
    .str.extractall(r'(\d+(?:\.\d+)?)')[0]
    .astype(float)
)
# extractall yields one row per match, indexed by (original row, match number);
# averaging over the first level collapses it back to one value per row.
df['average_sleep'] = sleep_numbers.groupby(level=0).mean().reindex(df.index)

if df['average_sleep'].isna().all():
    # Make an empty plot explainable instead of silent.
    print("Warning: no numeric sleep durations could be parsed from 'average_sleep'.")

plt.figure(figsize=(6,4))
sns.scatterplot(data=df, x="average_sleep", y="depression")
plt.title("Sleep Duration vs Depression")
plt.show()


In [None]:
# ----------------------------
# 11. Sports / Physical Activity Impact
# ----------------------------
# One box of depression scores per sports_engagement category.
fig, ax = plt.subplots(figsize=(7, 4))
sns.boxplot(data=df, x="sports_engagement", y="depression", ax=ax)
ax.set_title("Sports Engagement vs Depression")
# Tilt the category labels by 45 degrees.
ax.tick_params(axis="x", labelrotation=45)
plt.show()


In [None]:
# ---------------------------------------------
# Neural Network with Hyperparameter Tuning
# Mental Health Dataset
# ---------------------------------------------

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Re-read the survey so modelling starts from the raw file rather than from
# the frame the EDA cells above have modified.
df = pd.read_csv("MentalHealthSurvey.csv")

# Clean column names: trim, lower-case, spaces -> underscores
trimmed_names = df.columns.str.strip()
df.columns = trimmed_names.str.lower().str.replace(" ", "_")
df.head(n=5)



In [None]:
# Convert depression score into binary class:
# 0-2 = Low risk, 3-5 = High risk
# Vectorised comparison: scores of 3 or more become 1, everything else
# (including missing scores, which compare False) becomes 0.
is_high_risk = df['depression'] >= 3
df['depression_label'] = is_high_risk.astype("int64")


In [None]:
# Replace every object-dtype (text) column with integer codes.
categorical_cols = df.select_dtypes(include="object").columns

# Keep one fitted encoder per column. Refitting a single shared LabelEncoder
# on each column in turn would discard the category mapping of every column
# except the last, making it impossible to decode the codes later
# (label_encoders[col].inverse_transform) or to encode new survey rows the
# same way (label_encoders[col].transform).
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


In [None]:
# Features: every column except the raw depression score and the label
# derived from it (keeping either would leak the target into the inputs).
X = df.drop(columns=['depression', 'depression_label'])
y = df['depression_label']

# Hold out 20% for testing. stratify=y keeps the low/high-risk proportions
# the same in both partitions; without it a random split can leave the test
# set with a skewed class mix, which distorts accuracy and the per-class
# report. random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Learn the scaling statistics from the training features only, then apply
# that same fitted transformation to both partitions so the test data does
# not influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Base network for the search. random_state pins weight initialisation and
# batch shuffling; without it the cross-validation scores, and therefore the
# "best" parameters printed below, change on every run of the notebook.
mlp = MLPClassifier(max_iter=2000, random_state=42)

# Every combination below is tried: 4 * 2 * 1 * 3 * 2 = 48 candidates.
param_grid = {
    'hidden_layer_sizes': [(32,), (64,), (32,16), (64,32)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam'],
    'alpha': [0.0001, 0.001, 0.01],  # L2 regularization
    'learning_rate_init': [0.001, 0.01]
}

# 3-fold cross-validated search scored on accuracy, using all CPU cores.
grid = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    scoring='accuracy'
)

# NOTE(review): X_train_scaled was scaled on the full training set before the
# folds are cut, so each validation fold has influenced the scaler. Wrapping
# scaler + model in a Pipeline would remove that small optimistic bias.
grid.fit(X_train_scaled, y_train)

print("Best Parameters:", grid.best_params_)
print("Best Score:", grid.best_score_)


In [None]:
# Score the best estimator found by the grid search on the held-out test
# partition.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test_scaled)

test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)

print("\nAccuracy:", test_accuracy)
print("\nClassification Report:\n")
print(per_class_report)


In [None]:
# Example prediction:
# Indexing with a one-element list keeps the 2-D (1, n_features) shape that
# predict() expects for a single sample.
sample = X_test_scaled[[0]]
predicted_class = best_model.predict(sample)
print("Predicted Class:", predicted_class)
