<a href="https://colab.research.google.com/github/0xSah/Depression-Analysis-Dashboard/blob/main/Depression_Analysis_Prototype.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the Python dependencies with the %pip magic so they land in the
# environment of the running kernel (a bare `!pip` uses whichever pip is first
# on PATH, which is not guaranteed to be the kernel's).
# TODO: pin package versions so the notebook is reproducible.
%pip install -q streamlit pandas numpy matplotlib seaborn plotly scikit-learn
# localtunnel is installed globally via npm; it is invoked through `npx` in the last cell.
!npm install -g localtunnel
# Informational only: lists packages that are looking for funding.
!npm fund

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K
changed 22 packages in 1s
[1G[0K⠏[1G[0K
[1G[0K⠏[1G[0K3 packages are looking for funding
[1G[0K⠏[1G[0K  run `npm fund` for details
[1G[0K⠏[1G[0K[1G[0K⠙[1G[0Kcontent

[1G[0K⠙[1G[0K

In [None]:
import pandas as pd
import numpy as np

# Build a synthetic per-student, per-semester dataset and save it as
# 'student_depression_data.csv'. Everything below draws from the global NumPy
# RNG, so the order of the np.random calls determines the generated values.

# Set random seed for reproducibility
np.random.seed(42)

# Define parameters
n_students = 100  # Number of students
n_semesters = 8   # Number of semesters per student
n_records = n_students * n_semesters  # Total records (one row per student-semester)

# Generate base data: Student IDs and Semesters
# Each ID is repeated n_semesters times and paired with semesters 1..n_semesters.
data = {
    'Student_ID': [f'S{i:03d}' for i in range(1, n_students + 1) for _ in range(n_semesters)],
    'Semester': list(range(1, n_semesters + 1)) * n_students,
}
df = pd.DataFrame(data)

# Simulate CGPA with a declining trend for some students
base_cgpa = np.random.normal(3.0, 0.5, n_students)  # Base CGPA per student
df['Base_CGPA'] = df['Student_ID'].map(dict(zip(df['Student_ID'].unique(), base_cgpa)))
decline_prob = 0.3  # 30% of students have declining CGPA
decline_students = np.random.choice(df['Student_ID'].unique(), size=int(n_students * decline_prob), replace=False)
df['Trend'] = 0.0
df.loc[df['Student_ID'].isin(decline_students), 'Trend'] = -0.05  # Decline by 0.05 per semester
df['CGPA'] = df['Base_CGPA'] + df['Trend'] * (df['Semester'] - 1)
df['CGPA'] = df['CGPA'].clip(0, 4)  # CGPA between 0 and 4

# Simulate stress levels (peaks around semesters 4-5)
# A cosine centred on 4.5 is at its maximum midway between semesters 4 and 5.
# (The earlier sine form was zero at 4.5 and peaked at semester 6.5 instead.)
df['Stress_Level'] = 5 + 2 * np.cos((df['Semester'] - 4.5) * np.pi / 4)
df['Stress_Level'] += np.random.normal(0, 1, n_records)
df['Stress_Level'] = df['Stress_Level'].clip(1, 10)  # Stress between 1 and 10

# Simulate sleep hours (influenced by stress)
df['Sleep_Hours'] = 7 - 0.5 * (df['Stress_Level'] - 5)  # Higher stress reduces sleep
df['Sleep_Hours'] += np.random.normal(0, 1, n_records)
df['Sleep_Hours'] = df['Sleep_Hours'].clip(4, 12)  # Sleep between 4 and 12 hours

# Simulate social media sentiment (negatively correlated with stress)
df['Social_Media_Sentiment'] = 0.5 - 0.1 * df['Stress_Level']
df['Social_Media_Sentiment'] += np.random.normal(0, 0.2, n_records)
df['Social_Media_Sentiment'] = df['Social_Media_Sentiment'].clip(-1, 1)  # Sentiment between -1 and 1

# Simulate exercise habits (60% exercise daily)
df['Exercise_Daily'] = np.random.choice([0, 1], n_records, p=[0.4, 0.6])
df['Exercise_Duration'] = np.where(
    df['Exercise_Daily'] == 1,
    np.random.normal(45, 15, n_records).clip(0, 120),  # Duration in minutes
    0
)

# Simulate sports participation
df['Sports_Frequency'] = np.random.randint(0, 8, n_records)  # Days per week (0-7; upper bound is exclusive)
df['Sports_Performance'] = np.where(
    df['Sports_Frequency'] > 0,
    np.random.normal(70, 15, n_records).clip(0, 100),  # Performance score
    0
)

# Simulate gaming hours
df['Gaming_Hours'] = np.random.gamma(2, 5, n_records).clip(0, 40)  # Hours per week

# Define weights for depression risk score (the five weights sum to 1.0)
weights = {
    'CGPA': 0.3,               # Low CGPA increases risk
    'Stress_Level': 0.2,       # High stress increases risk
    'Sleep_Hours': 0.2,        # Low sleep increases risk
    'Social_Media_Sentiment': 0.1,  # Negative sentiment increases risk
    'Exercise_Daily': 0.2      # No exercise increases risk
}

# Calculate risk score based on conditions
# Each boolean condition contributes its weight when true and 0 otherwise.
df['Risk_Score'] = (
    (df['CGPA'] < 2.5) * weights['CGPA'] +
    (df['Stress_Level'] > 7) * weights['Stress_Level'] +
    (df['Sleep_Hours'] < 6) * weights['Sleep_Hours'] +
    (df['Social_Media_Sentiment'] < 0) * weights['Social_Media_Sentiment'] +
    (df['Exercise_Daily'] == 0) * weights['Exercise_Daily']
)

# Set depression risk based on threshold
threshold = 0.5
df['Depression_Risk'] = (df['Risk_Score'] >= threshold).astype(int)

# Add noise to continuous variables for realism
# Risk_Score / Depression_Risk were computed above from the pre-noise values,
# so they are not affected by this step.
for col in ['CGPA', 'Stress_Level', 'Sleep_Hours', 'Social_Media_Sentiment']:
    # Capture the bounds BEFORE adding noise; clipping to the post-noise
    # min/max would be a no-op and let values drift outside the valid range.
    lower_bound, upper_bound = df[col].min(), df[col].max()
    df[col] += np.random.normal(0, 0.1 * df[col].std(), n_records)
    df[col] = df[col].clip(lower_bound, upper_bound)  # Keep within original bounds

# Drop temporary columns
df = df.drop(['Base_CGPA', 'Trend'], axis=1)

# Save the dataset to a CSV file
df.to_csv('student_depression_data.csv', index=False)

# Optional: Display the first few rows
print(df.head())

  Student_ID  Semester      CGPA  Stress_Level  Sleep_Hours  \
0       S001         1  3.244944      4.946901     7.993181   
1       S001         2  3.293692      4.041091     7.008875   
2       S001         3  3.229349      1.118029    10.401605   
3       S001         4  3.295564      5.633884     8.181074   
4       S001         5  3.293807      4.902628     7.452281   

   Social_Media_Sentiment  Exercise_Daily  Exercise_Duration  \
0               -0.134640               1          53.333830   
1                0.090217               1          81.439240   
2                0.256323               1          42.059897   
3               -0.004768               0           0.000000   
4               -0.011785               1          23.921262   

   Sports_Frequency  Sports_Performance  Gaming_Hours  Risk_Score  \
0                 3           72.413266      9.483763         0.1   
1                 7           56.161110      6.220924         0.0   
2                 1          

In [None]:
import pandas as pd

# Read the saved CSV back from disk, replacing the in-memory frame
df = pd.read_csv('student_depression_data.csv')

# Sanity check: show the first five rows
print(df.head(5))

  Student_ID  Semester      CGPA  Stress_Level  Sleep_Hours  \
0       S001         1  3.244944      4.946901     7.993181   
1       S001         2  3.293692      4.041091     7.008875   
2       S001         3  3.229349      1.118029    10.401605   
3       S001         4  3.295564      5.633884     8.181074   
4       S001         5  3.293807      4.902628     7.452281   

   Social_Media_Sentiment  Exercise_Daily  Exercise_Duration  \
0               -0.134640               1          53.333830   
1                0.090217               1          81.439240   
2                0.256323               1          42.059897   
3               -0.004768               0           0.000000   
4               -0.011785               1          23.921262   

   Sports_Frequency  Sports_Performance  Gaming_Hours  Risk_Score  \
0                 3           72.413266      9.483763         0.1   
1                 7           56.161110      6.220924         0.0   
2                 1          

In [None]:
%%writefile depression_dashboard.py

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.cluster import KMeans

def render_figure(fig):
    """Draw a Matplotlib figure in the Streamlit app, then close it.

    Figures created through ``plt.subplots`` stay registered with pyplot until
    they are explicitly closed. Streamlit re-runs this script on each widget
    interaction, so without the close every rerun would leave its figures
    behind in memory.
    """
    st.pyplot(fig)
    plt.close(fig)


# Load dataset
df = pd.read_csv('student_depression_data.csv')

# Streamlit app title
st.title("Depression Risk Analysis Dashboard")
st.write("Explore multidimensional behavioral and academic trends among university students.")

# Sidebar for interactivity
st.sidebar.header("Filters")
# Range slider: returns a (low, high) tuple because `value` is a tuple.
semester_filter = st.sidebar.slider("Select Semester", min_value=1, max_value=8, value=(1, 8))
student_sample = st.sidebar.number_input("Number of Students for Trends", min_value=1, max_value=100, value=5)

# Filter data based on semester (both ends inclusive).
# .copy() makes this an independent frame so the 'Cluster' column added in
# section 9 does not trigger pandas' SettingWithCopyWarning.
filtered_df = df[df['Semester'].between(semester_filter[0], semester_filter[1])].copy()

# 1. Heatmap: Correlation Between Features
st.subheader("1. Correlation Heatmap")
numeric_df = filtered_df.select_dtypes(include=[np.number])
correlation_matrix = numeric_df.corr()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, fmt='.2f', ax=ax)
render_figure(fig)

# 2. Scatter Plot: CGPA vs. Stress Level
st.subheader("2. Scatter Plot: CGPA vs Stress Level")
fig = px.scatter(filtered_df, x='CGPA', y='Stress_Level', color='Depression_Risk',
                 hover_data=['Student_ID', 'Semester'], title="CGPA vs Stress Level")
st.plotly_chart(fig)

# 3. Line Plot: CGPA Trends Over Semesters
# Only the first `student_sample` student IDs (in order of appearance) are drawn.
st.subheader("3. CGPA Trends Over Semesters")
fig, ax = plt.subplots(figsize=(10, 6))
for student in filtered_df['Student_ID'].unique()[:student_sample]:
    student_data = filtered_df[filtered_df['Student_ID'] == student]
    ax.plot(student_data['Semester'], student_data['CGPA'], label=student)
ax.set_xlabel('Semester')
ax.set_ylabel('CGPA')
ax.legend()
render_figure(fig)

# 4. Bar Plot: Average CGPA by Depression Risk
st.subheader("4. Average CGPA by Depression Risk")
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x='Depression_Risk', y='CGPA', data=filtered_df, palette='viridis', ax=ax)
ax.set_title('Average CGPA by Depression Risk')
render_figure(fig)

# 5. Histogram: Distribution of Sleep Hours
st.subheader("5. Distribution of Sleep Hours")
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(filtered_df['Sleep_Hours'], bins=20, kde=True, color='blue', ax=ax)
ax.set_xlabel('Sleep Hours')
render_figure(fig)

# 6. Box Plot: Gaming Hours by Depression Risk
st.subheader("6. Gaming Hours by Depression Risk")
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x='Depression_Risk', y='Gaming_Hours', data=filtered_df, palette='Set2', ax=ax)
ax.set_title('Gaming Hours by Depression Risk')
render_figure(fig)

# 7. Pie Chart: Proportion of Students by Depression Risk
st.subheader("7. Proportion of Students by Depression Risk")
# value_counts() orders by frequency, not by label, and omits classes that are
# absent from the current filter. Reindexing to [0, 1] fixes the order so it
# always lines up with the ['No Risk', 'Risk'] labels and colours below.
risk_counts = filtered_df['Depression_Risk'].value_counts().reindex([0, 1], fill_value=0)
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(risk_counts, labels=['No Risk', 'Risk'], autopct='%1.1f%%', colors=['#66b3ff', '#ff6666'])
render_figure(fig)

# 8. 3D Scatter Plot: CGPA, Stress Level, and Sleep Hours
st.subheader("8. 3D Scatter Plot: CGPA, Stress Level, and Sleep Hours")
fig = px.scatter_3d(filtered_df, x='CGPA', y='Stress_Level', z='Sleep_Hours', color='Depression_Risk',
                    title='CGPA, Stress Level, and Sleep Hours', color_continuous_scale='Viridis')
st.plotly_chart(fig)

# 9. Cluster Plot: K-Means Clustering
# Clusters are fitted on three features but plotted on two of them.
st.subheader("9. K-Means Clustering: CGPA vs Stress Level")
X = filtered_df[['CGPA', 'Stress_Level', 'Sleep_Hours']]
kmeans = KMeans(n_clusters=3, random_state=42)
# Cluster ids are labels, not quantities: store them as strings so Plotly
# assigns discrete colours instead of a continuous colour scale.
filtered_df['Cluster'] = kmeans.fit_predict(X).astype(str)
fig = px.scatter(filtered_df, x='CGPA', y='Stress_Level', color='Cluster',
                 category_orders={'Cluster': ['0', '1', '2']},
                 title='K-Means Clustering')
st.plotly_chart(fig)

# Footer
st.write("Dashboard created with Streamlit by xAI's Grok 3 on March 10, 2025.")

Overwriting depression_dashboard.py


In [None]:
# Fetch ipv4.icanhazip.com quietly and write the response body to the cell output.
# NOTE(review): presumably this address is what the localtunnel page asks for as
# its tunnel password — confirm.
!wget --quiet --output-document=- ipv4.icanhazip.com

In [None]:
# Start the Streamlit app in the background (`&`), then expose port 8501 through localtunnel.
# The port is passed to Streamlit explicitly so both commands always agree on it,
# instead of relying on Streamlit's default happening to match the tunnel port.
!streamlit run depression_dashboard.py --server.port 8501 & npx localtunnel --port 8501