for uni_depression.csv
1) Histogram of Depression Values:

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the survey export. The score column is coerced to numeric (as the
# other cells in this notebook do) so that a stray non-numeric entry becomes
# NaN and is dropped, rather than leaving the whole column as strings that
# cannot be binned numerically.
df = pd.read_csv('uni_depression.csv')
df['Depression Value'] = pd.to_numeric(df['Depression Value'], errors='coerce')
df = df.dropna(subset=['Depression Value'])

# Plot the distribution of scores in 14 equal-width bins.
plt.figure()
plt.hist(df['Depression Value'], bins=14)
plt.xlabel('Depression Value')
plt.ylabel('Frequency')
plt.title('Histogram of Depression Values')
plt.tight_layout()
plt.show()

for uni_depression.csv

2) Violin Plot of Depression by CGPA Range:

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the survey, force the score column to numeric (bad entries -> NaN),
# and keep only rows that have both a CGPA bracket and a usable score.
df = pd.read_csv('uni_depression.csv')
df['Depression Value'] = pd.to_numeric(df['Depression Value'], errors='coerce')
df = df.dropna(subset=['6. Current CGPA','Depression Value'])

# CGPA brackets, listed in the left-to-right order they should be drawn.
all_labels = ['Below 2.50','2.50 - 2.99','3.00 - 3.39','3.40 - 3.79','3.80 - 4.00']

# Collect the scores for every bracket, then keep only the brackets that
# actually have respondents; `labels` and `groups` stay aligned by position.
scores_by_bracket = {
    bracket: df.loc[df['6. Current CGPA']==bracket, 'Depression Value']
    for bracket in all_labels
}
labels = [bracket for bracket, scores in scores_by_bracket.items() if len(scores) > 0]
groups = [scores_by_bracket[bracket] for bracket in labels]

# One violin per remaining bracket, with the mean marked and extrema hidden.
fig, ax = plt.subplots(figsize=(8,4))
parts = ax.violinplot(groups,
                      showmeans=True,
                      showextrema=False,
                      widths=0.7)

# Soften the violin bodies so the mean markers remain visible.
for body in parts['bodies']:
    body.set_alpha(0.6)

# Violins are drawn at x = 1..N, so the tick positions follow the same range.
ax.set_xticks(range(1, len(labels)+1))
ax.set_xticklabels(labels, rotation=15)
ax.set_xlabel('CGPA Range')
ax.set_ylabel('Depression Value')
ax.set_title('Depression Score Distribution by CGPA Range\n(only nonempty bins)')
fig.tight_layout()
plt.show()

for uni_depression.csv

3) Jittered Scatterplot with Linear Fit (Estimated CGPA vs. Depression):

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

# 1. Load & clean: coerce scores to numeric (bad entries -> NaN) and drop
#    rows missing either the CGPA bracket or the score.
df = pd.read_csv('uni_depression.csv')
df['Depression Value'] = pd.to_numeric(df['Depression Value'], errors='coerce')
df = df.dropna(subset=['6. Current CGPA','Depression Value'])

# 2. Map each CGPA bracket to one representative number. Closed brackets use
#    their midpoint; the open-ended 'Below 2.50' bracket is pinned at 2.25.
mapping = {
    'Below 2.50': 2.25,
    '2.50 - 2.99': (2.50+2.99)/2,
    '3.00 - 3.39': (3.00+3.39)/2,
    '3.40 - 3.79': (3.40+3.79)/2,
    '3.80 - 4.00': (3.80+4.00)/2
}
df['cgpa_mid'] = df['6. Current CGPA'].map(mapping)
df = df.dropna(subset=['cgpa_mid'])  # drops rows whose bracket is not in `mapping`

# 3. Extract arrays
x = df['cgpa_mid'].values
y = df['Depression Value'].values

# 4. Validate before any figure is created, so a failure does not leave a
#    half-drawn plot behind. A line needs at least two points and at least
#    two distinct x values (linregress rejects identical x values).
if len(x) < 2:
    raise ValueError(f"Not enough points to fit a line (need >=2), got {len(x)}")
if x.min() == x.max():
    raise ValueError("Cannot fit a line: every row falls in the same CGPA range")

# 5. Linear regression on the un-jittered values
res = linregress(x, y)

# 6. Scatter with horizontal jitter so points sharing a bracket do not overlap.
#    The generator is seeded so the figure is identical on every run.
rng = np.random.default_rng(0)
jitter = rng.normal(0, 0.05, size=len(x))
plt.figure(figsize=(6,4))
plt.scatter(x + jitter, y, alpha=0.6, s=20, label='Data (jittered)')

# 7. Fitted line. The intercept uses an explicit sign (`+.1f`) so a negative
#    intercept renders as "x-3.2" rather than "x+-3.2".
xs = np.linspace(x.min(), x.max(), 100)
plt.plot(xs, res.slope * xs + res.intercept,
         color='red', linewidth=2,
         label=f'y={res.slope:.2f}x{res.intercept:+.1f}\nr={res.rvalue:.2f}, p={res.pvalue:.3f}')

# 8. Labels & legend
plt.xlabel('Estimated CGPA')
plt.ylabel('Depression Value')
plt.title('Jittered Scatter + Linear Fit')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

for uni_depression.csv

4) Boxplot of Depression by Scholarship Status:

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Column holding the Yes/No scholarship answer; named once to avoid repeating
# the long survey question text.
status_col = '7. Did you receive a waiver or scholarship at your university?'

# Load data. Scores are coerced to numeric (bad entries -> NaN) so that the
# dropna below also removes rows whose score cannot be plotted.
df = pd.read_csv('uni_depression.csv')
df['Depression Value'] = pd.to_numeric(df['Depression Value'], errors='coerce')
df = df.dropna(subset=[status_col, 'Depression Value'])

# One score series per scholarship answer, in the order the boxes are drawn.
labels = ['No', 'Yes']
groups = [df.loc[df[status_col] == answer, 'Depression Value'] for answer in labels]

# Plot. Tick labels are set through plt.xticks instead of boxplot's `labels`
# keyword, which newer Matplotlib releases deprecate in favour of
# `tick_labels`; xticks behaves the same on old and new versions. Boxes sit
# at x = 1..N by default, hence the range below.
plt.figure()
plt.boxplot(groups, showfliers=False)
plt.xticks(range(1, len(labels) + 1), labels)
plt.xlabel('Scholarship Status')
plt.ylabel('Depression Value')
plt.title('Depression by Scholarship Status')
plt.tight_layout()
plt.show()

for uni_depression.csv

5) Kernel Density Estimate (KDE) of Depression by Scholarship Status:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Column holding the Yes/No scholarship answer; named once to avoid repeating
# the long survey question text.
status_col = '7. Did you receive a waiver or scholarship at your university?'

# Load data. Scores are coerced to numeric (bad entries -> NaN) so that the
# dropna below leaves only values gaussian_kde can work with.
df = pd.read_csv('uni_depression.csv')
df['Depression Value'] = pd.to_numeric(df['Depression Value'], errors='coerce')
df = df.dropna(subset=[status_col, 'Depression Value'])

# Shared evaluation grid from 0 to the largest observed score. It does not
# depend on the group, so it is built once outside the loop.
xs = np.linspace(0, df['Depression Value'].max(), 200)

plt.figure()
for status, color in zip(['No', 'Yes'], ['C0', 'C1']):
    data = df.loc[df[status_col] == status, 'Depression Value']
    # gaussian_kde cannot estimate a bandwidth from fewer than two distinct
    # values (it raises), so such a group is reported and skipped instead of
    # aborting the whole figure.
    if data.nunique() < 2:
        print(f"Skipping KDE for Scholarship = {status}: need at least two distinct values")
        continue
    kde = gaussian_kde(data)
    plt.plot(xs, kde(xs), label=f"Scholarship = {status}", color=color)

plt.xlabel('Depression Value')
plt.ylabel('Density')
plt.title('KDE of Depression by Scholarship Status')
plt.legend()
plt.tight_layout()
plt.show()

for uni_depression.csv

6) Heatmap of PHQ‑9 Item and Total‑Score Correlations:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the survey and pick the columns to correlate.
df = pd.read_csv('uni_depression.csv')
# Columns at positions 7..15 (0-indexed) are taken to be the PHQ‑9 items;
# this is a positional selection, so it assumes the CSV column order.
phq9_items = list(df.columns[7:16])
all_cols = phq9_items + ['Depression Value']

# Force every selected column to numeric (bad entries -> NaN), then keep only
# rows that are complete across all of them.
numeric_cols = df[all_cols].apply(pd.to_numeric, errors='coerce')
data = numeric_cols.dropna()

# Pairwise correlations between all selected columns (pandas default method).
corr = data.corr()

# Draw the matrix as a heatmap on a fixed [-1, 1] colour scale.
fig, ax = plt.subplots(figsize=(8,6))
image = ax.imshow(corr, cmap='coolwarm', vmin=-1, vmax=1, aspect='auto')
fig.colorbar(image, label='Pearson r')

# One tick per matrix row/column, labelled with the column names.
tick_positions = np.arange(len(corr))
ax.set_xticks(tick_positions)
ax.set_xticklabels(corr.columns, rotation=90)
ax.set_yticks(tick_positions)
ax.set_yticklabels(corr.index)
ax.set_title('PHQ‑9 Items & Total Score Correlation')
fig.tight_layout()
plt.show()