4.1.a

for second_Student_Depression_Dataset.csv  
1) Compare Mean CGPA: Depressed vs. Not Depressed

In [None]:
## Compare Mean CGPA: Depressed vs. Not‑Depressed

import pandas as pd
from scipy.stats import ttest_ind

# Read the data set and force both analysis columns to numeric.
# errors='coerce' turns unparseable entries into NaN so dropna() can discard them.
df = pd.read_csv('second_Student_Depression_Dataset.csv')
for col in ('CGPA', 'Depression'):
    df[col] = pd.to_numeric(df[col], errors='coerce')
df = df.dropna(subset=['CGPA', 'Depression'])

# Partition CGPA by the Depression flag (1 -> "dep" sample, 0 -> "nondep" sample)
is_dep = df['Depression'] == 1
is_nondep = df['Depression'] == 0
cgpa_dep = df.loc[is_dep, 'CGPA']
cgpa_nondep = df.loc[is_nondep, 'CGPA']

# Welch's t-test: equal_var=False drops the equal-variance assumption
welch = ttest_ind(cgpa_dep, cgpa_nondep, equal_var=False)
t_stat = welch.statistic
p_val = welch.pvalue
print(f"T = {t_stat}, p-value = {p_val}")

# Decision rule (alpha = 0.05):
#   p_val < 0.05  -> reject H₀: mean CGPA differs between the two groups
#   otherwise     -> fail to reject H₀

4.2.a

for second_Student_Depression_Dataset.csv

2) Spearman Rank‐Correlation (sleep_ord vs. Depression)

In [None]:
import pandas as pd
from scipy.stats import spearmanr

# Load the data set and encode sleep duration as an ordinal rank
# (1 = shortest sleep, 4 = longest sleep).
df = pd.read_csv('second_Student_Depression_Dataset.csv')
sleep_map = {
    'Less than 5 hours': 1,
    '5-6 hours':         2,
    '7-8 hours':         3,
    'More than 8 hours': 4,
}
# Labels that are not keys of sleep_map become NaN under .map()
# and are removed by the dropna() below.
df['sleep_ord']  = df['Sleep Duration'].map(sleep_map)
df['Depression'] = pd.to_numeric(df['Depression'], errors='coerce')
df = df.dropna(subset=['sleep_ord', 'Depression'])

# Spearman rank correlation between ordinal sleep duration and the Depression flag
rho, p = spearmanr(df['sleep_ord'], df['Depression'])
# Label the coefficient as "rho" so it cannot be confused with the
# p-value printed right next to it.
print(f"Spearman rho = {rho:.3f}, p-value = {p}")

# Decision rule (alpha = 0.05):
#   p < 0.05   -> reject H₀: significant monotonic association
#   otherwise  -> fail to reject H₀

4.2.b

for second_Student_Depression_Dataset.csv

3) One-way ANOVA: Depression Score Across Sleep Groups

In [None]:
import pandas as pd
from scipy.stats import f_oneway

# Read the data set, coerce Depression to numeric (bad entries -> NaN),
# then keep only rows where both the grouping column and the score are present.
raw = pd.read_csv('second_Student_Depression_Dataset.csv')
df = raw.assign(
    Depression=pd.to_numeric(raw['Depression'], errors='coerce')
).dropna(subset=['Sleep Duration', 'Depression'])

# One Depression sample per sleep-duration category, in this fixed order.
# Rows whose 'Sleep Duration' is not one of these labels are not used.
cats = ['Less than 5 hours', '5-6 hours', '7-8 hours', 'More than 8 hours']
sleep = df['Sleep Duration']
scores = df['Depression']
groups = []
for label in cats:
    groups.append(scores[sleep == label])

# One-way ANOVA across the four samples
anova = f_oneway(*groups)
F = anova.statistic
p = anova.pvalue
print(f"F = {F}, p‑value = {p}")

# Decision rule (alpha = 0.05):
#   p < 0.05   -> reject H₀: not all group means are equal
#   otherwise  -> fail to reject H₀

4.3.a

for second_Student_Depression_Dataset.csv

4) Association: Family History of Mental Illness vs. Depression (Chi-Square)

In [None]:
import pandas as pd
from scipy.stats import chi2_contingency

# 1. Load the data set; coerce Depression to numeric and discard rows
#    where either of the two variables under test is missing.
history_col = 'Family History of Mental Illness'
outcome_col = 'Depression'

df = pd.read_csv('second_Student_Depression_Dataset.csv')
df[outcome_col] = pd.to_numeric(df[outcome_col], errors='coerce')
df = df.dropna(subset=[history_col, outcome_col])

# 2. Contingency table: rows = family-history answer, columns = Depression value
cont = pd.crosstab(index=df[history_col], columns=df[outcome_col])

# 3. Chi-square test of independence on the observed counts.
#    NOTE: with default arguments scipy applies Yates' continuity correction
#    when the table has one degree of freedom (e.g. 2x2).
chi2_result = chi2_contingency(cont)
chi2_stat, p_val, dof, expected = chi2_result

# 4. Report the statistic and the p-value in scientific notation
print(f"Chi2 statistic = {chi2_stat:.3f}")
print(f"Exact p-value     = {p_val:.12e}")


4.4.a

for uni_depression.csv

1) Correlation: Estimated CGPA vs. Depression Score (Pearson)

In [None]:
import pandas as pd
from scipy.stats import pearsonr

# Load the survey and convert each reported CGPA band to one representative number.
df = pd.read_csv('uni_depression.csv')

# Closed bands are represented by the midpoint of their bounds; the
# open-ended 'Below 2.50' band is assigned the fixed value 2.25.
band_bounds = {
    '2.50 - 2.99': (2.50, 2.99),
    '3.00 - 3.39': (3.00, 3.39),
    '3.40 - 3.79': (3.40, 3.79),
    '3.80 - 4.00': (3.80, 4.00),
}
mapping = {'Below 2.50': 2.25}
for band, (low, high) in band_bounds.items():
    mapping[band] = (low + high) / 2

# Answers that are not a key of `mapping` become NaN here
df['cgpa_mid'] = df['6. Current CGPA'].map(mapping)

# Coerce the depression score to numeric, then keep complete pairs only
df['Depression Value'] = pd.to_numeric(df['Depression Value'], errors='coerce')
clean = df.dropna(subset=['cgpa_mid', 'Depression Value'])

# Pearson correlation between CGPA midpoint and depression score
cgpa_values = clean['cgpa_mid']
depression_values = clean['Depression Value']
r, p = pearsonr(cgpa_values, depression_values)
print(f"Pearson r = {r:.3f}, p-value = {p:.3f}")

4.5.a

for uni_depression.csv

2) Independent Two-Sample t-Test: Scholarship vs. No Scholarship

In [None]:
import pandas as pd
from scipy.stats import ttest_ind

# 1. Load the survey, coerce the depression score to numeric and drop rows
#    missing either the scholarship answer or the score.
scholarship_col = '7. Did you receive a waiver or scholarship at your university?'
score_col = 'Depression Value'

df = pd.read_csv('uni_depression.csv')
df[score_col] = pd.to_numeric(df[score_col], errors='coerce')
df = df.dropna(subset=[scholarship_col, score_col])

# 2. Two independent samples of scores, selected by the literal answers
#    'Yes' and 'No'; any other answer is left out of both samples.
answered_yes = df[scholarship_col] == 'Yes'
answered_no = df[scholarship_col] == 'No'
sch_yes = df.loc[answered_yes, score_col]
sch_no = df.loc[answered_no, score_col]

# 3. Student's t-test: equal_var=True assumes both groups share one variance
student = ttest_ind(sch_yes, sch_no, equal_var=True)
t_stat = student.statistic
p_val = student.pvalue
print(f"T = {t_stat:.3f}, p-value = {p_val:.3f}")