In [1]:
import pandas as pd
import numpy as np

In [2]:
def descriptive_stats(df, column):
    """Summarise one column of ``df`` with basic descriptive statistics.

    The column is first coerced to numeric. Entries that cannot be converted
    become NaN and are discarded together with missing entries, so every
    statistic is computed on the remaining numeric values only.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the data.
    column : label
        Name of the column to summarise.

    Returns
    -------
    dict
        Keys, in this order: ``count``, ``mean``, ``median``, ``mode``,
        ``std_dev``, ``min``, ``Q1``, ``Q3``, ``max``, ``range``, ``IQR``,
        ``skewness``.

        * ``mode`` is the first entry of ``Series.mode()``, or NaN when that
          result is empty.
        * ``skewness`` is ``(mean - median) / std_dev`` (a median-based
          measure, not the moment-based skewness of ``Series.skew()``); it is
          NaN when ``std_dev`` is zero.
    """
    values = pd.to_numeric(df[column], errors="coerce").dropna()

    # Evaluate every building block once and reuse it below.
    centre = values.mean()
    middle = values.median()
    spread = values.std()
    lowest = values.min()
    highest = values.max()
    lower_q = values.quantile(0.25)
    upper_q = values.quantile(0.75)
    modes = values.mode()

    # A zero spread would mean dividing by zero, so report NaN instead.
    if spread:
        skew = (centre - middle) / spread
    else:
        skew = np.nan

    return {
        "count": values.count(),
        "mean": centre,
        "median": middle,
        "mode": np.nan if modes.empty else modes[0],
        "std_dev": spread,
        "min": lowest,
        "Q1": lower_q,
        "Q3": upper_q,
        "max": highest,
        "range": highest - lowest,
        "IQR": upper_q - lower_q,
        "skewness": skew,
    }

In [43]:
# Load the survey responses. Malformed rows are still dropped so the load does not
# abort, but on_bad_lines="warn" reports each one instead of discarding it silently,
# so any shrinkage of the data set is visible before statistics are computed.
df = pd.read_csv("Student Mental health.csv", sep=",", engine="python", on_bad_lines="warn")

In [44]:
# Print the column headers as a plain Python list.
print(df.columns.tolist())

['Timestamp', 'Choose your gender', 'Age', 'What is your course?', 'Your current year of Study', 'What is your CGPA?', 'Marital status', 'Do you have Depression?', 'Do you have Anxiety?', 'Do you have Panic attack?', 'Did you seek any specialist for a treatment?']


In [45]:
# Pick the dependent variable: the first column whose header contains "Anxiety".
for col in df.columns:
    # str() keeps the substring test valid even for non-string column labels.
    if "Anxiety" in str(col):
        dep_var = col
        print("Dependent Variable Found:", dep_var)
        break
else:
    # The loop finished without a break, so no header matched. Stop here with a
    # clear message rather than leaving dep_var undefined (or stale from an
    # earlier run of this cell) and failing somewhere further down.
    raise KeyError('No column name contains "Anxiety"; cannot set dep_var')

Dependent Variable Found: Do you have Anxiety?


In [46]:
# Show the distinct values currently stored in the dependent-variable column.
print(df[dep_var].unique())

['No' 'Yes']


In [47]:
# Encode the yes/no answers as 1/0. The text is stripped and lower-cased first so
# variants such as " Yes" or "NO" map correctly; any other answer becomes NaN.
# The dtype guard makes the cell safe to re-run: without it, a second run would
# stringify the 0/1 codes, find no match in the mapping and turn the column to NaN.
if not pd.api.types.is_numeric_dtype(df[dep_var]):
    df[dep_var] = df[dep_var].astype(str).str.strip().str.lower().map({
        "yes": 1,
        "no": 0
    })


In [48]:
# Systematic sample: keep every `step`-th row, starting from the first row
# (position 0), with all columns retained.
step = 3
systematic_df = df.iloc[::step, :]

In [49]:
# Compute the summary statistics of the dependent variable on the systematic sample.
results = descriptive_stats(systematic_df, dep_var)

In [50]:
# Report the number of sampled rows, then one statistic per line.
print("Systematic Sample Size:", len(systematic_df))
print(f"\nDescriptive Statistics for Dependent Variable: {dep_var}\n")
for key, value in results.items():
    # `:10` pads the statistic name to 10 characters so the colons line up.
    print(f"{key:10}: {value}")

Systematic Sample Size: 34

Descriptive Statistics for Dependent Variable: Do you have Anxiety?

count     : 34
mean      : 0.4117647058823529
median    : 0.0
mode      : 0
std_dev   : 0.49955416843564215
min       : 0
Q1        : 0.0
Q3        : 1.0
max       : 1
range     : 1
IQR       : 1.0
skewness  : 0.8242643779188098


In [51]:
# Print the column headers again, followed by the first rows of the full frame
# (df, not the systematic sample).
print(df.columns.tolist())
print(df.head())

['Timestamp', 'Choose your gender', 'Age', 'What is your course?', 'Your current year of Study', 'What is your CGPA?', 'Marital status', 'Do you have Depression?', 'Do you have Anxiety?', 'Do you have Panic attack?', 'Did you seek any specialist for a treatment?']
        Timestamp Choose your gender   Age What is your course?  \
0  8/7/2020 12:02             Female  18.0          Engineering   
1  8/7/2020 12:04               Male  21.0    Islamic education   
2  8/7/2020 12:05               Male  19.0                  BIT   
3  8/7/2020 12:06             Female  22.0                 Laws   
4  8/7/2020 12:13               Male  23.0         Mathemathics   

  Your current year of Study What is your CGPA? Marital status  \
0                     year 1        3.00 - 3.49             No   
1                     year 2        3.00 - 3.49             No   
2                     Year 1        3.00 - 3.49             No   
3                     year 3        3.00 - 3.49            Yes   
4 