In [1]:

# Imports
import os
import warnings
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import librosa




In [2]:
# === EDIT THESE PATHS BEFORE RUNNING ===
DATASET_PATH = r"D:\depressiondetector\diag-woz"   
LABEL_DIR = r"D:\depressiondetector"      
# Name for extracted features CSV (will be written inside DATASET_PATH or another location)
OUTPUT_DIR = r"D:\depressiondetector\extracted_features"
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_CSV_FILE = r"D:\depressiondetector\extracted_features\audio_features.csv"

print("DATASET_PATH:", DATASET_PATH)
print("OUTPUT_CSV_FILE:", OUTPUT_CSV_FILE)


DATASET_PATH: D:\depressiondetector\diag-woz
OUTPUT_CSV_FILE: D:\depressiondetector\extracted_features\audio_features.csv


In [3]:
# Paths to splits (update names if your files differ)
train_labels_path = r"D:\depressiondetector\labels\train_split.csv"
dev_labels_path   = r"D:\depressiondetector\labels\dev_split.csv"
test_labels_path  = r"D:\depressiondetector\labels\test_split.csv"  # or full_test_split.csv if that's your filename

# Load safely with checks
for p in (train_labels_path, dev_labels_path, test_labels_path):
    if not os.path.exists(p):
        raise FileNotFoundError(f"Required label file not found: {p}")

train_labels = pd.read_csv(train_labels_path)
dev_labels   = pd.read_csv(dev_labels_path)
test_labels  = pd.read_csv(test_labels_path)

print("Train labels:", train_labels.shape)
print("Dev labels:  ", dev_labels.shape)
print("Test labels: ", test_labels.shape)

# Inspect columns to find the target column name(s)
print("train columns:", train_labels.columns.tolist())


Train labels: (163, 6)
Dev labels:   (56, 6)
Test labels:  (56, 6)
train columns: ['Participant_ID', 'Gender', 'PHQ_Binary', 'PHQ_Score', 'PCL-C (PTSD)', 'PTSD Severity']


In [4]:
# Different CSVs sometimes use different label column names. Normalize to 'PHQ8_Binary'
def normalize_label_column(df):
    # common variants observed: 'PHQ8_Binary', 'PHQ_Binary', 'PHQ8_binary', etc.
    candidates = ['PHQ8_Binary','PHQ_Binary','PHQ8_binary','PHQ_binary','phq8_binary','PHQ8']
    for c in candidates:
        if c in df.columns:
            df = df.rename(columns={c:'PHQ8_Binary'})
            break
    # If still not present, try to find something with "PHQ" substring.
    if 'PHQ8_Binary' not in df.columns:
        for c in df.columns:
            if 'PHQ' in c.upper():
                df = df.rename(columns={c:'PHQ8_Binary'})
                break
    return df

train_labels = normalize_label_column(train_labels)
dev_labels   = normalize_label_column(dev_labels)
test_labels  = normalize_label_column(test_labels)

# Ensure Participant_ID column exists under consistent name:
for df_name, df in [('train', train_labels), ('dev', dev_labels), ('test', test_labels)]:
    if 'Participant_ID' not in df.columns and 'participant_ID' in df.columns:
        df.rename(columns={'participant_ID':'Participant_ID'}, inplace=True)
    print(df_name, "columns:", df.columns.tolist())


train columns: ['Participant_ID', 'Gender', 'PHQ8_Binary', 'PHQ_Score', 'PCL-C (PTSD)', 'PTSD Severity']
dev columns: ['Participant_ID', 'Gender', 'PHQ8_Binary', 'PHQ_Score', 'PCL-C (PTSD)', 'PTSD Severity']
test columns: ['Participant_ID', 'Gender', 'PHQ8_Binary', 'PHQ_Score', 'PCL-C (PTSD)', 'PTSD Severity']


In [5]:
# Create a unified list of unique participant IDs across splits
train_ids = train_labels['Participant_ID'].astype(str).tolist()
dev_ids   = dev_labels['Participant_ID'].astype(str).tolist()
test_ids  = test_labels['Participant_ID'].astype(str).tolist()

all_participant_ids = sorted(list(set(train_ids + dev_ids + test_ids)))
print(f"Total unique participants across splits: {len(all_participant_ids)}")
# all_participant_ids


Total unique participants across splits: 275


In [7]:
all_participant_ids[0:10]

['300', '301', '302', '303', '304', '305', '306', '307', '308', '309']

In [11]:
train_ids[0:10]

['302', '303', '304', '305', '307', '308', '309', '310', '311', '312']

In [10]:
dev_ids[0:10]

['300', '301', '306', '317', '320', '321', '331', '334', '336', '343']

In [12]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

warnings.filterwarnings("ignore")

In [13]:
def aggregate_stats(arr):
    """Return mean, std, min, max for a numpy array (flattened)."""
    arr_flat = np.asarray(arr).flatten()
    if arr_flat.size == 0:
        return (np.nan, np.nan, np.nan, np.nan)
    return (np.mean(arr_flat), np.std(arr_flat), np.min(arr_flat), np.max(arr_flat))

In [20]:
def extract_features_from_file(file_path, sr_target=16000, duration=180):
    """
    Extract enhanced features with prosody (depression-specific)
    duration: 180s = 3 minutes (good balance)
    """
    y, sr = librosa.load(file_path, sr=sr_target, duration=duration)
    y, _ = librosa.effects.trim(y)
    feats = {}
    
    # ===== EXISTING FEATURES (IMPROVED) =====
    # MFCC (20 instead of 13 - captures more articulation detail)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    feats['mfcc_mean'], feats['mfcc_std'], feats['mfcc_min'], feats['mfcc_max'] = aggregate_stats(mfccs)
    
    # Chroma
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    feats['chroma_mean'], feats['chroma_std'], feats['chroma_min'], feats['chroma_max'] = aggregate_stats(chroma)
    
    # Mel spectrogram
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    feats['mel_mean'], feats['mel_std'], feats['mel_min'], feats['mel_max'] = aggregate_stats(mel)
    
    # Spectral contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    feats['contrast_mean'], feats['contrast_std'], feats['contrast_min'], feats['contrast_max'] = aggregate_stats(contrast)
    
    # Tonnetz
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
    feats['tonnetz_mean'], feats['tonnetz_std'], feats['tonnetz_min'], feats['tonnetz_max'] = aggregate_stats(tonnetz)
    
    # ===== NEW: PROSODY FEATURES (CRITICAL FOR DEPRESSION) =====
    # 1. Pitch (F0) - depression reduces pitch variability
    try:
        f0 = librosa.yin(y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))
        f0_valid = f0[~np.isnan(f0)]
        if len(f0_valid) > 0:
            feats['pitch_mean'] = np.mean(f0_valid)
            feats['pitch_std'] = np.std(f0_valid)
            feats['pitch_range'] = np.max(f0_valid) - np.min(f0_valid)
        else:
            feats['pitch_mean'] = 0
            feats['pitch_std'] = 0
            feats['pitch_range'] = 0
    except:
        feats['pitch_mean'] = 0
        feats['pitch_std'] = 0
        feats['pitch_range'] = 0
    
    # 2. Energy (RMS) - depression reduces vocal energy
    rms = librosa.feature.rms(y=y)[0]
    feats['energy_mean'] = np.mean(rms)
    feats['energy_std'] = np.std(rms)
    feats['energy_range'] = np.max(rms) - np.min(rms)
    
    # 3. Zero Crossing Rate (speech activity)
    zcr = librosa.feature.zero_crossing_rate(y)[0]
    feats['zcr_mean'] = np.mean(zcr)
    feats['zcr_std'] = np.std(zcr)
    
    # 4. Spectral Features (voice quality)
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    feats['centroid_mean'] = np.mean(centroid)
    feats['centroid_std'] = np.std(centroid)
    
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    feats['rolloff_mean'] = np.mean(rolloff)
    feats['rolloff_std'] = np.std(rolloff)
    
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
    feats['bandwidth_mean'] = np.mean(bandwidth)
    
    # 5. Pause Detection (depression = more pauses/silence)
    intervals = librosa.effects.split(y, top_db=20)
    total_duration = len(y) / sr
    if len(intervals) > 0:
        speech_duration = sum((end - start) / sr for start, end in intervals)
        feats['silence_ratio'] = 1 - (speech_duration / total_duration)
        feats['num_pauses'] = len(intervals) - 1
    else:
        feats['silence_ratio'] = 1.0
        feats['num_pauses'] = 0
    
    # 6. Speech Rate (onset strength)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    feats['onset_mean'] = np.mean(onset_env)
    feats['onset_std'] = np.std(onset_env)
    
    return feats

In [16]:
import wave
import contextlib

durations = []
for pid in all_participant_ids:
    file_path = f"D:/depressiondetector/diag-woz/{pid}_P/{pid}_P/{pid}_AUDIO.wav"
    try:
        with contextlib.closing(wave.open(file_path, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
            duration = frames / float(rate)
            durations.append(duration)
    except:
        pass

print(f"Average duration: {np.mean(durations):.1f}s")
print(f"Min duration: {np.min(durations):.1f}s")
print(f"Max duration: {np.max(durations):.1f}s")

Average duration: 964.0s
Min duration: 414.8s
Max duration: 1966.2s


In [19]:
import wave
import contextlib

durations = []
for pid in train_ids:
    file_path = f"D:/depressiondetector/diag-woz/{pid}_P/{pid}_P/{pid}_AUDIO.wav"
    try:
        with contextlib.closing(wave.open(file_path, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
            duration = frames / float(rate)
            durations.append(duration)
    except:
        pass

print(f"Average duration: {np.mean(durations):.1f}s")
print(f"Min duration: {np.min(durations):.1f}s")
print(f"Max duration: {np.max(durations):.1f}s")

Average duration: 973.1s
Min duration: 414.8s
Max duration: 1966.2s


In [18]:
import wave
import contextlib

durations = []
for pid in test_ids:
    file_path = f"D:/depressiondetector/diag-woz/{pid}_P/{pid}_P/{pid}_AUDIO.wav"
    try:
        with contextlib.closing(wave.open(file_path, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
            duration = frames / float(rate)
            durations.append(duration)
    except:
        pass

print(f"Average duration: {np.mean(durations):.1f}s")
print(f"Min duration: {np.min(durations):.1f}s")
print(f"Max duration: {np.max(durations):.1f}s")

Average duration: 972.2s
Min duration: 525.1s
Max duration: 1686.8s


In [17]:
import wave
import contextlib

durations = []
for pid in dev_ids:
    file_path = f"D:/depressiondetector/diag-woz/{pid}_P/{pid}_P/{pid}_AUDIO.wav"
    try:
        with contextlib.closing(wave.open(file_path, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
            duration = frames / float(rate)
            durations.append(duration)
    except:
        pass

print(f"Average duration: {np.mean(durations):.1f}s")
print(f"Min duration: {np.min(durations):.1f}s")
print(f"Max duration: {np.max(durations):.1f}s")

Average duration: 929.5s
Min duration: 533.3s
Max duration: 1420.0s


In [22]:
all_features = []
missing_files = []
print(f"Starting audio feature extraction for {len(all_participant_ids)} participants...")

for idx, pid in enumerate(tqdm(all_participant_ids, desc="participants"), start=1):
    pid_str = str(pid)
    folder_name = f"{pid_str}_P"
    file_name = f"{pid_str}_AUDIO.wav"
    file_path = f"D:/depressiondetector/diag-woz/{folder_name}/{folder_name}/{file_name}"
    try:
        feats = extract_features_from_file(file_path, sr_target=16000, duration=180)  # load up to 60s
        feats['Participant_ID'] = pid_str
        all_features.append(feats)
    except Exception as e:
        print(f"Error processing {pid_str}: {e}")

print(f"Extraction finished. Extracted features for {len(all_features)} participants.")
print(f"Missing files: {len(missing_files)} (first 10):", missing_files[:10])


Starting audio feature extraction for 275 participants...


participants:   0%|          | 0/275 [00:00<?, ?it/s]

Error processing 632: [Errno 2] No such file or directory: 'D:/depressiondetector/diag-woz/632_P/632_P/632_AUDIO.wav'
Error processing 633: [Errno 2] No such file or directory: 'D:/depressiondetector/diag-woz/633_P/633_P/633_AUDIO.wav'
Error processing 661: [Errno 2] No such file or directory: 'D:/depressiondetector/diag-woz/661_P/661_P/661_AUDIO.wav'
Error processing 662: [Errno 2] No such file or directory: 'D:/depressiondetector/diag-woz/662_P/662_P/662_AUDIO.wav'
Error processing 663: [Errno 2] No such file or directory: 'D:/depressiondetector/diag-woz/663_P/663_P/663_AUDIO.wav'
Error processing 664: [Errno 2] No such file or directory: 'D:/depressiondetector/diag-woz/664_P/664_P/664_AUDIO.wav'
Extraction finished. Extracted features for 269 participants.
Missing files: 0 (first 10): []


In [26]:
all_features[0:1]

[{'mfcc_mean': np.float32(-14.795773),
  'mfcc_std': np.float32(99.142136),
  'mfcc_min': np.float32(-533.9602),
  'mfcc_max': np.float32(177.34402),
  'chroma_mean': np.float32(0.3885395),
  'chroma_std': np.float32(0.30154717),
  'chroma_min': np.float32(0.00084337924),
  'chroma_max': np.float32(1.0),
  'mel_mean': np.float32(0.038381375),
  'mel_std': np.float32(1.8338321),
  'mel_min': np.float32(8.256466e-08),
  'mel_max': np.float32(544.17773),
  'contrast_mean': np.float64(16.10802707595517),
  'contrast_std': np.float64(3.449186360614551),
  'contrast_min': np.float64(5.814903246286432),
  'contrast_max': np.float64(45.859467778186776),
  'tonnetz_mean': np.float64(-0.02642035126228308),
  'tonnetz_std': np.float64(0.13178216765735398),
  'tonnetz_min': np.float64(-0.6497567244805398),
  'tonnetz_max': np.float64(0.675819401629269),
  'pitch_mean': np.float64(246.13021961324466),
  'pitch_std': np.float64(263.6960673538121),
  'pitch_range': np.float64(2139.7633136094673),
  '

In [27]:
df_features = pd.DataFrame(all_features)
# Sort columns to put Participant_ID first
cols = ['Participant_ID'] + [c for c in df_features.columns if c != 'Participant_ID']
df_features = df_features[cols]
df_features.to_csv(OUTPUT_CSV_FILE, index=False)
print("Saved features to:", OUTPUT_CSV_FILE)
df_features.head()


Saved features to: D:\depressiondetector\extracted_features\audio_features.csv


Unnamed: 0,Participant_ID,mfcc_mean,mfcc_std,mfcc_min,mfcc_max,chroma_mean,chroma_std,chroma_min,chroma_max,mel_mean,...,zcr_std,centroid_mean,centroid_std,rolloff_mean,rolloff_std,bandwidth_mean,silence_ratio,num_pauses,onset_mean,onset_std
0,300,-14.795773,99.142136,-533.960205,177.344025,0.388539,0.301547,0.000843,1.0,0.038381,...,0.039893,1943.937335,468.30598,4513.490657,1082.837949,2085.591297,0.954844,31,0.975718,0.771126
1,301,-14.631455,104.950478,-543.891418,225.267242,0.46525,0.308267,0.000861,1.0,0.032293,...,0.055971,1531.415721,574.182551,3560.869734,1378.15515,1821.56473,0.757511,110,1.258738,1.22068
2,302,-14.796026,107.798409,-576.355042,180.074036,0.492308,0.301694,0.001593,1.0,0.007299,...,0.041391,1596.970103,462.771811,3764.932068,1058.536113,1903.019419,0.691378,133,1.149674,0.993042
3,303,-16.326721,106.826447,-562.320007,206.37825,0.398873,0.319536,0.00029,1.0,0.022962,...,0.039343,1407.789278,490.247017,3176.833841,1294.371725,1727.703705,0.695467,138,1.365498,1.325724
4,304,-16.78087,113.715179,-562.917175,203.109924,0.431276,0.318336,0.000286,1.0,0.007488,...,0.024001,1332.510559,302.009992,3070.330552,956.134064,1722.597308,0.765156,119,1.146712,1.024693


In [28]:
# Reload features (safe)
features_df = pd.read_csv(OUTPUT_CSV_FILE).astype({"Participant_ID": str})

# Ensure label dataframes Participant_ID are str
train_labels['Participant_ID'] = train_labels['Participant_ID'].astype(str)
dev_labels['Participant_ID'] = dev_labels['Participant_ID'].astype(str)
test_labels['Participant_ID'] = test_labels['Participant_ID'].astype(str)

# Merge
train_df = pd.merge(features_df, train_labels, on='Participant_ID', how='inner')
dev_df   = pd.merge(features_df, dev_labels, on='Participant_ID', how='inner')
test_df  = pd.merge(features_df, test_labels, on='Participant_ID', how='inner')

print("train_df:", train_df.shape)
print("dev_df:  ", dev_df.shape)
print("test_df: ", test_df.shape)

# Quick check: show a couple rows
train_df.head()



train_df: (161, 43)
dev_df:   (55, 43)
test_df:  (53, 43)


Unnamed: 0,Participant_ID,mfcc_mean,mfcc_std,mfcc_min,mfcc_max,chroma_mean,chroma_std,chroma_min,chroma_max,mel_mean,...,bandwidth_mean,silence_ratio,num_pauses,onset_mean,onset_std,Gender,PHQ8_Binary,PHQ_Score,PCL-C (PTSD),PTSD Severity
0,302,-14.796026,107.79841,-576.35504,180.07404,0.492308,0.301694,0.001593,1.0,0.007299,...,1903.019419,0.691378,133,1.149674,0.993042,male,0,4,0,28
1,303,-16.326721,106.82645,-562.32,206.37825,0.398873,0.319536,0.00029,1.0,0.022962,...,1727.703705,0.695467,138,1.365498,1.325724,female,0,0,0,17
2,304,-16.78087,113.71518,-562.9172,203.10992,0.431276,0.318336,0.000286,1.0,0.007488,...,1722.597308,0.765156,119,1.146712,1.024693,female,0,6,0,20
3,305,-6.683355,74.546936,-436.8938,241.80798,0.372353,0.307797,0.000101,1.0,1.248431,...,1082.563587,0.498133,164,1.133962,1.145486,male,0,7,0,28
4,307,-6.802663,69.84698,-414.14072,213.75266,0.329199,0.317432,0.000107,1.0,1.884708,...,1171.305341,0.495822,195,1.31544,1.406614,female,0,4,0,23


In [49]:
# Reload features (safe)
features_df = pd.read_csv(OUTPUT_CSV_FILE).astype({"Participant_ID": str})

# Ensure label dataframes Participant_ID are str
train_labels['Participant_ID'] = train_labels['Participant_ID'].astype(str)
dev_labels['Participant_ID'] = dev_labels['Participant_ID'].astype(str)
test_labels['Participant_ID'] = test_labels['Participant_ID'].astype(str)

# Merge
train_df = pd.merge(features_df, train_labels, on='Participant_ID', how='inner')
dev_df   = pd.merge(features_df, dev_labels, on='Participant_ID', how='inner')
test_df  = pd.merge(features_df, test_labels, on='Participant_ID', how='inner')

print("train_df:", train_df.shape)
print("dev_df:  ", dev_df.shape)
print("test_df: ", test_df.shape)

# Quick check: show a couple rows
train_df.head()


train_df: (161, 26)
dev_df:   (55, 26)
test_df:  (53, 26)


Unnamed: 0,Participant_ID,mfcc_mean,mfcc_std,mfcc_min,mfcc_max,chroma_mean,chroma_std,chroma_min,chroma_max,mel_mean,...,contrast_max,tonnetz_mean,tonnetz_std,tonnetz_min,tonnetz_max,Gender,PHQ8_Binary,PHQ_Score,PCL-C (PTSD),PTSD Severity
0,302,-22.704115,125.54751,-576.35504,159.41074,0.500923,0.309544,0.001593,1.0,0.012573,...,37.810635,-0.012819,0.069096,-0.369869,0.344158,male,0,4,0,28
1,303,-25.683535,137.7856,-574.6461,186.30038,0.414608,0.315139,0.001407,1.0,0.012661,...,39.142904,-0.014607,0.085247,-0.516046,0.476484,female,0,0,0,17
2,304,-26.678469,139.74298,-572.50555,166.64377,0.427352,0.320371,0.000836,1.0,0.006618,...,44.504158,-0.021001,0.089289,-0.540239,0.500995,female,0,6,0,20
3,305,-9.534797,89.30562,-436.8938,220.432,0.334624,0.319431,9.6e-05,1.0,1.701195,...,43.080801,0.004412,0.118239,-0.6353,0.658467,male,0,7,0,28
4,307,-9.506833,89.39855,-430.17856,215.8266,0.328442,0.318607,0.000379,1.0,1.689439,...,41.626073,-0.010205,0.123659,-0.662332,0.65856,female,0,4,0,23


In [29]:
# Identify feature columns (everything in features_df except Participant_ID)
feature_cols = [c for c in features_df.columns if c != 'Participant_ID']

# Ensure the target column exists in merged dfs
if 'PHQ8_Binary' not in train_df.columns:
    raise KeyError("PHQ8_Binary not found in merged train_df. Check label normalization step.")

# Build X, y
X_train = train_df[feature_cols].copy()
y_train = train_df['PHQ8_Binary'].astype(int).copy()

X_dev = dev_df[feature_cols].copy()
y_dev = dev_df['PHQ8_Binary'].astype(int).copy()

X_test = test_df[feature_cols].copy()
y_test = test_df['PHQ8_Binary'].astype(int).copy()


In [31]:
print("Train data:")
print("X shapes:", X_train.shape)
print("y distribution (train):\n", y_train.value_counts())


Train data:
X shapes: (161, 37)
y distribution (train):
 PHQ8_Binary
0    124
1     37
Name: count, dtype: int64


In [32]:
print("Test data:")
print("X shapes:", X_test.shape)
print("y distribution (train):\n", y_test.value_counts())


Test data:
X shapes: (53, 37)
y distribution (train):
 PHQ8_Binary
0    37
1    16
Name: count, dtype: int64


In [34]:
print("Test data:")
print("X shapes:", X_dev.shape)
print("y distribution (train):\n", y_dev.value_counts())


Test data:
X shapes: (55, 37)
y distribution (train):
 PHQ8_Binary
0    43
1    12
Name: count, dtype: int64


In [35]:
# Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_dev_scaled   = scaler.transform(X_dev)
X_test_scaled  = scaler.transform(X_test)



In [36]:
# Logistic regression baseline
lr = LogisticRegression(random_state=42, class_weight='balanced', max_iter=1000)
lr.fit(X_train_scaled, y_train)

y_dev_pred = lr.predict(X_dev_scaled)
print("--- Logistic Regression on Dev set ---")
print("Accuracy:", accuracy_score(y_dev, y_dev_pred))
print(classification_report(y_dev, y_dev_pred, target_names=['Not Depressed (0)','Depressed (1)']))

y_test_pred = lr.predict(X_test_scaled)
print("\n--- Logistic Regression on Test set ---")
print("Accuracy:", accuracy_score(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, target_names=['Not Depressed (0)','Depressed (1)']))



--- Logistic Regression on Dev set ---
Accuracy: 0.5454545454545454
                   precision    recall  f1-score   support

Not Depressed (0)       0.76      0.60      0.68        43
    Depressed (1)       0.19      0.33      0.24        12

         accuracy                           0.55        55
        macro avg       0.48      0.47      0.46        55
     weighted avg       0.64      0.55      0.58        55


--- Logistic Regression on Test set ---
Accuracy: 0.4528301886792453
                   precision    recall  f1-score   support

Not Depressed (0)       0.63      0.51      0.57        37
    Depressed (1)       0.22      0.31      0.26        16

         accuracy                           0.45        53
        macro avg       0.43      0.41      0.41        53
     weighted avg       0.51      0.45      0.47        53



In [39]:
# Random Forest baseline
rf = RandomForestClassifier(random_state=42, class_weight='balanced', n_estimators=100)
rf.fit(X_train_scaled, y_train)

y_dev_rf = rf.predict(X_dev_scaled)
print("\n--- Random Forest on Dev set ---")
print("Accuracy:", accuracy_score(y_dev, y_dev_rf))
print(classification_report(y_dev, y_dev_rf, target_names=['Not Depressed (0)','Depressed (1)']))


y_test_rf = rf.predict(X_test_scaled)
print("\n--- Random Forest on Test set ---")
print("Accuracy:", accuracy_score(y_test, y_test_rf))
print(classification_report(y_test, y_test_rf, target_names=['Not Depressed (0)','Depressed (1)']))



--- Random Forest on Dev set ---
Accuracy: 0.7636363636363637
                   precision    recall  f1-score   support

Not Depressed (0)       0.78      0.98      0.87        43
    Depressed (1)       0.00      0.00      0.00        12

         accuracy                           0.76        55
        macro avg       0.39      0.49      0.43        55
     weighted avg       0.61      0.76      0.68        55


--- Random Forest on Test set ---
Accuracy: 0.660377358490566
                   precision    recall  f1-score   support

Not Depressed (0)       0.69      0.95      0.80        37
    Depressed (1)       0.00      0.00      0.00        16

         accuracy                           0.66        53
        macro avg       0.34      0.47      0.40        53
     weighted avg       0.48      0.66      0.56        53



In [None]:
from xgboost import XGBClassifier

# Add after your Random Forest code
xgb = XGBClassifier(
    random_state=42,
    scale_pos_weight=len(y_train[y_train==0]) / len(y_train[y_train==1]),  # Handle imbalance
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8
)
xgb.fit(X_train_scaled, y_train)

y_test_xgb = xgb.predict(X_test_scaled)
print("\n--- XGBoost on Test set ---")
print("Accuracy:", accuracy_score(y_test, y_test_xgb))
print(classification_report(y_test, y_test_xgb, target_names=['Not Depressed (0)','Depressed (1)']))


y_test_xgb = xgb.predict(X_test_scaled)
print("\n--- XGBoost on Test set ---")
print("Accuracy:", accuracy_score(y_test, y_test_xgb))
print(classification_report(y_test, y_test_xgb, target_names=['Not Depressed (0)','Depressed (1)']))


--- XGBoost on Test set ---
Accuracy: 0.49056603773584906
                   precision    recall  f1-score   support

Not Depressed (0)       0.62      0.68      0.65        37
    Depressed (1)       0.08      0.06      0.07        16

         accuracy                           0.49        53
        macro avg       0.35      0.37      0.36        53
     weighted avg       0.46      0.49      0.47        53



In [43]:
from sklearn.feature_selection import SelectKBest, f_classif

# Select top 25 features
selector = SelectKBest(f_classif, k=25)
X_train_selected = selector.fit_transform(X_train_scaled, y_train)
X_test_selected = selector.transform(X_test_scaled)
X_dev_selected = selector.transform(X_dev_scaled)

# Train on selected features
rf_selected = RandomForestClassifier(random_state=42, class_weight='balanced', n_estimators=200)
rf_selected.fit(X_train_selected, y_train)

y_test_selected = rf_selected.predict(X_test_selected)
print("\n--- Random Forest with Feature Selection  on test---")
print(classification_report(y_test, y_test_selected, target_names=['Not Depressed (0)','Depressed (1)']))

y_dev_selected = rf_selected.predict(X_dev_selected)
print("\n--- Random Forest with Feature Selection on train  ---")
print(classification_report(y_dev, y_dev_selected, target_names=['Not Depressed (0)','Depressed (1)']))


# Show which features were selected
selected_features = [feature_cols[i] for i in selector.get_support(indices=True)]
print("\nSelected features:", selected_features)


--- Random Forest with Feature Selection  on test---
                   precision    recall  f1-score   support

Not Depressed (0)       0.71      0.97      0.82        37
    Depressed (1)       0.50      0.06      0.11        16

         accuracy                           0.70        53
        macro avg       0.60      0.52      0.46        53
     weighted avg       0.64      0.70      0.60        53


--- Random Forest with Feature Selection on train  ---
                   precision    recall  f1-score   support

Not Depressed (0)       0.76      0.91      0.83        43
    Depressed (1)       0.00      0.00      0.00        12

         accuracy                           0.71        55
        macro avg       0.38      0.45      0.41        55
     weighted avg       0.60      0.71      0.65        55


Selected features: ['mfcc_min', 'chroma_mean', 'mel_mean', 'mel_min', 'mel_max', 'contrast_mean', 'contrast_std', 'contrast_min', 'contrast_max', 'tonnetz_mean', 'pitch_mean',

In [47]:
# SMOTE on numerical features
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)
print("Before SMOTE:", y_train.value_counts().to_dict())
print("After SMOTE: ", pd.Series(y_train_res).value_counts().to_dict())

# Scale again (fit on resampled training set)
scaler_smote = StandardScaler()
X_train_res_scaled = scaler_smote.fit_transform(X_train_res)
X_test_scaled_smote = scaler_smote.transform(X_test)  # transform test with same scaler

rf_smote = RandomForestClassifier(random_state=42, n_estimators=200)
rf_smote.fit(X_train_res_scaled, y_train_res)

y_test_smote = rf_smote.predict(X_test_scaled_smote)
print("\n--- Random Forest (SMOTE) on Test set ---")
print(classification_report(y_test, y_test_smote, target_names=['Not Depressed (0)','Depressed (1)']))


Before SMOTE: {0: 124, 1: 37}
After SMOTE:  {0: 124, 1: 124}

--- Random Forest (SMOTE) on Test set ---
                   precision    recall  f1-score   support

Not Depressed (0)       0.69      0.78      0.73        37
    Depressed (1)       0.27      0.19      0.22        16

         accuracy                           0.60        53
        macro avg       0.48      0.49      0.48        53
     weighted avg       0.56      0.60      0.58        53



In [45]:
from sklearn.feature_selection import SelectKBest, f_classif
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

# ===== STEP 1: Scale Original Data =====
scaler_initial = StandardScaler()
X_train_scaled = scaler_initial.fit_transform(X_train)
X_test_scaled = scaler_initial.transform(X_test)
X_dev_scaled = scaler_initial.transform(X_dev)

# ===== STEP 2: Feature Selection (on scaled data) =====
selector = SelectKBest(f_classif, k=25)
X_train_selected = selector.fit_transform(X_train_scaled, y_train)
X_test_selected = selector.transform(X_test_scaled)
X_dev_selected = selector.transform(X_dev_scaled)

print(f"Original features: {X_train.shape[1]}")
print(f"Selected features: {X_train_selected.shape[1]}")

# Show which features were selected
selected_features = [feature_cols[i] for i in selector.get_support(indices=True)]
print("\nSelected features:", selected_features)

# ===== STEP 3: SMOTE on Selected Features =====
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_selected, y_train)
print(f"\nBefore SMOTE: {y_train.value_counts().to_dict()}")
print(f"After SMOTE:  {pd.Series(y_train_smote).value_counts().to_dict()}")

# ===== STEP 4: Scale SMOTE Data =====
scaler_smote = StandardScaler()
X_train_smote_scaled = scaler_smote.fit_transform(X_train_smote)
X_test_final = scaler_smote.transform(X_test_selected)
X_dev_final = scaler_smote.transform(X_dev_selected)

# ===== STEP 5: Train Random Forest =====
rf_best = RandomForestClassifier(
    random_state=42,
    n_estimators=300,  # Increased from 200
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    class_weight='balanced'  # Still use this for extra protection
)

rf_best.fit(X_train_smote_scaled, y_train_smote)

# ===== STEP 6: Evaluate =====
print("\n" + "="*60)
print("=== IMPROVED MODEL: Feature Selection + SMOTE + RF ===")
print("="*60)

# Dev set

# Test set
y_test_pred = rf_best.predict(X_test_final)
print("\n--- Test Set Results ---")
print("Accuracy:", accuracy_score(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, target_names=['Not Depressed (0)','Depressed (1)']))


Original features: 37
Selected features: 25

Selected features: ['mfcc_min', 'chroma_mean', 'mel_mean', 'mel_min', 'mel_max', 'contrast_mean', 'contrast_std', 'contrast_min', 'contrast_max', 'tonnetz_mean', 'pitch_mean', 'pitch_std', 'pitch_range', 'energy_range', 'zcr_mean', 'zcr_std', 'centroid_mean', 'centroid_std', 'rolloff_mean', 'rolloff_std', 'bandwidth_mean', 'silence_ratio', 'num_pauses', 'onset_mean', 'onset_std']

Before SMOTE: {0: 124, 1: 37}
After SMOTE:  {0: 124, 1: 124}

=== IMPROVED MODEL: Feature Selection + SMOTE + RF ===

--- Test Set Results ---
Accuracy: 0.5849056603773585
                   precision    recall  f1-score   support

Not Depressed (0)       0.69      0.73      0.71        37
    Depressed (1)       0.29      0.25      0.27        16

         accuracy                           0.58        53
        macro avg       0.49      0.49      0.49        53
     weighted avg       0.57      0.58      0.58        53



In [48]:
import joblib
model_dir = os.path.join(OUTPUT_DIR, "models")
os.makedirs(model_dir, exist_ok=True)
joblib.dump(lr, os.path.join(model_dir, "logreg_baseline.joblib"))
joblib.dump(rf, os.path.join(model_dir, "rf_baseline.joblib"))
joblib.dump(rf_smote, os.path.join(model_dir, "rf_smote.joblib"))
joblib.dump(scaler_smote, os.path.join(model_dir, "scaler_smote.joblib"))
print("Models and scaler saved to", model_dir)


Models and scaler saved to D:\depressiondetector\extracted_features\models


In [1]:
import librosa
import numpy as np
import joblib

# ----------- Load Model & Scaler -----------
model = joblib.load("D:/depressiondetector/extracted_features/models/rf_smote.joblib")  # Random Forest trained on SMOTE data
scaler = joblib.load("D:/depressiondetector/extracted_features/models/scaler_smote.joblib")

# ----------- Feature Extraction -----------
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=16000, duration=60)
    y, _ = librosa.effects.trim(y)
    feats = []
    # MFCC (13)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    feats.extend([np.mean(mfccs), np.std(mfccs), np.min(mfccs), np.max(mfccs)])
    # Chroma
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    feats.extend([np.mean(chroma), np.std(chroma), np.min(chroma), np.max(chroma)])
    # Mel spectrogram
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    feats.extend([np.mean(mel), np.std(mel), np.min(mel), np.max(mel)])
    # Spectral contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    feats.extend([np.mean(contrast), np.std(contrast), np.min(contrast), np.max(contrast)])
    # Tonnetz
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
    feats.extend([np.mean(tonnetz), np.std(tonnetz), np.min(tonnetz), np.max(tonnetz)])
    return np.array(feats).reshape(1, -1)

# ----------- Predict Function -----------
def predict_audio(file_path):
    features = extract_features(file_path)
    features_scaled = scaler.transform(features)

    prediction = model.predict(features_scaled)[0]
    prob = model.predict_proba(features_scaled)[0][prediction]

    labels = {0: "Not Depressed ðŸ™‚", 1: "Depressed ðŸ˜”"}

    print(f"Prediction: {labels[prediction]}  (confidence: {prob:.2f})")




In [2]:
test_file = "D:/depressiondetector/hello4.wav"
predict_audio(test_file)

Prediction: Not Depressed ðŸ™‚  (confidence: 0.56)


