In [None]:
import os, json, time, random
from pathlib import Path
import numpy as np
import pandas as pd
import librosa
import sounddevice as sd
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, classification_report, roc_curve, auc, confusion_matrix
import joblib
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Device
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("DEVICE:", DEVICE)

# Paths
BASE = r"C:\Users\Raj Dhanush\OneDrive\Desktop\DEEPLEARNING PROJECT"
TRAIN_AUDIO_DIR = os.path.join(BASE, "depression_detection", "data", "train")
TEST_AUDIO_DIR  = os.path.join(BASE, "depression_detection", "data", "test")
TRAIN_CSV = os.path.join(BASE, "train_split_Depression_AVEC2017.csv")
TEST_CSV  = os.path.join(BASE, "test_split_Depression_AVEC2017.csv")
FEATURE_DIR = os.path.join(BASE, "depression_detection", "features")
MODEL_DIR = os.path.join(BASE, "depression_detection", "models")
os.makedirs(FEATURE_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

print("Train audio dir:", TRAIN_AUDIO_DIR)
print("Test audio dir:", TEST_AUDIO_DIR)


In [None]:
# Settings
SR = 16000
WINDOW_SEC = 5.0
HOP_SEC = 2.5
N_MFCC = 40

BATCH_SIZE = 16
EPOCHS = 25
LR = 1e-4
PATIENCE = 6
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Feature extraction: MFCC
def extract_mfcc(y, sr=SR, n_mfcc=N_MFCC):
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc = librosa.util.normalize(mfcc)
    return mfcc.astype(np.float32)  # (n_mfcc, frames)


In [3]:
def process_file_to_windows(wav_path, out_dir, window_sec=WINDOW_SEC, hop_sec=HOP_SEC, sr=SR):
    y, _ = librosa.load(wav_path, sr=sr)
    total = len(y)
    window_samples = int(window_sec * sr)
    hop_samples = int(hop_sec * sr)
    starts = list(range(0, max(1, total - window_samples + 1), hop_samples))
    if starts and (starts[-1] + window_samples < total):
        starts.append(total - window_samples)
    if not starts:
        starts = [0]
    saved = []
    for i, s in enumerate(starts):
        chunk = y[s:s+window_samples]
        mfcc = extract_mfcc(chunk, sr=sr)
        base = Path(wav_path).stem
        fname = f"{base}_win{i}.npy"
        out_path = os.path.join(out_dir, fname)
        np.save(out_path, mfcc)
        saved.append(out_path)
    return saved

def preprocess_all_train(train_csv=TRAIN_CSV, audio_dir=TRAIN_AUDIO_DIR, feature_dir=FEATURE_DIR):
    os.makedirs(feature_dir, exist_ok=True)
    df = pd.read_csv(train_csv)
    mapping = []
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Preprocess train"):
        pid = str(int(row['Participant_ID']))
        wav_name = f"{pid}_AUDIO.wav"
        wav_path = os.path.join(audio_dir, wav_name)
        if not os.path.exists(wav_path):
            print("Missing:", wav_path)
            continue
        saved = process_file_to_windows(wav_path, feature_dir)
        for p in saved:
            mapping.append({"file": p, "participant": pid, "label": int(row["PHQ8_Binary"])})
    with open(os.path.join(feature_dir, "mapping.json"), "w") as f:
        json.dump(mapping, f)
    print("Total windows saved:", len(mapping))
    return mapping

mapping_path = os.path.join(FEATURE_DIR, "mapping.json")
if not os.path.exists(mapping_path):
    print("Mapping not found — starting preprocessing (this will take time).")
    preprocess_all_train()
else:
    print("Mapping already exists at:", mapping_path)


Mapping already exists at: C:\Users\Raj Dhanush\OneDrive\Desktop\DEEPLEARNING PROJECT\depression_detection\features\mapping.json


In [4]:
with open(mapping_path, "r") as f:
    mapping = json.load(f)

print("Total windows:", len(mapping))
from collections import Counter
cnt = Counter([m["participant"] for m in mapping])
print("Example windows per participant (first 8):", list(cnt.items())[:8])


Total windows: 38682
Example windows per participant (first 8): [('303', 394), ('304', 317), ('305', 681), ('310', 337), ('312', 315), ('313', 301), ('315', 390), ('316', 347)]


In [5]:
class WindowDataset(Dataset):
    def __init__(self, mapping_json, normalize=False):
        with open(mapping_json, "r") as f:
            self.mapping = json.load(f)
        self.max_frames = 0
        for entry in self.mapping:
            arr = np.load(entry["file"])
            self.max_frames = max(self.max_frames, arr.shape[1])
        self.normalize = normalize
        if self.normalize:
            all_means = [np.load(e["file"]).mean() for e in self.mapping]
            self.global_mean = np.mean(all_means)
            self.global_std = np.std(all_means) + 1e-9
        else:
            self.global_mean, self.global_std = 0.0, 1.0

    def __len__(self):
        return len(self.mapping)

    def __getitem__(self, idx):
        entry = self.mapping[idx]
        arr = np.load(entry["file"])
        n_mfcc, frames = arr.shape
        mf = self.max_frames
        if frames < mf:
            arr = np.pad(arr, ((0,0),(0,mf-frames)), mode='constant')
        elif frames > mf:
            arr = arr[:, :mf]
        arr = (arr - self.global_mean)/self.global_std
        x = torch.tensor(arr, dtype=torch.float).unsqueeze(0)
        y = torch.tensor(entry["label"], dtype=torch.float)
        return x, y

dataset = WindowDataset(mapping_path, normalize=False)
print("Dataset windows:", len(dataset), " max_frames:", dataset.max_frames)


Dataset windows: 38682  max_frames: 313


In [6]:
df_map = pd.DataFrame(mapping)
participants = df_map['participant'].unique()
part_label = df_map.groupby('participant')['label'].first().astype(int)

train_parts, val_parts = train_test_split(participants, test_size=0.2, random_state=SEED, stratify=[part_label.get(p,0) for p in participants])

train_idx = df_map[df_map['participant'].isin(train_parts)].index.tolist()
val_idx   = df_map[df_map['participant'].isin(val_parts)].index.tolist()

train_ds = Subset(dataset, train_idx)
val_ds   = Subset(dataset, val_idx)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, pin_memory=False)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=False)

print("Train windows:", len(train_ds), "Val windows:", len(val_ds))


Train windows: 30789 Val windows: 7893


In [7]:
class ConvEncoder(nn.Module):
    def __init__(self, in_ch=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_ch, 16, 3, padding=1), nn.BatchNorm2d(16), nn.ReLU(),
            nn.MaxPool2d((2,2)),
            nn.Conv2d(16,32,3,padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.MaxPool2d((2,2)),
            nn.Conv2d(32,64,3,padding=1), nn.BatchNorm2d(64), nn.ReLU()
        )
    def forward(self,x):
        return self.net(x)

class CNNBiLSTM(nn.Module):
    def __init__(self, n_mfcc=N_MFCC, lstm_hidden=128, n_layers=1):
        super().__init__()
        self.encoder = ConvEncoder()
        self.lstm_hidden = lstm_hidden
        self.lstm = None
        self.classifier = nn.Sequential(
            nn.Linear(2*lstm_hidden,64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64,1)
        )

    def forward(self,x):
        b,c,h,w = x.shape
        feat = self.encoder(x)
        b,c2,h2,w2 = feat.shape
        feat = feat.permute(0,3,1,2).contiguous().view(b,w2,-1)
        if self.lstm is None:
            input_size = feat.shape[-1]
            self.lstm = nn.LSTM(input_size, self.lstm_hidden, num_layers=1, batch_first=True, bidirectional=True).to(DEVICE)
        out,_ = self.lstm(feat)
        out = out[:,-1,:]
        logits = self.classifier(out).squeeze(1)
        return logits

model = CNNBiLSTM().to(DEVICE)
print(model)


CNNBiLSTM(
  (encoder): ConvEncoder(
    (net): Sequential(
      (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (6): ReLU()
      (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
      (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (10): ReLU()
    )
  )
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=64, out_

In [8]:
optimizer = optim.Adam(model.parameters(), lr=LR)
criterion = nn.BCEWithLogitsLoss()

best_val = float('inf')
pat = 0

train_losses, val_losses = [], []

for epoch in range(1,EPOCHS+1):
    model.train()
    train_loss = 0.0
    for xb,yb in tqdm(train_loader, desc=f"Train E{epoch}"):
        xb,yb = xb.to(DEVICE), yb.to(DEVICE)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*xb.size(0)
    train_loss /= len(train_loader.dataset)
    train_losses.append(train_loss)

    # Validation
    model.eval()
    val_loss = 0.0
    preds,trues = [],[]
    with torch.no_grad():
        for xb,yb in val_loader:
            xb,yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            loss = criterion(logits, yb)
            val_loss += loss.item()*xb.size(0)
            probs = torch.sigmoid(logits).cpu().numpy()
            preds.extend(probs.tolist())
            trues.extend(yb.cpu().numpy().tolist())
    val_loss /= len(val_loader.dataset)
    val_losses.append(val_loss)
    val_pred = [1 if p>0.5 else 0 for p in preds]
    val_acc = accuracy_score(trues,val_pred)
    val_f1 = f1_score(trues,val_pred,zero_division=0)
    print(f"Epoch {epoch}: train_loss={train_loss:.4f} val_loss={val_loss:.4f} acc={val_acc:.4f} f1={val_f1:.4f}")

    if val_loss<best_val:
        best_val=val_loss
        pat=0
        torch.save(model.state_dict(), os.path.join(MODEL_DIR,"best_cnn_bilstm.pth"))
        print("Saved best model.")
    else:
        pat+=1
        if pat>=PATIENCE:
            print("Early stopping")
            break

print("Training completed.")


Train E1:   0%|          | 0/1925 [00:00<?, ?it/s]

Epoch 1: train_loss=0.5855 val_loss=0.5791 acc=0.7191 f1=0.0743
Saved best model.


Train E2:   0%|          | 0/1925 [00:00<?, ?it/s]

Epoch 2: train_loss=0.5627 val_loss=0.5749 acc=0.7287 f1=0.1501
Saved best model.


Train E3:   0%|          | 0/1925 [00:00<?, ?it/s]

Epoch 3: train_loss=0.5197 val_loss=0.6072 acc=0.7211 f1=0.0983


Train E4:   0%|          | 0/1925 [00:00<?, ?it/s]

Epoch 4: train_loss=0.4597 val_loss=0.6089 acc=0.7239 f1=0.3649


Train E5:   0%|          | 0/1925 [00:00<?, ?it/s]

Epoch 5: train_loss=0.4117 val_loss=0.6476 acc=0.6855 f1=0.3970


Train E6:   0%|          | 0/1925 [00:00<?, ?it/s]

Epoch 6: train_loss=0.3798 val_loss=0.7181 acc=0.6788 f1=0.3373


Train E7:   0%|          | 0/1925 [00:00<?, ?it/s]

Epoch 7: train_loss=0.3485 val_loss=0.8468 acc=0.6002 f1=0.3907


Train E8:   0%|          | 0/1925 [00:00<?, ?it/s]

Epoch 8: train_loss=0.3259 val_loss=0.8138 acc=0.6501 f1=0.3737
Early stopping
Training completed.


In [9]:
def compute_global_stats(mapping_json):
    mm = []
    with open(mapping_json,"r") as f:
        m = json.load(f)
    for entry in tqdm(m):
        arr = np.load(entry["file"])
        mm.append(arr.mean())
    g_mean = float(np.mean(mm))
    g_std = float(np.std(mm))+1e-9
    joblib.dump({"mean":g_mean,"std":g_std}, os.path.join(MODEL_DIR,"global_stats.pkl"))
    return g_mean, g_std

if not os.path.exists(os.path.join(MODEL_DIR,"global_stats.pkl")):
    print("Computing global mean/std...")
    compute_global_stats(mapping_path)
else:
    print("global_stats.pkl exists.")


Computing global mean/std...


  0%|          | 0/38682 [00:00<?, ?it/s]

In [11]:
import torch
import os

# Define model directory
MODEL_DIR = r"C:\Users\Raj Dhanush\OneDrive\Desktop\DEEPLEARNING PROJECT\depression_detection\models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Save model weights
MODEL_PATH = os.path.join(MODEL_DIR, "mfcc_cnn_bilstm.pth")
torch.save(model.state_dict(), MODEL_PATH)

print(f"✅ Model saved successfully at: {MODEL_PATH}")


✅ Model saved successfully at: C:\Users\Raj Dhanush\OneDrive\Desktop\DEEPLEARNING PROJECT\depression_detection\models\mfcc_cnn_bilstm.pth


In [12]:
model = CNNBiLSTM().to(DEVICE)
model.load_state_dict(torch.load(MODEL_PATH,map_location=DEVICE))
model.eval()
print("Model loaded for inference.")

Model loaded for inference.
