<a href="https://colab.research.google.com/github/MohiteYash/baby/blob/Pipeline_02/Updated_BI_Lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision torchaudio librosa scikit-learn


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [9]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import librosa
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


In [10]:
class BabyCryDataset(Dataset):
    def __init__(self, file_paths, labels, sr=22050, n_mfcc=40, max_len=100):
        self.file_paths = file_paths
        self.labels = labels
        self.sr = sr
        self.n_mfcc = n_mfcc
        self.max_len = max_len

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        label = self.labels[idx]

        y, sr = librosa.load(file_path, sr=self.sr)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=self.n_mfcc)

        # Compute delta and delta-delta features
        delta_mfcc = librosa.feature.delta(mfcc)
        delta2_mfcc = librosa.feature.delta(mfcc, order=2)

        # Stack original, delta, and delta-delta features
        mfcc_features = np.vstack([mfcc, delta_mfcc, delta2_mfcc])  # Shape: (n_mfcc * 3, time_steps)

        # Ensure fixed length
        if mfcc_features.shape[1] < self.max_len:
            mfcc_features = np.pad(mfcc_features, ((0, 0), (0, self.max_len - mfcc_features.shape[1])), mode='constant')
        else:
            mfcc_features = mfcc_features[:, :self.max_len]

        mfcc_features = torch.tensor(mfcc_features, dtype=torch.float32).T  # Shape: (max_len, n_mfcc * 3)

        return mfcc_features, torch.tensor(label, dtype=torch.long)


In [11]:
data_dir = "/content/drive/MyDrive/augmented_baby_cry"
file_paths, labels = [], []

for class_idx, class_name in enumerate(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_path):
        continue
    for fname in os.listdir(class_path):
        if fname.endswith(".wav"):
            file_paths.append(os.path.join(class_path, fname))
            labels.append(class_idx)

# Split dataset
train_paths, test_paths, train_labels, test_labels = train_test_split(file_paths, labels, test_size=0.2, random_state=42)

# Create Dataset and Dataloaders
batch_size = 32  # Increased batch size
train_dataset = BabyCryDataset(train_paths, train_labels)
test_dataset = BabyCryDataset(test_paths, test_labels)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [12]:
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.attn_weights = nn.Linear(hidden_size * 2, 1)  # *2 for bidirectional

    def forward(self, lstm_output):
        attn_scores = torch.tanh(self.attn_weights(lstm_output))  # Compute scores
        attn_weights = torch.softmax(attn_scores, dim=1)  # Normalize
        context_vector = torch.sum(attn_weights * lstm_output, dim=1)  # Weighted sum
        return context_vector


In [13]:
class BiLSTMModel(nn.Module):
    def __init__(self, input_size=120, hidden_size=256, num_layers=4, num_classes=5):
        super(BiLSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.attention = Attention(hidden_size)
        self.fc = nn.Linear(hidden_size * 2, num_classes)  # *2 for bidirectional
        self.dropout = nn.Dropout(0.5)  # Increased dropout

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        x = self.attention(lstm_out)  # Apply attention
        x = self.dropout(x)
        x = self.fc(x)
        return x


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BiLSTMModel().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)  # Reduced learning rate
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)  # LR decay

num_epochs = 30  # Increased epochs


In [15]:
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for mfcc, labels in train_loader:
        mfcc, labels = mfcc.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(mfcc)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    scheduler.step()  # Adjust learning rate

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")


Epoch 1/30, Loss: 1.3132
Epoch 2/30, Loss: 1.0754
Epoch 3/30, Loss: 0.9087
Epoch 4/30, Loss: 0.7150
Epoch 5/30, Loss: 0.5784
Epoch 6/30, Loss: 0.4625
Epoch 7/30, Loss: 0.3826
Epoch 8/30, Loss: 0.3679
Epoch 9/30, Loss: 0.2619
Epoch 10/30, Loss: 0.2653
Epoch 11/30, Loss: 0.2796
Epoch 12/30, Loss: 0.1910
Epoch 13/30, Loss: 0.1454
Epoch 14/30, Loss: 0.1212
Epoch 15/30, Loss: 0.1517
Epoch 16/30, Loss: 0.1083
Epoch 17/30, Loss: 0.0782
Epoch 18/30, Loss: 0.0642
Epoch 19/30, Loss: 0.0724
Epoch 20/30, Loss: 0.0482
Epoch 21/30, Loss: 0.0463
Epoch 22/30, Loss: 0.0426
Epoch 23/30, Loss: 0.0321
Epoch 24/30, Loss: 0.0463
Epoch 25/30, Loss: 0.1858
Epoch 26/30, Loss: 0.0757
Epoch 27/30, Loss: 0.0386
Epoch 28/30, Loss: 0.0308
Epoch 29/30, Loss: 0.0190
Epoch 30/30, Loss: 0.0193


In [16]:
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for mfcc, labels in test_loader:
        mfcc, labels = mfcc.to(device), labels.to(device)
        outputs = model(mfcc)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy * 100:.2f}%\n")
print("Classification Report:\n", classification_report(all_labels, all_preds))


Test Accuracy: 83.85%

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.82      0.82        73
           1       0.95      0.90      0.92        77
           2       0.90      0.81      0.85        74
           4       0.71      0.82      0.76        67

    accuracy                           0.84       291
   macro avg       0.84      0.84      0.84       291
weighted avg       0.85      0.84      0.84       291

