<a href="https://colab.research.google.com/github/MohiteYash/baby/blob/resnet/Baby_Separate_Resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import librosa
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from torch.cuda.amp import autocast, GradScaler  # Mixed Precision Training

# Enable cuDNN Auto-tuner for Faster Convolutions
torch.backends.cudnn.benchmark = True


In [31]:
class BabyCryResNetDataset(Dataset):
    def __init__(self, file_paths, labels, sr=22050):
        self.file_paths = file_paths
        self.labels = labels
        self.sr = sr

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        label = self.labels[idx]

        # Load and Compute Mel Spectrogram
        y, sr = librosa.load(file_path, sr=self.sr)
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

        # Convert to Tensor
        mel_spec = torch.tensor(mel_spec).unsqueeze(0)  # (1, 128, Time)
        mel_spec_resized = torch.nn.functional.interpolate(mel_spec.unsqueeze(0), size=(224, 224), mode="bilinear").squeeze(0)
        resnet_input = torch.cat([mel_spec_resized] * 3, dim=0)  # (3, 224, 224)

        return resnet_input, torch.tensor(label, dtype=torch.long)


In [32]:
# Dataset Path
data_dir = "/content/drive/MyDrive/augmented_baby_cry"
file_paths, labels = [], []

for class_idx, class_name in enumerate(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_name)
    for fname in os.listdir(class_path):
        if fname.endswith(".wav"):
            file_paths.append(os.path.join(class_path, fname))
            labels.append(class_idx)

# Split dataset into training & testing
train_paths, test_paths, train_labels, test_labels = train_test_split(file_paths, labels, test_size=0.2, random_state=42)

# Optimized DataLoaders for Faster GPU Loading
train_loader = DataLoader(BabyCryResNetDataset(train_paths, train_labels), batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(BabyCryResNetDataset(test_paths, test_labels), batch_size=32, shuffle=False, num_workers=4, pin_memory=True)




In [33]:
class ResNet50Model(nn.Module):
    def __init__(self, num_classes=5):
        super(ResNet50Model, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        self.resnet.fc = nn.Linear(2048, num_classes)  # Replace final FC layer

    def forward(self, x):
        return self.resnet(x)

# Initialize Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet50Model(num_classes=5).to(device)


In [34]:
def train_model(model, train_loader, epochs=20, lr=0.001):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    scaler = GradScaler()  # Mixed Precision Training

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with autocast():  # Enables Mixed Precision Training
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

def evaluate_model(model, test_loader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    acc = accuracy_score(y_true, y_pred) * 100
    class_report = classification_report(y_true, y_pred, digits=2)

    print(f"\nTest Accuracy: {acc:.2f}%")
    print("\nClassification Report:")
    print(class_report)

    return acc, class_report


In [35]:
# Train ResNet-50
train_model(model, train_loader, epochs=20, lr=0.001)

# Evaluate Model
test_acc, class_report = evaluate_model(model, test_loader)

# Save Trained Model
model_path = "resnet50_baby_cry.pth"
torch.save(model.state_dict(), model_path)
print(f"✅ Model Saved Successfully at {model_path}!")


  scaler = GradScaler()  # Mixed Precision Training
  with autocast():  # Enables Mixed Precision Training


Epoch 1/20, Loss: 1.1177
Epoch 2/20, Loss: 0.4895
Epoch 3/20, Loss: 0.5107
Epoch 4/20, Loss: 0.3021
Epoch 5/20, Loss: 0.1871
Epoch 6/20, Loss: 0.1120
Epoch 7/20, Loss: 0.1065
Epoch 8/20, Loss: 0.1242
Epoch 9/20, Loss: 0.1074
Epoch 10/20, Loss: 0.0961
Epoch 11/20, Loss: 0.1058
Epoch 12/20, Loss: 0.0486
Epoch 13/20, Loss: 0.0332
Epoch 14/20, Loss: 0.1119
Epoch 15/20, Loss: 0.1009
Epoch 16/20, Loss: 0.0669
Epoch 17/20, Loss: 0.0510
Epoch 18/20, Loss: 0.0097
Epoch 19/20, Loss: 0.0197
Epoch 20/20, Loss: 0.0202

Test Accuracy: 96.56%

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.99      0.97        73
           1       1.00      0.99      0.99        77
           2       0.95      0.99      0.97        74
           4       0.97      0.90      0.93        67

    accuracy                           0.97       291
   macro avg       0.97      0.96      0.96       291
weighted avg       0.97      0.97      0.97       291

✅ Model