Conformer

In [53]:
import torch
import torchaudio
from torch.utils.data import Dataset
from torchaudio.transforms import MelSpectrogram, Resample
import pandas as pd

class AudioDataset(Dataset):
    """Dataset of labelled audio clips listed in a CSV file.

    The CSV is read with pandas and must contain a ``file_path`` column and a
    ``label`` column. Each item is loaded with ``torchaudio.load``, resampled
    to ``target_sample_rate`` when its native rate differs, optionally passed
    through ``transform``, and finally zero-padded or truncated along its last
    dimension so that it is exactly ``target_length`` long.

    Args:
        csv_file: Path (or buffer) accepted by ``pandas.read_csv``.
        transform: Optional callable applied to the (resampled) waveform.
        target_length: Required size of the last dimension of every item.
        target_sample_rate: Sample rate, in Hz, that audio is resampled to.
    """

    def __init__(self, csv_file, transform=None, target_length=73,
                 target_sample_rate=16000):
        self.data = pd.read_csv(csv_file)
        self.file_paths = self.data['file_path'].tolist()
        self.labels = self.data['label'].tolist()
        self.transform = transform
        self.target_length = target_length
        self.target_sample_rate = target_sample_rate
        # Resample modules keyed by source rate, so the resampling kernel is
        # built once per distinct rate instead of once per item.
        self._resamplers = {}

    def __len__(self):
        """Return the number of clips listed in the CSV."""
        return len(self.file_paths)

    def _resample(self, waveform, sample_rate):
        """Return ``waveform`` converted to ``target_sample_rate``."""
        if sample_rate == self.target_sample_rate:
            return waveform
        resampler = self._resamplers.get(sample_rate)
        if resampler is None:
            resampler = Resample(sample_rate, self.target_sample_rate)
            self._resamplers[sample_rate] = resampler
        return resampler(waveform)

    def _fit_length(self, features):
        """Zero-pad or truncate the last dimension to ``target_length``.

        Works for any tensor rank: the ellipsis slice touches only the last
        axis, so both raw waveforms and spectrogram-like tensors are handled.
        """
        current_length = features.size(-1)
        if current_length < self.target_length:
            pad_length = self.target_length - current_length
            return torch.nn.functional.pad(features, (0, pad_length))
        if current_length > self.target_length:
            return features[..., :self.target_length]
        return features

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(features, label)`` as tensors."""
        audio_file = self.file_paths[idx]
        label = self.labels[idx]
        waveform, sample_rate = torchaudio.load(audio_file)

        waveform = self._resample(waveform, sample_rate)

        if self.transform is not None:
            waveform = self.transform(waveform)

        waveform = self._fit_length(waveform)

        # NOTE(review): the channel dimension is kept as loaded; files with
        # different channel counts would yield differently shaped items —
        # confirm the data is single-channel.
        label = torch.tensor(label)  # Convert label to tensor here
        return waveform, label



In [54]:
from torch.utils.data import DataLoader
from torchaudio.transforms import MelSpectrogram

# Mel-spectrogram front end applied to every clip by the dataset.
transform = MelSpectrogram(sample_rate=16000, n_mels=128, hop_length=512, n_fft=1024)

dataset = AudioDataset("labeled_data.csv", transform=transform, target_length=73)
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

# Split *indices* rather than the dataset object. Handing the Dataset itself
# to train_test_split makes scikit-learn index every item up front, which
# decodes all audio into in-memory lists. Splitting indices keeps loading lazy;
# with the same random_state and sample counts the partition sizes are
# unchanged.
all_indices = list(range(len(dataset)))
train_indices, test_indices = train_test_split(all_indices, test_size=0.2, random_state=42)
train_indices, val_indices = train_test_split(train_indices, test_size=0.1, random_state=42)

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

# Only the training loader is shuffled; validation/test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [55]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConformerBlock(nn.Module):
    """Convolution + self-attention block with a residual connection.

    The input is processed by two ReLU-activated 1-D convolutions over the
    sequence axis, followed by multi-head self-attention (added back to the
    convolution output), a linear layer, and finally a skip connection from
    the block input.

    Args:
        input_dim: Feature size of the incoming sequence.
        output_dim: Feature size produced by the block.
        num_heads: Number of attention heads; must divide ``output_dim``.
    """

    def __init__(self, input_dim, output_dim, num_heads=4):
        super().__init__()
        # The first convolution maps input_dim -> output_dim, so the block
        # also works when the two sizes differ.
        self.conv1 = nn.Conv1d(input_dim, output_dim, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(output_dim, output_dim, kernel_size=3, padding=1)
        self.attention = nn.MultiheadAttention(output_dim, num_heads=num_heads, batch_first=True)
        self.fc = nn.Linear(output_dim, output_dim)
        # Identity has no parameters, so when the sizes match the state_dict
        # gains no keys; a projection is only created when the skip path
        # needs its feature size changed.
        if input_dim == output_dim:
            self.residual_proj = nn.Identity()
        else:
            self.residual_proj = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the block to ``x`` laid out as (batch, sequence, features)."""
        residual = self.residual_proj(x)
        # Conv1d expects channels before the sequence axis.
        x = x.transpose(1, 2)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.transpose(1, 2)
        attn_output, _ = self.attention(x, x, x)
        x = x + attn_output
        x = self.fc(x)
        return x + residual

class ConformerModel(nn.Module):
    """Sequence classifier built from a stack of ``ConformerBlock`` layers.

    Args:
        input_dim: Feature size of each input frame.
        num_classes: Number of output logits.
        num_blocks: How many ``ConformerBlock`` layers to stack.
        hidden_dim: Feature size used inside the stack.
    """

    def __init__(self, input_dim, num_classes, num_blocks=6, hidden_dim=256):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, hidden_dim)
        stack = []
        for _ in range(num_blocks):
            stack.append(ConformerBlock(hidden_dim, hidden_dim))
        self.conformer_blocks = nn.ModuleList(stack)
        self.fc_out = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return class logits for ``x``.

        Dimensions 1 and 2 are swapped first so that the linear projection
        acts on what was dimension 1 of the input (the ``input_dim`` axis).
        """
        hidden = self.input_proj(x.transpose(1, 2))
        for layer in self.conformer_blocks:
            hidden = layer(hidden)
        # Average over dimension 1 (the sequence axis after the transpose).
        pooled = hidden.mean(dim=1)
        return self.fc_out(pooled)

In [57]:

import torch.optim as optim
import torch.nn.functional as F

# Use CUDA when PyTorch reports it as available, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ConformerModel(input_dim=128, num_classes=7)
model = model.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

def train(model, train_loader, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module mapping a batch of inputs to class logits.
        train_loader: Iterable of ``(inputs, labels)`` batches. Dimension 1 of
            ``inputs`` is squeezed away (when it has size 1) before the
            forward pass.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean
        cross-entropy per sample and ``accuracy`` is a percentage. If the
        loader yields no samples the result is ``(nan, 0.0)``.
    """
    model.train()
    loss_sum = 0.0
    correct_preds = 0
    total_preds = 0

    for mel_spectrograms, labels in train_loader:
        mel_spectrograms = mel_spectrograms.to(device).squeeze(1)
        labels = labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()
        logits = model(mel_spectrograms)
        loss = F.cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()

        # cross_entropy returns the batch mean; weight it by the batch size so
        # a short final batch does not count as much as a full one.
        loss_sum += loss.item() * batch_size
        correct_preds += (logits.argmax(dim=1) == labels).sum().item()
        total_preds += batch_size

    if total_preds == 0:
        # Nothing was seen: avoid dividing by zero.
        return float("nan"), 0.0

    avg_loss = loss_sum / total_preds
    accuracy = correct_preds / total_preds * 100
    return avg_loss, accuracy

def validate(model, val_loader, device):
    """Measure loss and accuracy on ``val_loader`` without updating weights.

    Args:
        model: Module mapping a batch of inputs to class logits.
        val_loader: Iterable of ``(inputs, labels)`` batches. Dimension 1 of
            ``inputs`` is squeezed away (when it has size 1) before the
            forward pass.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean
        cross-entropy per sample and ``accuracy`` is a percentage. If the
        loader yields no samples the result is ``(nan, 0.0)``.
    """
    model.eval()
    loss_sum = 0.0
    correct_preds = 0
    total_preds = 0

    with torch.no_grad():
        for mel_spectrograms, labels in val_loader:
            mel_spectrograms = mel_spectrograms.to(device).squeeze(1)
            labels = labels.to(device)
            batch_size = labels.size(0)

            logits = model(mel_spectrograms)
            loss = F.cross_entropy(logits, labels)

            # Weight the batch-mean loss by the batch size so the reported
            # value does not depend on how the data was batched.
            loss_sum += loss.item() * batch_size
            correct_preds += (logits.argmax(dim=1) == labels).sum().item()
            total_preds += batch_size

    if total_preds == 0:
        # Nothing was seen: avoid dividing by zero.
        return float("nan"), 0.0

    avg_loss = loss_sum / total_preds
    accuracy = correct_preds / total_preds * 100
    return avg_loss, accuracy
EPOCHS = 10
best_val_acc = 0.0

# Train for a fixed number of epochs, checkpointing whenever the validation
# accuracy strictly improves on the best value seen so far.
for epoch in range(EPOCHS):
    loss, train_acc = train(model, train_loader, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, device)

    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    if not (val_acc > best_val_acc):
        continue

    best_val_acc = val_acc
    torch.save(model.state_dict(), "best_model_Conformer.pth")
    print("Saved new best model!")



Epoch 1/10, Loss: 3.2941, Train Acc: 29.69%, Val Loss: 2.3317, Val Acc: 33.54%
Saved new best model!
Epoch 2/10, Loss: 2.4589, Train Acc: 35.05%, Val Loss: 2.3562, Val Acc: 36.14%
Saved new best model!
Epoch 3/10, Loss: 2.1718, Train Acc: 36.62%, Val Loss: 1.8517, Val Acc: 37.17%
Saved new best model!
Epoch 4/10, Loss: 1.9670, Train Acc: 37.51%, Val Loss: 2.0269, Val Acc: 36.56%
Epoch 5/10, Loss: 2.1078, Train Acc: 37.91%, Val Loss: 1.7644, Val Acc: 37.66%
Saved new best model!
Epoch 6/10, Loss: 1.8286, Train Acc: 38.64%, Val Loss: 1.8833, Val Acc: 37.92%
Saved new best model!
Epoch 7/10, Loss: 1.8588, Train Acc: 39.32%, Val Loss: 1.7342, Val Acc: 40.00%
Saved new best model!
Epoch 8/10, Loss: 1.7384, Train Acc: 40.92%, Val Loss: 2.0454, Val Acc: 39.67%
Epoch 9/10, Loss: 1.8031, Train Acc: 41.10%, Val Loss: 1.8621, Val Acc: 40.30%
Saved new best model!
Epoch 10/10, Loss: 1.8103, Train Acc: 41.26%, Val Loss: 1.7012, Val Acc: 41.08%
Saved new best model!


In [None]:
def evaluate(model, test_loader, device):
    """Return the classification accuracy of ``model`` on ``test_loader``.

    Args:
        model: Module mapping a batch of inputs to class logits.
        test_loader: Iterable of ``(inputs, labels)`` batches. Dimension 1 of
            ``inputs`` is squeezed away (when it has size 1) before the
            forward pass.
        device: Device the batches are moved to.

    Returns:
        float: Accuracy as a percentage, or ``0.0`` when the loader yields no
        samples.
    """
    model.eval()
    correct_preds = 0
    total_preds = 0

    with torch.no_grad():
        for mel_spectrograms, labels in test_loader:
            mel_spectrograms = mel_spectrograms.to(device).squeeze(1)
            labels = labels.to(device)

            logits = model(mel_spectrograms)

            correct_preds += (logits.argmax(dim=1) == labels).sum().item()
            total_preds += labels.size(0)

    if total_preds == 0:
        # Nothing was evaluated: avoid dividing by zero.
        return 0.0

    accuracy = correct_preds / total_preds * 100
    return accuracy


# Restore the best checkpoint before testing. map_location remaps the saved
# tensors onto the current device, so a checkpoint written on a GPU machine
# still loads when only the CPU is available.
model.load_state_dict(torch.load("best_model_Conformer.pth", map_location=device))
accuracy = evaluate(model, test_loader, device)
print(f"Test Accuracy: {accuracy:.2f}%")
