In [None]:
import os
import pandas as pd

In [None]:
# Centralized paths
COMBINED_FOLDER = "./data"
COMBINED_METADATA_PATH = os.path.join(COMBINED_FOLDER, "combined_dataset.csv")

# The path is relative, so it resolves against the kernel's working directory.
# Fail early with the resolved location instead of a bare "No such file" error.
if not os.path.isfile(COMBINED_METADATA_PATH):
    raise FileNotFoundError(
        f"Metadata CSV not found at '{os.path.abspath(COMBINED_METADATA_PATH)}' "
        f"(working directory: '{os.getcwd()}'). "
        "Place combined_dataset.csv there or update COMBINED_FOLDER."
    )

# Load metadata
combined_metadata = pd.read_csv(COMBINED_METADATA_PATH)

# The dataset-building cell reads these columns; report a missing one here
# rather than as a KeyError further down the notebook.
missing_columns = {"file_path", "label", "split"} - set(combined_metadata.columns)
if missing_columns:
    raise ValueError(
        f"{COMBINED_METADATA_PATH} is missing required column(s): {sorted(missing_columns)}"
    )

FileNotFoundError: [Errno 2] No such file or directory: './data/combined_dataset.csv'

## Data

In [None]:
import torchaudio
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

class SpectrogramDataset(Dataset):
    """Map-style dataset that turns audio files into log-power spectrograms.

    Each item is a ``(spectrogram, label)`` pair. The spectrogram is computed
    with ``torchaudio.transforms.Spectrogram(power=2.0)``, offset by ``1e-6``
    and passed through ``log2``; the optional ``transform`` is applied last.
    """

    def __init__(self, file_paths, labels, transform=None, n_fft=2048, hop_length=512, win_length=None):
        """
        Parameters:
        - file_paths: Sequence of paths to audio files.
        - labels: Sequence of labels, one per entry of ``file_paths``.
        - transform: Optional callable applied to the log spectrogram tensor.
        - n_fft: Number of FFT components for the STFT.
        - hop_length: Hop length for the STFT.
        - win_length: Window length for the STFT (``None`` uses torchaudio's default).

        Raises:
        - ValueError: if ``file_paths`` and ``labels`` differ in length.

        Note: the STFT settings are fixed at construction time; changing the
        ``n_fft`` / ``hop_length`` / ``win_length`` attributes afterwards does
        not affect the spectrograms produced.
        """
        # Checked first so a mismatch is reported before any other work is done.
        if len(file_paths) != len(labels):
            raise ValueError(
                f"file_paths and labels must have the same length, "
                f"got {len(file_paths)} and {len(labels)}"
            )

        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length

        # Build the STFT module once instead of re-creating it (and its window
        # tensor) for every sample that is fetched.
        self._spectrogram = torchaudio.transforms.Spectrogram(
            n_fft=n_fft, hop_length=hop_length, win_length=win_length, power=2.0
        )

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(log_spectrogram, label)``."""
        file_path = self.file_paths[idx]
        label = self.labels[idx]

        # torchaudio.load returns (channels, frames) by default.
        waveform, _sample_rate = torchaudio.load(file_path)

        # Down-mix multi-channel recordings so every item has exactly one
        # channel; mono files pass through unchanged.
        if waveform.size(0) > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Power spectrogram -> log scale; the epsilon keeps log2 finite on silence.
        spectrogram = self._spectrogram(waveform)
        spectrogram = (spectrogram + 1e-6).log2()

        # Apply optional transformations
        if self.transform is not None:
            spectrogram = self.transform(spectrogram)

        return spectrogram, label


# Preprocessing pipeline applied to each log-spectrogram tensor.
transform = transforms.Compose(
    [
        # Rescale every spectrogram to a fixed 128 x 512 (height x width) grid.
        transforms.Resize(size=(128, 512)),
        # Single-channel normalization: x -> (x - 0.5) / 0.5.
        # NOTE(review): 0.5/0.5 are the usual statistics for [0, 1] images; the
        # dataset emits log2 power values, which are not confined to that range.
        # Consider mean/std measured on the training split - TODO confirm.
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

In [None]:
# Store labels as a pandas categorical so each class maps to an integer code
# (codes follow the order of `combined_metadata["label"].cat.categories`).
combined_metadata["label"] = combined_metadata["label"].astype("category")

# Training split: file paths and integer class codes
train_paths = combined_metadata.loc[combined_metadata["split"] == "train", "file_path"].tolist()
train_labels = combined_metadata.loc[combined_metadata["split"] == "train", "label"].cat.codes.tolist()

# Validation split: file paths and integer class codes
val_paths = combined_metadata.loc[combined_metadata["split"] == "val", "file_path"].tolist()
val_labels = combined_metadata.loc[combined_metadata["split"] == "val", "label"].cat.codes.tolist()

# Wrap each split in a dataset that computes spectrograms on the fly
train_dataset = SpectrogramDataset(file_paths=train_paths, labels=train_labels, transform=transform)
val_dataset = SpectrogramDataset(file_paths=val_paths, labels=val_labels, transform=transform)

# Only the training data is shuffled; validation keeps file order
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=32)

## Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from typing import Any, List, Tuple


class _DenseLayer(nn.Module):
    def __init__(self, num_input_features: int, growth_rate: int, bn_size: int, drop_rate: float) -> None:
        super().__init__()
        self.norm1 = nn.BatchNorm2d(num_input_features)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False)

        self.norm2 = nn.BatchNorm2d(bn_size * growth_rate)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)

        self.drop_rate = drop_rate

    def forward(self, input: List[torch.Tensor]) -> torch.Tensor:
        concated_features = torch.cat(input, 1)
        bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features)))
        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        return new_features


class _DenseBlock(nn.ModuleDict):
    def __init__(self, num_layers: int, num_input_features: int, bn_size: int, growth_rate: int, drop_rate: float) -> None:
        super().__init__()
        for i in range(num_layers):
            layer = _DenseLayer(
                num_input_features + i * growth_rate,
                growth_rate=growth_rate,
                bn_size=bn_size,
                drop_rate=drop_rate,
            )
            self.add_module(f"denselayer{i + 1}", layer)

    def forward(self, init_features: torch.Tensor) -> torch.Tensor:
        features = [init_features]
        for layer in self.values():
            new_features = layer(features)
            features.append(new_features)
        return torch.cat(features, 1)


class _Transition(nn.Sequential):
    def __init__(self, num_input_features: int, num_output_features: int) -> None:
        super().__init__()
        self.add_module("norm", nn.BatchNorm2d(num_input_features))
        self.add_module("relu", nn.ReLU(inplace=True))
        self.add_module("conv", nn.Conv2d(num_input_features, num_output_features, kernel_size=1, stride=1, bias=False))
        self.add_module("pool", nn.AvgPool2d(kernel_size=2, stride=2))


class DenseNet(nn.Module):
    """Densely connected convolutional classifier.

    The network is a stem (7x7 conv, BN, ReLU, max-pool), followed by one dense
    block per entry of ``block_config`` with a channel-halving transition
    between consecutive blocks, a final batch norm, global average pooling and
    a linear classifier.

    Args:
        num_classes: Number of output logits.
        growth_rate: Feature maps added by each dense layer.
        block_config: Number of dense layers in each block (one entry per block).
        num_init_features: Output channels of the stem convolution.
        bn_size: Bottleneck multiplier; the 1x1 conv in each dense layer
            produces ``bn_size * growth_rate`` channels.
        drop_rate: Dropout probability applied after each dense layer.
        in_channels: Channels of the input tensor. Defaults to 1
            (single-channel spectrograms).
    """

    def __init__(
        self,
        num_classes: int = 10,
        growth_rate: int = 32,
        block_config: Tuple[int, ...] = (6, 12, 32, 32),
        num_init_features: int = 64,
        bn_size: int = 4,
        drop_rate: float = 0.0,
        in_channels: int = 1,
    ) -> None:
        super().__init__()

        # Stem: the initial convolution consumes `in_channels` input planes
        self.features = nn.Sequential(
            OrderedDict(
                [
                    ("conv0", nn.Conv2d(in_channels, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
                    ("norm0", nn.BatchNorm2d(num_init_features)),
                    ("relu0", nn.ReLU(inplace=True)),
                    ("pool0", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
                ]
            )
        )

        # Dense blocks and transitions
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(
                num_layers=num_layers,
                num_input_features=num_features,
                bn_size=bn_size,
                growth_rate=growth_rate,
                drop_rate=drop_rate,
            )
            self.features.add_module(f"denseblock{i + 1}", block)
            # Every layer in the block appended `growth_rate` channels.
            num_features = num_features + num_layers * growth_rate
            # A transition follows every block except the last one.
            if i != len(block_config) - 1:
                trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2)
                self.features.add_module(f"transition{i + 1}", trans)
                num_features = num_features // 2

        # Final batch normalization
        self.features.add_module("norm5", nn.BatchNorm2d(num_features))

        # Classification layer
        self.classifier = nn.Linear(num_features, num_classes)

        # Weight initialization: Kaiming-normal conv weights, identity batch
        # norm, zero classifier bias (the classifier weight keeps its default).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a batch of shape ``(N, in_channels, H, W)`` to logits of shape ``(N, num_classes)``."""
        features = self.features(x)
        out = F.relu(features, inplace=True)
        # Global average pooling makes the classifier independent of H and W.
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

In [None]:
# Class names in the same order as the integer targets fed to the model.
# The targets are `combined_metadata["label"].cat.codes`, whose numbering
# follows the categorical's category order (sorted for string labels), so a
# hand-written list such as ["normal", "artifact", ...] would not line up with
# the codes. Deriving the names from the categorical keeps classes[i] equal to
# the name of class i and sizes the classifier from the labels actually present.
classes = combined_metadata["label"].cat.categories.tolist()
model = DenseNet(num_classes=len(classes), block_config=(6, 12, 32, 32))

## Training

In [None]:
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR

# Multi-class objective on raw logits (default mean reduction over the batch)
criterion = nn.CrossEntropyLoss()

# Adam with L2 weight decay.
# NOTE(review): lr=0.01 is 10x Adam's default of 1e-3 - confirm it is intentional.
optimizer = optim.Adam(params=model.parameters(), weight_decay=1e-4, lr=0.01)

# Multiply the learning rate by 0.1 once epoch 30 and again once epoch 60 is reached
scheduler = MultiStepLR(optimizer=optimizer, gamma=0.1, milestones=[30, 60])

In [None]:
# Pick the accelerator when available and move the model onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if device.type == 'cuda':
    torch.cuda.empty_cache()
model.to(device)

num_epochs = 90
for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    train_seen = 0      # number of training samples seen this epoch

    for spectrograms, labels in train_loader:
        spectrograms, labels = spectrograms.to(device), labels.to(device)
        batch_size = labels.size(0)

        # Clear stale gradients, then forward / backward / update
        optimizer.zero_grad()
        outputs = model(spectrograms)
        loss = criterion(outputs, labels)
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # `criterion` returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        running_loss += loss.item() * batch_size
        train_seen += batch_size

    # The scheduler counts epochs, so it is stepped once per epoch.
    scheduler.step()

    # max(..., 1) avoids a ZeroDivisionError if the loader yielded nothing.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / max(train_seen, 1):.4f}")

    # ---- Validation phase ----
    model.eval()
    val_loss = 0.0  # sum of per-sample validation losses
    correct = 0
    total = 0

    with torch.no_grad():
        for spectrograms, labels in val_loader:
            spectrograms, labels = spectrograms.to(device), labels.to(device)
            outputs = model(spectrograms)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            val_loss += loss.item() * batch_size
            # Predicted class = index of the largest logit
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    print(f"Validation Loss: {val_loss / max(total, 1):.4f}, Accuracy: {100 * correct / max(total, 1):.2f}%")

# torch.save(model.state_dict(), "model.pth")

Using device: cuda
Epoch [1/90], Loss: 0.8967
Validation Loss: 1.0756, Accuracy: 64.10%
Epoch [2/90], Loss: 0.8390
Validation Loss: 1.0560, Accuracy: 64.10%
Epoch [3/90], Loss: 0.8377
Validation Loss: 0.9883, Accuracy: 64.10%
Epoch [4/90], Loss: 0.8481
Validation Loss: 1.0130, Accuracy: 65.81%
Epoch [5/90], Loss: 0.8210
Validation Loss: 0.9088, Accuracy: 66.67%
Epoch [6/90], Loss: 0.8229
Validation Loss: 0.9355, Accuracy: 64.96%
Epoch [7/90], Loss: 0.8141
Validation Loss: 1.1078, Accuracy: 65.81%
Epoch [8/90], Loss: 0.8630
Validation Loss: 1.0646, Accuracy: 64.96%
Epoch [9/90], Loss: 0.8151
Validation Loss: 1.6283, Accuracy: 13.68%
Epoch [10/90], Loss: 0.8386
Validation Loss: 1.3546, Accuracy: 22.22%
Epoch [11/90], Loss: 0.8236
Validation Loss: 1.9977, Accuracy: 10.26%
Epoch [12/90], Loss: 0.7994
Validation Loss: 0.9831, Accuracy: 64.96%
Epoch [13/90], Loss: 0.7854
Validation Loss: 2.7277, Accuracy: 12.82%
Epoch [14/90], Loss: 0.7929
Validation Loss: 1.7288, Accuracy: 15.38%
Epoch [15/