In [1]:
!pip install torchaudio librosa



In [2]:
from google.colab import drive
import os

drive.mount('/content/drive')
data_path = '/content/drive/MyDrive/Colab Notebooks/SimPL/ICBHI_final_database'  # Change to your dataset path

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
import torchaudio
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch

class LungSoundDataset(Dataset):
    def __init__(self, data_path, max_length=882000):  # Set max_length based on the longest file
        self.data_path = data_path
        self.audio_files = [f for f in os.listdir(data_path) if f.endswith('.wav')]
        self.annotations = self._load_annotations()
        self.max_length = max_length

    def _load_annotations(self):
        annotations = {}
        for f in os.listdir(self.data_path):
            if f.endswith('.txt'):
                patient_id = f.split('_')[0]
                file_path = os.path.join(self.data_path, f)
                annotations[patient_id] = pd.read_csv(file_path, sep='\t', header=None)
        return annotations

    def _pad_waveform(self, waveform):
        length = waveform.shape[1]
        if length < self.max_length:
            padding = self.max_length - length
            waveform = torch.nn.functional.pad(waveform, (0, padding))
        else:
            waveform = waveform[:, :self.max_length]
        return waveform

    def __len__(self):
        return len(self.audio_files)

    def __getitem__(self, idx):
        audio_file = self.audio_files[idx]
        audio_path = os.path.join(self.data_path, audio_file)
        waveform, sample_rate = torchaudio.load(audio_path)
        waveform = self._pad_waveform(waveform)

        # Extract patient ID from the filename
        patient_id = audio_file.split('_')[0]
        annotation_df = self.annotations[patient_id]

        # Convert the annotations to tensor
        annotations = annotation_df.iloc[:, 2:].values.astype(float)
        annotations = torch.tensor(annotations, dtype=torch.float32)

        # Combine waveform and annotation into a single sample
        sample = {
            'waveform': waveform,
            'sample_rate': sample_rate,
            'annotation': annotations
        }
        return sample

dataset = LungSoundDataset(data_path)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [11]:
import torch.nn as nn
import torch.nn.functional as F

class HybridCNNRNN(nn.Module):
    def __init__(self):
        super(HybridCNNRNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2)
        )
        self.rnn = nn.LSTM(input_size=32, hidden_size=64, num_layers=2, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2)  # Two outputs: Crackles and Wheezes
        )

    def forward(self, x):
        x = self.cnn(x)
        x = x.permute(0, 2, 1)  # Change from (batch, channels, length) to (batch, length, channels)
        x, _ = self.rnn(x)
        x = x[:, -1, :]  # Get the output of the last time step
        x = self.fc(x)
        return x

model = HybridCNNRNN()

In [12]:
from torch.optim import Adam
from torch.nn import BCELoss

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = BCELoss()
optimizer = Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, num_epochs=25):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for data in train_loader:
            inputs, labels = data['waveform'].to(device), data['annotation'].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

train_model(model, train_loader, criterion, optimizer, num_epochs=25)

RuntimeError: stack expects each tensor to be equal size, but got [9, 2] at entry 0 and [7, 2] at entry 1

In [None]:
model_save_path = '/content/drive/MyDrive/Colab Notebooks/SimPL/lung_sound_model.pth'
torch.save(model.state_dict(), model_save_path)