# 50.039 Deep Learning Project

Group Members:
- Lee Chang Zheng
- Lee Cheng Xin
- Jason Peng Jing Ming

## Imports

In [17]:
import torch
import torchaudio
from torchaudio import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import os

## Converting Audio samples into Spectrograms

Before we can begin, we first have to convert the audio samples from the .webm and .ogg formats into a standardised format. We convert the files to single-channel .wav with a sample rate of 48000 Hz, and pad any recording shorter than 10 seconds with trailing silence so that it is 10 seconds long.

In [18]:
# Convert webm and ogg files to wav with single channel, sample rate of 48000 Hz, padded to 10 seconds long
import subprocess
from pydub import AudioSegment

def convert_audio(in_path, out_path):
    """Convert a .webm or .ogg recording to a mono, 48000 Hz .wav file via FFmpeg.

    Args:
        in_path: Path of the source recording. Only files whose extension is
            ``.webm`` or ``.ogg`` (case-insensitive) are converted; anything
            else is skipped.
        out_path: Desired output path. Its extension is replaced by ``.wav``.

    Returns:
        The path handed to FFmpeg as the output file, or ``None`` when
        ``in_path`` was skipped.

    Requires the ``ffmpeg`` executable to be available on ``PATH``.
    """
    # Inspect the real file extension: a substring test would also match
    # directory names or names such as "clip.webm.bak".
    source_ext = os.path.splitext(in_path)[1].lower()
    if source_ext not in ('.webm', '.ogg'):
        return None

    # Swap only the trailing extension so that directory components which
    # happen to contain ".webm" / ".ogg" are left untouched.
    wav_path = os.path.splitext(out_path)[0] + '.wav'

    # -y: overwrite existing output, -ac 1: mono, -ar 48000: resample to 48 kHz.
    # FFmpeg's exit status is not checked, so a failed conversion does not raise.
    subprocess.run(["ffmpeg", "-i", in_path, "-y", "-ac", "1", "-ar", "48000", wav_path])
    return wav_path

def pad_audio(in_path, out_path, target_ms=10000):
    """Pad a .wav file with trailing silence up to a minimum duration.

    Args:
        in_path: Path of the .wav file to read.
        out_path: Path the resulting .wav file is written to. May be the same
            as ``in_path`` to pad the file in place.
        target_ms: Minimum duration in milliseconds (default 10000, i.e. 10 s).

    Recordings that are already at least ``target_ms`` long are not shortened.
    They are copied to ``out_path`` when it differs from ``in_path`` (so the
    output always exists) and left untouched when padding in place.
    """
    audio = AudioSegment.from_wav(in_path)

    # pydub segment lengths are measured in milliseconds.
    missing_ms = target_ms - len(audio)
    if missing_ms > 0:
        audio = audio + AudioSegment.silent(duration=missing_ms)
    elif os.path.abspath(in_path) == os.path.abspath(out_path):
        # Already long enough and padding in place: nothing to rewrite.
        return

    audio.export(out_path, format='wav')

# Note: These are commented out as the conversion has been done, it is simply for reference. You will need FFmpeg to run this.
# for filename in os.listdir('./Data/Covid'):
#     convert_audio(f'./Data/Covid/{filename}', f'./Converted/{filename}')
# for filename in os.listdir('./Data/Healthy'):
#     convert_audio(f'./Data/Healthy/{filename}', f'./Converted/{filename}')

# # Padding the audio to 10 seconds long
# for filename in os.listdir('./Converted'):
#     pad_audio(f'./Converted/{filename}', f'./Converted/{filename}')

After standardizing the audio samples, we convert each one into a Mel spectrogram, expressed in decibels, for the CNN model to process.

In [19]:
# Converts the audio waveform into a spectrogram
def audio_to_spec(audio, sample_rate, n_mels=128, n_fft=400, win_length=None, hop_length=None, top_db=80):
    """Turn an audio waveform into a Mel spectrogram on a decibel scale.

    Args:
        audio: Waveform tensor passed to ``transforms.MelSpectrogram``.
        sample_rate: Sample rate of ``audio``.
        n_mels: Number of Mel filter banks.
        n_fft: FFT size.
        win_length: Window length, forwarded unchanged to ``MelSpectrogram``.
        hop_length: Hop length, forwarded unchanged to ``MelSpectrogram``.
        top_db: Cut-off forwarded to ``transforms.AmplitudeToDB``.

    Returns:
        The output of ``AmplitudeToDB`` applied to the Mel spectrogram.
    """
    # Stage 1: waveform -> Mel spectrogram.
    to_mel = transforms.MelSpectrogram(
        sample_rate=sample_rate,
        n_fft=n_fft,
        win_length=win_length,
        hop_length=hop_length,
        center=True,
        pad_mode="reflect",
        n_mels=n_mels,
    )
    # Stage 2: Mel spectrogram -> decibel scale.
    to_db = transforms.AmplitudeToDB(top_db=top_db)

    return to_db(to_mel(audio))

## Custom Dataset and Dataloader

In [20]:
class CovidCoughDataset(Dataset):
    """Dataset of cough recordings paired with a binary COVID label.

    The spreadsheet's first column holds the recording name (without the
    ``.wav`` extension) and its second column holds the status string; a
    status equal to ``'COVID'`` maps to label 1, anything else to label 0.
    """

    def __init__(self, filename, datapath):
        """Load the label spreadsheet.

        Args:
            filename: Path of the Excel file read with ``pd.read_excel``.
            datapath: Prefix prepended verbatim to each recording name, so a
                directory must include its trailing separator
                (e.g. ``'./Converted/'``).
        """
        self.df = pd.read_excel(filename)
        self.datapath = datapath

    def __getitem__(self, index):
        """Return ``(spectrogram, label)`` for the row at position ``index``."""
        # Read cells with a single positional .iloc[row, col] lookup. The
        # chained form .iloc[row][col] indexes a label-indexed Series with an
        # integer key, which pandas has deprecated (FutureWarning) and which
        # newer pandas treats as a label lookup instead of a position.
        audio_file = self.datapath + self.df.iloc[index, 0] + '.wav'
        status = 1 if self.df.iloc[index, 1] == 'COVID' else 0
        audio, sample_rate = torchaudio.load(audio_file)
        spec = audio_to_spec(audio=audio, sample_rate=sample_rate)
        return spec, status

    def __len__(self):
        """Return the number of rows in the label spreadsheet."""
        return len(self.df)

In [24]:
# Load the dataset
# batch_size is only referenced by the DataLoader lines that are commented out below.
batch_size = 16

# Labels are read from the Excel sheet; recordings are looked up under ./Converted/.
dataset = CovidCoughDataset('./Data/Dataset.xlsx', './Converted/')
# Sanity check: show one (spectrogram, label) pair.
print(dataset[8])
# NOTE(review): the train/validation/test split and the loaders below are currently disabled.
# Passing fractions to random_split presumably needs a recent PyTorch release - confirm before enabling.
# train_dataset, valid_dataset, test_dataset = random_split(dataset, [0.8, 0.1, 0.1])
# train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)
# valid_dataloader = DataLoader(valid_dataset, batch_size = batch_size, shuffle=True)
# test_dataloader = DataLoader(test_dataset, batch_size = batch_size, shuffle=True)

(tensor([[[-46.4789, -46.4789, -46.4789,  ..., -46.4789, -46.4789, -46.4789],
         [-46.4789, -46.4789, -46.4789,  ..., -46.4789, -46.4789, -46.4789],
         [-46.4789, -46.4789, -46.4789,  ..., -46.4789, -46.4789, -46.4789],
         ...,
         [-46.4789, -46.4789, -46.4789,  ..., -46.4789, -46.4789, -46.4789],
         [-46.4789, -46.4789, -46.4789,  ..., -46.4789, -46.4789, -46.4789],
         [-46.4789, -46.4789, -46.4789,  ..., -46.4789, -46.4789, -46.4789]]]), 0)


  audio_file = self.datapath + self.df.iloc[index][0] + '.wav'
  status = 1 if self.df.iloc[index][1] == 'COVID' else 0
