In [18]:
import os
import sounddevice as sd
import numpy as np
import pandas as pd
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

In [19]:
# Read the training datalist CSV into `df`; AudioDataset looks labels up in
# this frame via its 'ID' and 'Disease category' columns.
datalist_csv = 'Training Dataset/training datalist.csv'
df = pd.read_csv(datalist_csv)

# Show the first rows as a quick sanity check of the loaded columns.
display(df.head())

Unnamed: 0,ID,Sex,Age,Disease category,Narrow pitch range,Decreased volume,Fatigue,Dryness,Lumping,heartburn,...,Onset of dysphonia,Noise at work,Occupational vocal demand,Diabetes,Hypertension,CAD,Head and Neck Cancer,Head injury,CVA,Voice handicap index - 10
0,1202f15,2,39,1,1,1,1,1,1,0,...,2,3,1,0,0,0,0,0,0,22.0
1,0600ve0,1,69,2,1,1,1,1,0,0,...,2,1,3,0,0,0,0,0,1,19.0
2,1001o7l,2,59,2,1,1,1,1,0,0,...,2,3,4,0,0,0,0,0,0,18.0
3,1201c1t,2,47,1,1,0,1,1,1,0,...,3,1,1,0,0,0,0,0,0,27.0
4,0402jvt,1,87,1,0,0,0,0,0,0,...,1,1,4,0,1,0,0,0,0,16.0


In [20]:
class AudioDataset(Dataset):
    """Map-style dataset yielding ``(waveform, label)`` for each audio file in a directory.

    The label of a file is the 'Disease category' of the row whose 'ID' equals
    the file name up to its first '.'. Files whose ID is not present in the
    label table get the sentinel label ``-1``.

    Args:
        data_dir: Directory containing the audio files.
        labels_df: Optional DataFrame with 'ID' and 'Disease category'
            columns. When omitted, the notebook-level ``df`` is used at
            lookup time (the original behaviour).
    """

    def __init__(self, data_dir, labels_df=None):
        self.data_dir = data_dir
        self.labels_df = labels_df
        # os.listdir returns entries in arbitrary order and also lists
        # directories / hidden files (e.g. .ipynb_checkpoints, .DS_Store).
        # Keep only visible regular files and sort them so that an index
        # always refers to the same recording across runs and machines.
        self.file_list = sorted(
            entry for entry in os.listdir(data_dir)
            if not entry.startswith('.')
            and os.path.isfile(os.path.join(data_dir, entry))
        )

    def __len__(self):
        """Return the number of audio files found in ``data_dir``."""
        return len(self.file_list)

    def _label_for(self, file_name):
        """Return the 'Disease category' for ``file_name``, or -1 if its ID is unknown."""
        table = self.labels_df if self.labels_df is not None else df
        # Single scan of the table; the first matching row wins.
        matches = table.loc[table['ID'] == file_name, 'Disease category'].values
        return matches[0] if len(matches) else -1

    def __getitem__(self, index):
        """Load the audio file at ``index`` and return ``(waveform, label)``."""
        file_path = os.path.join(self.data_dir, self.file_list[index])
        # The sample rate is not part of the returned tuple.
        waveform, _sample_rate = torchaudio.load(file_path)
        # You can perform additional preprocessing or transformations here

        # The recording ID is the file name up to the first '.'.
        file_name = self.file_list[index].split('.')[0]
        return waveform, self._label_for(file_name)


In [21]:
class Classifier(nn.Module):
    """Fully connected classifier: BatchNorm -> Linear -> ReLU -> Linear.

    Args:
        input_size: Size of the last dimension of the input tensor.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.bn = nn.BatchNorm1d(input_size)
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits for ``x``.

        Args:
            x: Tensor whose last dimension is ``input_size``; any leading
                dimensions are preserved in the output.

        Returns:
            Tensor of shape ``(*x.shape[:-1], num_classes)``.

        Note:
            In training mode BatchNorm1d needs more than one sample per
            call; use ``model.eval()`` for single-sample inference.
        """
        # self.bn was registered in __init__ but never applied. Apply it to the
        # input, collapsing leading dims so BatchNorm1d sees (N, input_size)
        # even for inputs shaped (batch, channels, input_size).
        lead_shape = x.shape[:-1]
        flat = x.reshape(-1, x.shape[-1])
        out = self.bn(flat)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        # Restore the caller's leading dimensions.
        return out.reshape(*lead_shape, out.shape[-1])

In [31]:
# Hyperparameters for the model and the training run.

# Model dimensions
input_size = 132_300   # samples per input; equals 3 * 44100 -- presumably 3 s at 44.1 kHz, TODO confirm
hidden_size = 20       # units in the hidden layer
num_classes = 5        # number of output logits

# Optimisation settings
learning_rate = 1e-3
batch_size = 32
num_epochs = 10

In [32]:
# Build the dataset over the files in data_dir and wrap it in a loader that
# reshuffles the samples and yields batches of `batch_size`.
data_dir = 'Training Dataset/training_voice_data'
dataset = AudioDataset(data_dir=data_dir)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [17]:
# Play one audio sample through the default output device.
index = 0  # Index of the audio sample to play

# AudioDataset.__getitem__ returns (waveform, label) only; unpacking three
# values here raised ValueError.
waveform, label = dataset[index]
# The sample rate is not returned by the dataset, so read it from the same file.
_, sample_rate = torchaudio.load(
    os.path.join(dataset.data_dir, dataset.file_list[index])
)

# torchaudio.load gives (channels, frames) while sounddevice expects
# (frames, channels); transposing handles mono and multi-channel alike.
audio_np = np.ascontiguousarray(waveform.numpy().T)
sd.play(audio_np, sample_rate)

status = sd.wait()  # Wait until file is done playing