In [295]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

In [296]:
# Load dataset
df = pd.read_csv('Training Dataset/training datalist.csv')

# data cleaning
df['PPD'].fillna(0, inplace=True)
df.dropna(subset=['Voice handicap index - 10'], inplace=True)
df.reset_index(drop=True, inplace=True)

columns_to_drop = ['ID', 'Disease category', 'Diabetes', 'Hypertension', 'CAD', 'Head and Neck Cancer', 'Head injury', 'CVA', 'Onset of dysphonia ', 'Diurnal pattern']
# get certain columns as features
features = df.columns.drop(columns_to_drop).to_list()
# print(features)

In [297]:
# split data into train and test
from sklearn.model_selection import train_test_split

X = df[features]
y = df['Disease category']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [298]:
# Define your custom dataset class
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y - 1
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        try:
            tup = self.X[index].astype(np.float32), self.y[index].astype(np.int64)
        except:
            import ipdb
            ipdb.set_trace()
        return tup

# Define your model class
class Classifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(hidden_size, hidden_size*4)
        self.fc3 = nn.Linear(hidden_size*4, num_classes)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out


In [299]:
# Define hyperparameters
input_size = 18
hidden_size = 20
num_classes = 5
learning_rate = 0.001
batch_size = 32
num_epochs = 50

In [300]:
# Create your dataset and dataloader
train_dataset = CustomDataset(X_train.values, y_train.values)  # Replace X and y with your own data
test_dataset = CustomDataset(X_test.values, y_test.values)  # Replace X and y with your own data
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [301]:
# Create your model
model = Classifier(input_size, hidden_size, num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [302]:
# Training loop
for epoch in range(num_epochs):
    for batch_idx, (inputs, labels) in enumerate(train_dataloader):
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Print training progress
        if (batch_idx+1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_dataloader)}], Loss: {loss.item():.4f}")

Epoch [1/50], Step [10/22], Loss: 1.3270
Epoch [1/50], Step [20/22], Loss: 1.3678
Epoch [2/50], Step [10/22], Loss: 1.3011
Epoch [2/50], Step [20/22], Loss: 1.2346
Epoch [3/50], Step [10/22], Loss: 1.2716
Epoch [3/50], Step [20/22], Loss: 1.1259
Epoch [4/50], Step [10/22], Loss: 1.0765
Epoch [4/50], Step [20/22], Loss: 1.0634
Epoch [5/50], Step [10/22], Loss: 1.0152
Epoch [5/50], Step [20/22], Loss: 1.0026
Epoch [6/50], Step [10/22], Loss: 1.1803
Epoch [6/50], Step [20/22], Loss: 1.0246
Epoch [7/50], Step [10/22], Loss: 1.0400
Epoch [7/50], Step [20/22], Loss: 1.1066
Epoch [8/50], Step [10/22], Loss: 1.0691
Epoch [8/50], Step [20/22], Loss: 0.7458
Epoch [9/50], Step [10/22], Loss: 1.0081
Epoch [9/50], Step [20/22], Loss: 1.1105
Epoch [10/50], Step [10/22], Loss: 1.1659
Epoch [10/50], Step [20/22], Loss: 1.2017
Epoch [11/50], Step [10/22], Loss: 1.0760
Epoch [11/50], Step [20/22], Loss: 0.8350
Epoch [12/50], Step [10/22], Loss: 0.7879
Epoch [12/50], Step [20/22], Loss: 1.1196
Epoch [13/

In [303]:
# Evaluation
model.eval()
with torch.no_grad():
    total_correct = 0
    total_samples = 0
    for inputs, labels in test_dataloader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()
    
    accuracy = total_correct / total_samples
    print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.6644


In [304]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

model.eval()
with torch.no_grad():
    total_correct = 0
    total_samples = 0
    predicted_labels = []
    targets = []

    for inputs, labels in test_dataloader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()

        predicted_labels.extend(predicted.tolist())
        targets.extend(labels.tolist())

    accuracy = accuracy_score(targets, predicted_labels)
    precision = precision_score(targets, predicted_labels, average='weighted', zero_division=1)
    recall = recall_score(targets, predicted_labels, average='weighted', zero_division=1)
    f1 = f1_score(targets, predicted_labels, average='weighted', zero_division=1)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 score: {f1:.4f}")

    print(classification_report(targets, predicted_labels, zero_division=1))
    print(confusion_matrix(targets, predicted_labels))

Accuracy: 0.6644
Precision: 0.6408
Recall: 0.6644
F1 score: 0.6334
              precision    recall  f1-score   support

           0       0.73      0.87      0.80       162
           1       0.51      0.42      0.46        60
           2       0.61      0.62      0.61        50
           3       0.17      0.06      0.08        18
           4       1.00      0.00      0.00         8

    accuracy                           0.66       298
   macro avg       0.60      0.39      0.39       298
weighted avg       0.64      0.66      0.63       298

[[141  13   6   2   0]
 [ 25  25   9   1   0]
 [ 11   7  31   1   0]
 [ 10   4   3   1   0]
 [  5   0   2   1   0]]
