In [1]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
import torchmetrics

from dataset import CoughDataset

In [2]:
class CoughDataModule(pl.LightningDataModule):
    """Lightning data module that splits a ``CoughDataset`` into train/val/test loaders.

    Args:
        df: Metadata frame passed straight to ``CoughDataset``.
        data_path: Data location passed straight to ``CoughDataset``.
        batch_size: Batch size used by all three loaders.
        num_workers: Number of loader worker processes used by all three loaders.
        train_size: Fraction of the dataset used for training.
        val_size: Fraction of the dataset used for validation.
        test_size: Fraction of the dataset used for testing.
        seed: Optional integer seed for the random split. ``None`` (the default)
            draws from the global torch RNG.

    Raises:
        Exception: If the three fractions do not sum to 1.0 (within 1e-6).
    """

    def __init__(self, df, data_path, batch_size=32, num_workers=4, train_size=0.8, val_size=0.1, test_size=0.1, seed=None):
        super().__init__()

        self.df = df
        self.data_path = data_path
        self.batch_size = batch_size
        self.num_workers = num_workers

        self.train_size = train_size
        self.val_size = val_size
        self.test_size = test_size
        self.seed = seed

        # Compare with a tolerance: exact float equality rejects valid inputs,
        # e.g. 0.7 + 0.2 + 0.1 evaluates to 0.9999999999999999.
        if abs(self.train_size + self.val_size + self.test_size - 1.0) > 1e-6:
            raise Exception('train_size + val_size + test_size must be equal to 1.0')

        # Filled in by setup(); None means "not split yet".
        self.train_dataset = None
        self.val_dataset = None
        self.test_dataset = None

    def setup(self, stage=None):
        """Build the dataset and split it once; later calls are no-ops."""
        # Lightning calls setup() once per stage (fit, validate, test, ...).
        # Re-splitting on each call would draw a new random partition every time
        # and let training samples leak into the validation/test subsets.
        if self.train_dataset is not None:
            return

        dataset = CoughDataset(self.df, self.data_path)
        fractions = [self.train_size, self.val_size, self.test_size]

        if self.seed is None:
            subsets = random_split(dataset, fractions)
        else:
            split_rng = torch.Generator().manual_seed(self.seed)
            subsets = random_split(dataset, fractions, generator=split_rng)

        self.train_dataset, self.val_dataset, self.test_dataset = subsets

    def train_dataloader(self):
        """Return the shuffled loader over the training subset."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        """Return the unshuffled loader over the validation subset."""
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)

    def test_dataloader(self):
        """Return the unshuffled loader over the test subset."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)

In [3]:
class CoughModel(nn.Module):
    """Four-block 2D CNN that maps a 1-channel input to 3 class scores.

    Each block is ``Conv2d`` (stride 2) -> ``ReLU`` -> ``BatchNorm2d``. Conv
    weights use Kaiming-normal initialisation and conv biases start at zero.
    The final feature map is average-pooled to 1x1 and passed to a linear
    layer with 3 outputs.

    ``forward`` returns raw logits. ``nn.CrossEntropyLoss`` applies log-softmax
    internally, so feeding it softmax probabilities would squash the scores
    twice and flatten the gradients. Call ``predict_proba`` when probabilities
    are needed.
    """

    def __init__(self) -> None:
        super().__init__()

        # First convolution block (5x5 kernel)
        self.conv1, self.relu1, self.bn1 = self._conv_block(1, 8, kernel_size=5, padding=2)

        # Second convolution block
        self.conv2, self.relu2, self.bn2 = self._conv_block(8, 16, kernel_size=3, padding=1)

        # Third convolution block
        self.conv3, self.relu3, self.bn3 = self._conv_block(16, 32, kernel_size=3, padding=1)

        # Fourth convolution block
        self.conv4, self.relu4, self.bn4 = self._conv_block(32, 64, kernel_size=3, padding=1)

        # Linear classifier
        self.ap = nn.AdaptiveAvgPool2d(output_size=1)
        self.lin = nn.Linear(in_features=64, out_features=3)
        # Kept as an attribute for callers that reference it; used only by predict_proba.
        self.act = nn.Softmax(dim=1)

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, padding):
        """Create one stride-2 conv / ReLU / batch-norm triple with Kaiming-initialised conv."""
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=(kernel_size, kernel_size),
            stride=(2, 2),
            padding=(padding, padding),
        )
        relu = nn.ReLU()
        bn = nn.BatchNorm2d(out_channels)
        nn.init.kaiming_normal_(conv.weight, a=0.1)
        conv.bias.data.zero_()
        return conv, relu, bn

    def forward(self, x):
        """Return unnormalised class scores (logits) of shape (batch, 3)."""
        blocks = (
            (self.conv1, self.relu1, self.bn1),
            (self.conv2, self.relu2, self.bn2),
            (self.conv3, self.relu3, self.bn3),
            (self.conv4, self.relu4, self.bn4),
        )
        for conv, relu, bn in blocks:
            x = bn(relu(conv(x)))

        # Pool each of the 64 feature maps to a single value, then flatten to (batch, 64).
        x = self.ap(x)
        x = x.view(x.shape[0], -1)

        return self.lin(x)

    def predict_proba(self, x):
        """Return softmax class probabilities of shape (batch, 3)."""
        return self.act(self(x))

In [4]:
class LitCoughClassifier(pl.LightningModule):
    """Lightning module that trains a 3-class classifier with cross-entropy loss.

    Args:
        model: Network called as ``model(x)`` whose output is passed to
            ``nn.CrossEntropyLoss`` and to 3-class torchmetrics metrics.

    Attributes:
        last_train_confusion_matrix: Confusion matrix of the most recently
            finished training epoch, or ``None`` before the first epoch ends.
        last_train_precision_recall_curve: Precision-recall curve result of the
            most recently finished training epoch, or ``None``.
    """

    def __init__(self, model):
        super().__init__()

        self.model = model
        self.criterion = nn.CrossEntropyLoss()

        # Training metrics
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=3)
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=3)
        self.precision_recall_curve = torchmetrics.PrecisionRecallCurve(task='multiclass', num_classes=3)
        self.confusion_matrix = torchmetrics.ConfusionMatrix(task='multiclass', num_classes=3)

        # Validation metrics: separate instances so validation batches never
        # mix into the accumulated training state (and vice versa).
        self.val_accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=3)
        self.val_f1 = torchmetrics.F1Score(task='multiclass', num_classes=3)

        # Non-scalar epoch results cannot go through self.log; they are kept here.
        self.last_train_confusion_matrix = None
        self.last_train_precision_recall_curve = None

    def on_fit_start(self):
        # Report the device here rather than in __init__: at construction time
        # the module has not yet been moved to the training device.
        print('Using device:', self.device)

    def _forward_and_loss(self, batch):
        """Normalise the batch, run the model and return (predictions, targets, loss)."""
        x, y = batch
        x, y = x.to(self.device), y.to(self.device)

        # Normalize the input. Training and validation share this step so both
        # see identically scaled inputs.
        # NOTE(review): the original normalised only during validation - confirm
        # that CoughDataset does not already normalise.
        x = (x - x.mean()) / x.std().clamp_min(1e-8)

        y_hat = self.model(x)
        loss = self.criterion(y_hat, y)
        return y_hat, y, loss

    def training_step(self, batch, batch_idx):
        """Compute the training loss for one batch and update the training metrics."""
        y_hat, y, loss = self._forward_and_loss(batch)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        # Scalar metrics: log the metric objects so Lightning computes the
        # epoch value from accumulated state and resets it afterwards.
        self.accuracy(y_hat, y)
        self.log('train_accuracy', self.accuracy, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.f1(y_hat, y)
        self.log('train_f1', self.f1, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        # Non-scalar metrics: self.log only accepts scalars, so accumulate here
        # and materialise the result once per epoch in on_train_epoch_end.
        self.precision_recall_curve.update(y_hat, y)
        self.confusion_matrix.update(y_hat, y)

        return loss

    def on_train_epoch_end(self):
        """Compute, store and reset the non-scalar training metrics for the finished epoch."""
        matrix = self.confusion_matrix.compute()
        self.last_train_confusion_matrix = matrix
        self.last_train_precision_recall_curve = self.precision_recall_curve.compute()

        # Each cell of the confusion matrix is a scalar and can be logged individually.
        for row_idx, row in enumerate(matrix):
            for col_idx, count in enumerate(row):
                self.log(f'train_confusion_matrix/{row_idx}_{col_idx}', count.float(), logger=True)

        self.confusion_matrix.reset()
        self.precision_recall_curve.reset()

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss for one batch and update the validation metrics."""
        y_hat, y, loss = self._forward_and_loss(batch)
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        # Metrics
        self.val_accuracy(y_hat, y)
        self.log('val_accuracy', self.val_accuracy, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.val_f1(y_hat, y)
        self.log('val_f1', self.val_f1, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def configure_optimizers(self):
        """Return an Adam optimiser over the wrapped model's parameters (lr=1e-3)."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        return optimizer

In [5]:
# Location of the data directory and of the compiled metadata CSV inside it.
DATA_PATH = 'data/'
METADATA_FILE = 'data/metadata_compiled.csv'

# Read the metadata table; the following cells filter it.
metadata_df = pd.read_csv(filepath_or_buffer=METADATA_FILE)

In [6]:
# Keep only the rows whose 'status' value is present.
has_status = metadata_df['status'].notna()
not_nan_df = metadata_df[has_status]

# Of those, keep the rows whose 'cough_detected' value exceeds 0.9.
above_threshold = not_nan_df['cough_detected'] > 0.9
filtered_df = not_nan_df[above_threshold]

# Display the columns of interest.
preview_columns = ['uuid', 'cough_detected', 'SNR', 'age', 'gender', 'status']
filtered_df[preview_columns]

Unnamed: 0,uuid,cough_detected,SNR,age,gender,status
1,00039425-7f3a-42aa-ac13-834aaa2b6b92,0.9609,16.151433,15.0,male,healthy
3,0009eb28-d8be-4dc1-92bb-907e53bc5c7a,0.9301,20.146058,34.0,male,healthy
5,001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f,0.9968,13.146502,21.0,male,healthy
11,00291cce-36a0-4a29-9e2d-c1d96ca17242,0.9883,14.603851,15.0,male,healthy
12,0029d048-898a-4c70-89c7-0815cdcf7391,1.0000,9.624196,35.0,male,symptomatic
...,...,...,...,...,...,...
27535,ffd42893-4119-4855-9aad-c67d8d392cc1,0.9414,28.530965,26.0,male,healthy
27539,ffe0658f-bade-4654-ad79-40a468aabb03,1.0000,21.960583,22.0,male,COVID-19
27540,ffe13fcf-c5c2-4a6a-a9fc-e010f4f033c1,0.9485,9.966762,31.0,male,symptomatic
27542,ffedc843-bfc2-4ad6-a749-2bc86bdac84a,1.0000,33.661082,23.0,male,healthy


In [7]:
# Loader and split configuration.
BATCH_SIZE = 32
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1
NUM_WORKERS = 4
SPLIT_SEED = 42  # fixes the random partition so Restart & Run All reproduces the same split

# Compare with a tolerance: exact float equality rejects valid fractions,
# e.g. 0.7 + 0.2 + 0.1 evaluates to 0.9999999999999999.
if abs(TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT - 1.0) > 1e-6:
    raise Exception('train_size + val_size + test_size must be equal to 1.0')

dataset = CoughDataset(filtered_df, DATA_PATH)

# A dedicated, seeded generator keeps the split independent of any other RNG use.
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [TRAIN_SPLIT, VAL_SPLIT, TEST_SPLIT],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [None]:
model = CoughModel()
classifier = LitCoughClassifier(model)

# `gpus=` was deprecated in Lightning 1.7 and removed in 2.0. `accelerator`/`devices`
# is the replacement; 'auto' picks an available accelerator and otherwise uses the CPU.
trainer = pl.Trainer(max_epochs=15, accelerator='auto', devices=1)

trainer.fit(classifier, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)