In [6]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
import torchmetrics
import wandb
import gc

from dataset import CoughDataset

import os
os.environ['WANDB_API_KEY'] = '72b34cc4c01608a271beb1a67ff4ce2ed54642b5'

2.0.0
2.0.0+cu117


ImportError: cannot import name 'seed_everything' from 'pytorch_lightning.utilities.seed' (/home/ubuntu/.local/lib/python3.10/site-packages/pytorch_lightning/utilities/seed.py)

In [None]:
class CoughDataModule(pl.LightningDataModule):
    def __init__(self, df, data_path, batch_size=32, num_workers=4, train_size=0.8, val_size=0.1, test_size=0.1):
        super().__init__()
        
        self.df = df
        self.data_path = data_path
        self.batch_size = batch_size
        self.num_workers = num_workers
        
        self.train_size = train_size
        self.val_size = val_size
        self.test_size = test_size
        
        if self.train_size + self.val_size + self.test_size != 1.0:
            raise Exception('train_size + val_size + test_size must be equal to 1.0')
        
    def setup(self, stage=None):
        dataset = CoughDataset(self.df, self.data_path)

        self.train_dataset, self.val_dataset, self.test_dataset = random_split(dataset, [self.train_size, self.val_size, self.test_size])
            
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)
    
    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)
    
    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)        

In [None]:
class CoughModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        
        # First Convolution Block with Relu and Batch Norm. Use Kaiming Initialization
        self.conv1 = nn.Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(8)
        nn.init.kaiming_normal_(self.conv1.weight, a=0.1)
        self.conv1.bias.data.zero_()

        # Second Convolution Block
        self.conv2 = nn.Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu2 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(16)
        nn.init.kaiming_normal_(self.conv2.weight, a=0.1)
        self.conv2.bias.data.zero_()

        # Third Convolution Block
        self.conv3 = nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu3 = nn.ReLU()
        self.bn3 = nn.BatchNorm2d(32)
        nn.init.kaiming_normal_(self.conv3.weight, a=0.1)
        self.conv3.bias.data.zero_()

        # Forth Convolution Block
        self.conv4 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu4 = nn.ReLU()
        self.bn4 = nn.BatchNorm2d(64)
        nn.init.kaiming_normal_(self.conv4.weight, a=0.1)
        self.conv4.bias.data.zero_()

        # Linear Classifier
        self.ap = nn.AdaptiveAvgPool2d(output_size=1)
        self.lin = nn.Linear(in_features=64, out_features=3)
        self.act = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.bn1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.bn2(x)
        
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.bn3(x)
        
        x = self.conv4(x)
        x = self.relu4(x)
        x = self.bn4(x)
        
        x = self.ap(x)
        x = x.view(x.shape[0], -1)
        
        x = self.lin(x)
        x = self.act(x)
        
        return x

In [None]:
class LitCoughClassifier(pl.LightningModule):
    def __init__(self, model, learning_rate=1e-3):
        super().__init__()
        
        self.model = model
        self.criterion = nn.CrossEntropyLoss()
        
        # Hyperparameters
        self.learning_rate = learning_rate
        
        # Metrics
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=3)
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=3)
        # self.precision_recall_curve = torchmetrics.PrecisionRecallCurve(task='multiclass', num_classes=3)
        # self.confusion_matrix = torchmetrics.ConfusionMatrix(task='multiclass', num_classes=3)

        print('Using device:', self.device)
        
    def training_step(self, batch, batch_idx):
        x, y = batch
        
        y_hat = self.model(x)
        
        loss = self.criterion(y_hat, y)
        self.log('train_loss', loss, on_epoch=True, logger=True)
        
        # Metrics
        accuracy = self.accuracy(y_hat, y)
        self.log('train_accuracy', accuracy, on_epoch=True, prog_bar=True, logger=True)
        f1 = self.f1(y_hat, y)
        self.log('train_f1', f1, on_epoch=True, prog_bar=True, logger=True)

        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        x, y = x.to(self.device), y.to(self.device)
        
        # TODO: Move to dataset
        # # Normalize the input
        # x = (x - x.mean()) / x.std()
        
        y_hat = self.model(x)
        
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss, on_epoch=True, logger=True)
        
        # Metrics
        accuracy = self.accuracy(y_hat, y)
        self.log('val_accuracy', accuracy, on_epoch=True, logger=True)
        f1 = self.f1(y_hat, y)
        self.log('val_f1', f1, on_epoch=True, logger=True)
        
        return loss
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
        return optimizer

In [None]:
METADATA_FILE = 'data/metadata_compiled.csv'
DATA_PATH = 'data/'

metadata_df = pd.read_csv(METADATA_FILE)

In [None]:
not_nan_df = metadata_df[metadata_df['status'].isna() == False]
filtered_df = not_nan_df[not_nan_df['cough_detected'] > 0.9]    # TODO: Set as a hyperparameter
filtered_df[['uuid', 'cough_detected', 'SNR', 'age', 'gender', 'status']]

Unnamed: 0,uuid,cough_detected,SNR,age,gender,status
1,00039425-7f3a-42aa-ac13-834aaa2b6b92,0.9609,16.151433,15.0,male,healthy
3,0009eb28-d8be-4dc1-92bb-907e53bc5c7a,0.9301,20.146058,34.0,male,healthy
5,001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f,0.9968,13.146502,21.0,male,healthy
11,00291cce-36a0-4a29-9e2d-c1d96ca17242,0.9883,14.603851,15.0,male,healthy
12,0029d048-898a-4c70-89c7-0815cdcf7391,1.0000,9.624196,35.0,male,symptomatic
...,...,...,...,...,...,...
27535,ffd42893-4119-4855-9aad-c67d8d392cc1,0.9414,28.530965,26.0,male,healthy
27539,ffe0658f-bade-4654-ad79-40a468aabb03,1.0000,21.960583,22.0,male,COVID-19
27540,ffe13fcf-c5c2-4a6a-a9fc-e010f4f033c1,0.9485,9.966762,31.0,male,symptomatic
27542,ffedc843-bfc2-4ad6-a749-2bc86bdac84a,1.0000,33.661082,23.0,male,healthy


In [None]:
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1
NUM_WORKERS = 4

def train(config=None):
    with wandb.init(config=config):
        config = wandb.config
        
        gc.collect()
        torch.cuda.empty_cache()
        
        try:    
            torch.manual_seed(69) # noice
            
            wandb_logger = WandbLogger(log_model=True)
            
            data_module = CoughDataModule(df=filtered_df, 
                              data_path=DATA_PATH, 
                              batch_size=config.batch_size, 
                              num_workers=NUM_WORKERS, 
                              train_size=TRAIN_SPLIT, 
                              val_size=VAL_SPLIT, 
                              test_size=TEST_SPLIT)

            model = CoughModel()
            classifier = LitCoughClassifier(model=model, 
                                            learning_rate=config.learning_rate)
            
            trainer = pl.Trainer(
                max_epochs=config.max_epochs,
                logger=wandb_logger,
            )
            
            trainer.fit(classifier, data_module)
            
        except Exception as e:
            print(e)
            wandb.finish()
            raise e
        
        del wandb_logger
        del data_module
        del model
        del classifier
        del trainer
        
        gc.collect()
        torch.cuda.empty_cache()

In [None]:
sweep_config = {
  "method": "bayes",
  "metric": {
        "name": "1_val/sharpe",
        "goal": "maximize"
  },
  "parameters": {
    "batch_size": {
        "values": [32, 64, 128]
    },
    "max_epochs": {
        "values": [1, 2]
    },
    "learning_rate": {
        "min": 0.001,
        "max": 0.01
    }
  }
}

sweep_id = wandb.sweep(sweep_config, project='cough-classifier')

wandb.agent(sweep_id, function=train, count=1)

Create sweep with ID: nt6eryc9
Sweep URL: https://wandb.ai/axel-toft/cough-classifier/sweeps/nt6eryc9


[34m[1mwandb[0m: Agent Starting Run: mxmm93bp with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.007900663863569152
[34m[1mwandb[0m: 	max_epochs: 2
[34m[1mwandb[0m: Currently logged in as: [33maxel-toft[0m. Use [1m`wandb login --relogin`[0m to force relogin


  rank_zero_deprecation(
Global seed set to 69
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | CoughModel         | 24.9 K
1 | criterion | CrossEntropyLoss   | 0     
2 | accuracy  | MulticlassAccuracy | 0     
3 | f1        | MulticlassF1Score  | 0     
-------------------------------------------------
24.9 K    Trainable params
0         Non-trainable params
24.9 K    Total params
0.100     Total estimated model params size (MB)


Using device: cpu


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.


0,1
epoch,▁▁▁▁████
train_accuracy,▁█
train_accuracy_epoch,▁█
train_accuracy_step,▁▆█▅
train_f1,▁█
train_f1_epoch,▁█
train_f1_step,▁▆█▅
train_loss_epoch,█▁
train_loss_step,█▄▁▄
trainer/global_step,▁▃▄▄▅▇██

0,1
epoch,1.0
train_accuracy,0.72439
train_accuracy_epoch,0.72285
train_accuracy_step,0.70312
train_f1,0.72439
train_f1_epoch,0.72285
train_f1_step,0.70312
train_loss_epoch,0.84046
train_loss_step,0.85647
trainer/global_step,237.0
