In [1]:
#Import
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger
from torch.utils.data import DataLoader
from torchvision.models import efficientnet_b0

from utils.dataset import BirdDataset

import matplotlib.pyplot as plt
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torchmetrics as tm
import wandb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Weight and biases
wandb.login(key = '15b11b57c09bdcd801af92cecb362a1f2634d213')
wandb.init(project="cuitcuit")
wandb_logger = WandbLogger()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnathan-vidal[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/utilisateur/.netrc


  rank_zero_warn(


In [3]:
class Lumiere(pl.LightningModule):
    def __init__(self, model : nn.Module, criterion : nn.Module, num_classes : int) -> None:
        super().__init__()
        self.model = model
        self.criterion = criterion
        
        # logger
        self.save_hyperparameters()
        
        # metrics
        self.accuracy = tm.Accuracy(task = 'multiclass', num_classes = num_classes)
        self.f1 = tm.F1Score(task = 'multiclass', num_classes = num_classes)
        self.cf = tm.ConfusionMatrix(task = 'multiclass', num_classes = num_classes)

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        x, y = batch
        logits = self.model(x)
        print(logits.shape)
        loss = self.criterion(logits, y)
        
        # metrics
        self.accuracy(logits, y)
        self.f1(logits, y)
        
        # log metrics
        self.log('train_loss', loss, prog_bar=True, logger = True, on_epoch=True)
        self.log('train_acc', self.accuracy, prog_bar=True, logger = True, on_epoch=True)
        self.log('train_f1', self.f1, prog_bar=True, logger = True, on_epoch=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        # validation_step defines the validation loop.
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        
        # metrics
        self.accuracy(logits, y)
        self.f1(logits, y)
        
        # log metrics
        self.log('valid_loss', loss, prog_bar=True, logger = True, on_epoch=True)
        self.log('valid_acc', self.accuracy, prog_bar=True, logger = True, on_epoch=True)
        self.log('valid_f1', self.f1, prog_bar=True, logger = True, on_epoch=True)
        return loss
    
    def test_step(self, batch, batch_idx):
        # test_step defines the test loop.
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        
        # metrics
        self.accuracy(logits, y)
        self.f1(logits, y)
        
        # log metrics
        self.log('test_loss', loss, prog_bar=True, logger = True, on_epoch=True)
        self.log('test_acc', self.accuracy, prog_bar=True, logger = True, on_epoch=True)
        self.log('test_f1', self.f1, prog_bar=True, logger = True, on_epoch=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = 100, eta_min = 1e-6)
        return [optimizer], [lr_scheduler]

In [4]:
# Data
num_classes = 10
csv = pd.read_csv("dataset/spec.csv")

counts = csv['en'].value_counts()
chosen = counts[counts>=100].head(num_classes).index
csv = csv[csv['en'].isin(chosen)]

csv = csv.sample(frac = 1).reset_index(drop = True)
class_mapping = {bird : i for i, bird in enumerate(chosen)}
print(class_mapping)
train = csv.iloc[:int(len(csv)*0.8)]
valid = csv.iloc[int(len(csv)*0.8):int(len(csv)*0.9)]
test = csv.iloc[int(len(csv)*0.9):]

train_dataset = BirdDataset(train, "dataset/spectrograms/", class_mapping)
print(train_dataset[0][0].shape)
valid_dataset = BirdDataset(valid, "dataset/spectrograms/", class_mapping)
test_dataset = BirdDataset(test, "dataset/spectrograms/", class_mapping)

train_loader = DataLoader(train_dataset, batch_size = 16, shuffle = True, num_workers = 4, pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size = 16, shuffle = False, num_workers = 4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size = 16, shuffle = False, num_workers = 4, pin_memory=True)

{'houwre': 0, 'houspa': 1, 'comrav': 2, 'eursta': 3, 'redcro': 4, 'gbwwre1': 5, 'sonspa': 6, 'spotow': 7, 'barswa': 8, 'norcar': 9}
torch.Size([3, 128, 313])


In [6]:
# examples = next(iter(train_loader))

# for label, img  in enumerate(examples):
#    plt.imshow(img[0,0,:,:])
#    plt.show()
#    print(f"Label: {label}")

In [7]:
model = efficientnet_b0()
model.classifier = nn.Sequential(
          nn.Dropout(p=0.2, inplace=True),
          nn.Linear(in_features=model.classifier[1].in_features, out_features = num_classes, bias = True)
      )
criterion = nn.CrossEntropyLoss()
model_pl = Lumiere(model, criterion, len(chosen))
callbacks = [ModelCheckpoint(dirpath = 'save/', monitor = 'valid_loss', save_top_k = 1, mode = 'min')]

  rank_zero_warn(
  rank_zero_warn(


In [9]:
#training
trainer = Trainer(accelerator = 'gpu',
                  devices = 1,
                  logger = wandb_logger,)
                  #callbacks= callbacks,)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
trainer.fit(model = model_pl, 
             train_dataloaders = train_loader, 
             val_dataloaders = valid_loader)

  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                      | Params
--------------------------------------------------------
0 | model     | EfficientNet              | 4.0 M 
1 | criterion | CrossEntropyLoss          | 0     
2 | accuracy  | MulticlassAccuracy        | 0     
3 | f1        | MulticlassF1Score         | 0     
4 | cf        | MulticlassConfusionMatrix | 0     
--------------------------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.081    Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

RuntimeError: 0D or 1D target tensor expected, multi-target not supported