In [1]:
# Optional PyTorch/XLA environment setup, kept disabled.
# NOTE(review): presumably only needed when training on a TPU runtime
# (see the commented-out `tpu_cores` Trainer option) — confirm before enabling.
#!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
#!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev

In [2]:
!pip install --upgrade pip
!pip install --upgrade albumentations
!pip install neptune-client
!pip install pytorch-lightning
!pip install pytorch_ranger

!pip install --upgrade pip
!pip install --upgrade --force-reinstall --no-deps kaggle
!pip install timm
!pip install yacs

Collecting pip
  Downloading pip-20.3.3-py2.py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 1.3 MB/s 
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 20.2.4
    Uninstalling pip-20.2.4:
      Successfully uninstalled pip-20.2.4
Successfully installed pip-20.3.3
Collecting kaggle
  Downloading kaggle-1.5.10.tar.gz (59 kB)
[K     |████████████████████████████████| 59 kB 1.6 MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25ldone
[?25h  Created wheel for kaggle: filename=kaggle-1.5.10-py3-none-any.whl size=73267 sha256=32791299d24e455613aa8aa27e38f68b4d149fb2d761acdc60cb1d3194170137
  Stored in directory: /home/giorgio/.cache/pip/wheels/a6/c1/5e/2b235e19b52c15ad35812881f8de4461399907e219c03bf7b5
Successfully built kaggle
Installing collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.5.9
    Uninstal

In [3]:
!mkdir /root/.kaggle/
!cp -avr /content/drive/MyDrive/kaggle.json /root/.kaggle/

!kaggle competitions download -c cassava-leaf-disease-classification
!git clone https://github.com/GenoM87/cassava_leaf.git
!mkdir cassava_leaf/data
!mkdir cassava_leaf/experiments
!unzip cassava-leaf-disease-classification.zip -d cassava_leaf/data

!python cassava_leaf/src/create_folds.py

mkdir: impossibile creare la directory "/root/.kaggle/": Permesso negato
cp: failed to access '/root/.kaggle/': Permesso negato
Downloading cassava-leaf-disease-classification.zip to /home/giorgio/Scrivania/Kaggle/cassava_leaf/notebooks
  1%|▎                                     | 54.0M/5.76G [00:11<21:20, 4.78MB/s]^C
  1%|▎                                     | 54.0M/5.76G [00:11<21:02, 4.86MB/s]
User cancelled operation
Clone in 'cassava_leaf' in corso...
remote: Enumerating objects: 81, done.[K
remote: Counting objects: 100% (81/81), done.[K
remote: Compressing objects: 100% (53/53), done.[K
remote: Total 81 (delta 31), reused 71 (delta 25), pack-reused 0[K
Decompressione degli oggetti in corso: 100% (81/81), 3.34 MiB | 2.94 MiB/s, fatto.
Archive:  cassava-leaf-disease-classification.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile 

In [2]:
import os
# Switch the working directory to the sibling `src` folder.
# NOTE(review): presumably needed so the project-local imports in the next cell
# (`config`, `models`, `data_builder`) resolve — confirm against the repo layout.
os.chdir('../src')

In [3]:
import numpy as np
import pandas as pd

import sys, os, time, logging, datetime, random
from pathlib import Path

import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint

from config import _C as cfg
from models.create_model import CustomNet

from data_builder import build_valid_loader, build_train_loader
from models.optimizer import make_optimizer
from models.scheduler import make_scheduler

# TODO: try using this loss
from models.loss import BiTemperedLogisticLoss

from pytorch_lightning.loggers import NeptuneLogger
from pytorch_lightning.callbacks import ModelCheckpoint

In [4]:
# Create the output directory for this experiment:
# <PROJECT_DIR>/experiments/<model name>/<today's date, YYYY-MM-DD>
path_exp = os.path.join(
    cfg.PROJECT_DIR,
    'experiments',
    cfg.MODEL.NAME,
    datetime.date.today().isoformat(),
)

# Missing parent folders are created; an already existing directory is fine.
os.makedirs(path_exp, exist_ok=True)

In [5]:
def set_seed(seed=2004):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and puts cuDNN in deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different algorithms from run to run,
    # so it has to be disabled for `deterministic` to be effective.
    torch.backends.cudnn.benchmark = False
    
# Seed all generators with the value stored in the project configuration.
set_seed(cfg.RANDOM_STATE)

In [6]:
class cassavaModel(pl.LightningModule):
    """LightningModule that trains ``CustomNet`` with a bi-tempered logistic loss.

    The module bundles the network, the loss, train/validation accuracy
    metrics, the optimizer/scheduler pair and both dataloaders; everything is
    configured from the ``cfg`` object passed to the constructor.
    """

    def __init__(self, cfg):
        """Build the network, the metrics and the loss.

        Args:
            cfg: project configuration object. This class reads
                ``cfg.SOLVER.BIT_T1``, ``cfg.SOLVER.BIT_T2`` and
                ``cfg.SOLVER.SMOOTHING_LOSS`` directly and forwards the whole
                object to ``CustomNet`` and to the optimizer, scheduler and
                dataloader factories.
        """
        super().__init__()

        self.cfg = cfg
        self.model = CustomNet(
            self.cfg
        )
        self.train_accuracy = pl.metrics.Accuracy()
        self.valid_accuracy = pl.metrics.Accuracy()
        self.loss_fn = BiTemperedLogisticLoss(
            t1=self.cfg.SOLVER.BIT_T1,
            t2=self.cfg.SOLVER.BIT_T2,
            smoothing=self.cfg.SOLVER.SMOOTHING_LOSS
        )

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the loss for one training batch.

        Args:
            batch: ``(x, y, ids)`` triple produced by the train loader.
            batch_idx: index of the batch (unused).

        Returns:
            Dict with the key ``'loss'``, which Lightning back-propagates and
            which ``training_epoch_end`` aggregates.
        """
        x, y, ids = batch
        y_hat = self.model(x)
        loss = self.loss_fn(y_hat, y)
        # Accumulate accuracy statistics for the epoch-level metric.
        # assumes y_hat is (batch, n_classes) and y holds class indices — TODO confirm
        self.train_accuracy.update(y_hat.detach().argmax(dim=1), y)
        self.log('train_loss', loss)
        # Lightning requires the key 'loss' when a dict is returned.
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        """Compute and log the loss for one validation batch.

        Args:
            batch: ``(x, y, ids)`` triple produced by the validation loader.
            batch_idx: index of the batch (unused).

        Returns:
            Dict with the key ``'val_loss'``, aggregated in
            ``validation_epoch_end``.
        """
        x, y, ids = batch
        y_hat = self.model(x)
        # The loss lives on the module; a bare `loss_fn` raised NameError.
        val_loss = self.loss_fn(y_hat, y)
        # assumes y_hat is (batch, n_classes) and y holds class indices — TODO confirm
        self.valid_accuracy.update(y_hat.detach().argmax(dim=1), y)
        self.log('val_loss', val_loss)
        return {'val_loss': val_loss}

    def training_epoch_end(self, outputs):
        """Log the mean training loss and the training accuracy of the epoch.

        Args:
            outputs: list of the dicts returned by ``training_step``.
        """
        train_loss_mean = torch.stack([output["loss"] for output in outputs]).mean()
        train_acc_mean = self.train_accuracy.compute()
        # Start the next epoch from empty metric state.
        self.train_accuracy.reset()
        self.log_dict(
            {"train_loss": train_loss_mean,
            "train_acc": train_acc_mean,
            "step": self.current_epoch}
        )

    def validation_epoch_end(self, outputs):
        """Log the mean validation loss and the validation accuracy of the epoch.

        Args:
            outputs: list of the dicts returned by ``validation_step``.
        """
        val_loss_mean = torch.stack([output["val_loss"] for output in outputs]).mean()
        valid_acc_mean = self.valid_accuracy.compute()
        # Start the next epoch from empty metric state.
        self.valid_accuracy.reset()
        log_dict = {"val_loss": val_loss_mean, "val_acc": valid_acc_mean}
        self.log_dict(log_dict, prog_bar=True)
        self.log_dict({"step": self.current_epoch})

    def configure_optimizers(self):
        """Create the optimizer and scheduler via the project factories.

        Returns:
            Dict with the optimizer, the LR scheduler and ``'val_loss'`` as
            the quantity monitored by the scheduler.
        """
        optimizer = make_optimizer(self.model, self.cfg)
        scheduler = make_scheduler(optimizer, self.cfg)
        return {
        'optimizer': optimizer,
        'lr_scheduler': scheduler,
        'monitor': 'val_loss'
        }

    def train_dataloader(self):
        """Return the training loader built from ``cfg``."""
        loader = build_train_loader(self.cfg)
        return loader

    def val_dataloader(self):
        """Return the validation loader built from ``cfg``."""
        loader = build_valid_loader(self.cfg)
        return loader

In [7]:
# Instantiate the Lightning module from the project configuration.
model = cassavaModel(cfg)

In [8]:
# Experiment tracking with Neptune.
# The API token is read from the NEPTUNE_API_TOKEN environment variable so the
# secret never lives in the notebook; a missing variable fails fast with KeyError.
# NOTE(review): a token was previously committed in this cell — revoke/rotate it.
neptuneLogger = NeptuneLogger(
  api_key = os.environ['NEPTUNE_API_TOKEN'],
  project_name='geno/cassava-kaggle',
  params = cfg,
  close_after_fit=False,
  tags=['Cassava','classification']
)

# Checkpointing: monitor `val_loss` (lower is better) and store weights only
# inside the experiment directory.
checkpoint = ModelCheckpoint(
    dirpath = path_exp,
    save_weights_only=True,
    monitor = 'val_loss',
    filename='cassava-{epoch:02d}-{val_loss:.4f}',
    mode='min',
)

psutil is not installed. You will not be able to abort this experiment from the UI.
psutil is not installed. Hardware metrics will not be collected.
https://ui.neptune.ai/geno/cassava-kaggle/e/CAS-20
NeptuneLogger will work in online mode


In [9]:
# Trainer configuration. Hardware options first, then schedule, then outputs.
# NOTE(review): the stored cell output mentions 16-bit precision although
# `precision=16` is commented out here — the output looks stale.
trainer = pl.Trainer(
    gpus=1,
    # tpu_cores=8,
    # precision=16,
    max_epochs=cfg.SOLVER.NUM_EPOCHS,
    accumulate_grad_batches=cfg.SOLVER.ACC_GRADIENT,
    default_root_dir=path_exp,
    logger=neptuneLogger,
    callbacks=[checkpoint],
)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


In [10]:
# Start training; the loaders come from the module's own
# `train_dataloader` / `val_dataloader` methods.
trainer.fit(model)


  | Name           | Type                   | Params
----------------------------------------------------------
0 | model          | CustomNet              | 23.0 M
1 | train_accuracy | Accuracy               | 0     
2 | valid_accuracy | Accuracy               | 0     
3 | loss_fn        | BiTemperedLogisticLoss | 0     
----------------------------------------------------------
23.0 M    Trainable params
0         Non-trainable params
23.0 M    Total params
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]