In [None]:
from google.colab import drive
import json
import os

# Mount Google Drive so the credentials file below is reachable from this runtime.
drive.mount('/content/drive')

# Load the API-key bundle stored on Drive; KEYS is read again by later cells.
with open("/content/drive/MyDrive/api_keys.json") as keys_file:
  KEYS = json.load(keys_file)

# Export the GitHub credentials so the shell cell that clones the repo can
# reference them as $GITHUB_USER / $GITHUB_TOKEN.
for env_name in ("GITHUB_USER", "GITHUB_TOKEN"):
  os.environ[env_name] = KEYS[env_name]

In [None]:
! rm -r cda
! git clone --branch lab-1 --filter=blob:none https://$GITHUB_USER:$GITHUB_TOKEN@github.com/DanyloMelnyk/ml_lab.git

In [None]:
! pip install -r /content/ml_lab/requirements.txt

In [None]:
# Report the NVIDIA GPU(s) visible to this runtime (sanity check before training).
! nvidia-smi

In [None]:
# Enable IPython's autoreload extension so edits to project modules are picked
# up without restarting the kernel.
%load_ext autoreload
# Mode 1: only modules registered through %aimport are reloaded before each cell runs.
%autoreload 1

In [None]:
import wandb
# Authenticate with Weights & Biases using the key from the KEYS bundle that
# was loaded from the Drive credentials file.
wandb.login(key=KEYS["WANDB_KEY"])

In [None]:
# Extract the pre-processed INbreast archive from Drive into ./data;
# -o overwrites already-extracted files without prompting, so the cell is re-runnable.
# NOTE(review): the archive is unpacked into `data` relative to the current
# directory, while the dataloader cell reads from `../data/INbreast Release 1.0/`.
# Confirm the working directory is changed in between (no `%cd` is visible here).
!unzip -o /content/drive/MyDrive/data/INbreastProcessed.zip -d data

In [None]:
from pathlib import Path

import lightning as L
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import wandb
from dataloader import create_dataloader
from lightning.pytorch.callbacks import (
    EarlyStopping,
    LearningRateMonitor,
    ModelCheckpoint,
)
# Take the logger from the same `lightning.pytorch` namespace as the Trainer and
# callbacks. The standalone `pytorch_lightning` package is a separate
# distribution, and mixing objects from the two is not supported by Lightning.
from lightning.pytorch.loggers import WandbLogger
from torch.optim.lr_scheduler import StepLR
from torchinfo import summary

In [None]:
%aimport classifier_trainer
%aimport models

In [None]:
# --- Experiment configuration ------------------------------------------------
SEED = 42          # fixed seed so sampling, weight init and training are repeatable
BATCH_SIZE = 20    # shared by the train and test loaders
NUM_WORKERS = 8    # DataLoader worker processes for both loaders
DATA_DIR = Path("../data/INbreast Release 1.0")

# CSV column holding the image paths to train on. Alternatives listed in the
# original notebook: unprocessed_file_path, ma_file_path, normalized_file_path,
# segmented_file_path.
images_col = "segmented_file_path"
# Selects the Adam branch (True) or the SGD + StepLR branch (False) in the optimizer cell.
optimizer_adam = True

# Seed Python, NumPy and PyTorch RNGs before any stochastic step. Without this
# the weighted sampler and everything created afterwards differ on every run.
L.seed_everything(SEED, workers=True)

train = create_dataloader(
    DATA_DIR / "train_processed_png_labels.csv",
    is_train=True,
    weighted_train_sampler=True,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    images_path_col=images_col,
)
test = create_dataloader(
    DATA_DIR / "test_processed_png_labels.csv",
    is_train=False,
    weighted_train_sampler=False,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    images_path_col=images_col,
)

# Shape of the first element of one training batch; passed to torchinfo's
# summary() as the model input size in the next cell.
batch_shape = next(iter(train))[0].shape
batch_shape

In [None]:
# Build the SqueezeNet-based network from the project's `models` module.
# NOTE(review): the meaning of the positional `False` flag is defined in
# models.create_squeezenet_model -- presumably "pretrained"; confirm there.
model = models.create_squeezenet_model(False)

# Per-layer overview (output size, parameter count, trainability) for an input
# shaped like one training batch; rows are labelled by variable name.
summary_columns = ("output_size", "num_params", "trainable")
summary(
    model,
    input_size=batch_shape,
    col_names=summary_columns,
    row_settings=["var_names"],
)

In [None]:
# Build the optimizer (and optional LR scheduler) selected by `optimizer_adam`.
# Both branches keep their hyper-parameters in a dict that is unpacked into the
# optimizer, so the dict is always the single source of truth.
if optimizer_adam:
    # These are the values the optimizer was actually constructed with before.
    # The previous `adam_params` dict listed lr=0.01, betas=(0.9, 0.999) and
    # weight_decay=0.0, but it was never passed to Adam; those unused values
    # are dropped so the dict no longer contradicts the optimizer.
    # `amsgrad` and `eps` are spelled out at torch's defaults, so the resulting
    # optimizer is unchanged.
    # Companion settings elsewhere in the notebook: batch size 20, 20 epochs.
    adam_params = {
        "lr": 0.001,
        "betas": (0.5, 0.999),
        "weight_decay": 0.0005,
        "amsgrad": False,
        "eps": 1e-08,
    }
    optimizer = torch.optim.Adam(model.parameters(), **adam_params)
    # No learning-rate schedule is used with Adam.
    scheduler = None
else:
    # Companion settings elsewhere in the notebook: batch size 20, 20 epochs.
    sgd_params = {
        "lr": 0.0001,
        "momentum": 0.9,
        "weight_decay": 0.0001,
    }
    optimizer = torch.optim.SGD(model.parameters(), **sgd_params)
    # Halve the learning rate every 5 epochs (drop factor 0.5, drop period 5).
    scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

In [None]:
# Log metrics to the `ml_lab` W&B project; log_model="all" is forwarded to the
# logger so checkpoints are uploaded as they are written.
wandb_logger = WandbLogger(project='ml_lab', log_model="all")

# Wrap the network, optimizer and scheduler in the project's Lightning module
# for the 3-class problem.
clf_trainer = classifier_trainer.ClassifierTrainer(
    net=model,
    optimizer=optimizer,
    scheduler=scheduler,
    num_classes=3,
)

# Callbacks: per-step LR logging, plus early stopping and checkpointing that
# both maximise the `roc_auc_avg_val` metric.
lr_monitor = LearningRateMonitor(logging_interval="step")
early_stopping = EarlyStopping(
    monitor="roc_auc_avg_val",
    patience=15,
    verbose=True,
    mode="max",
)
checkpoints = ModelCheckpoint(
    monitor="roc_auc_avg_val",
    save_last=True,
    mode="max",
)

trainer = L.Trainer(
    max_epochs=20,
    log_every_n_steps=3,
    logger=wandb_logger,
    callbacks=[lr_monitor, early_stopping, checkpoints],
)

# NOTE(review): the `test` loader doubles as the validation set, so early
# stopping and checkpoint selection are driven by test data.
try:
    trainer.fit(model=clf_trainer, train_dataloaders=train, val_dataloaders=test)
finally:
    # Always close the W&B run, even if training raises. Otherwise the run
    # stays open and the next execution of this cell would attach to it.
    wandb.finish()