# GitLab setup

In [None]:
# --- User-supplied settings: every blank below must be filled in before running ---
# GitLab access token; exported below as MLFLOW_TRACKING_TOKEN and interpolated
# into the (commented-out) clone URL. Keep the filled-in value out of version control.
token = 
# E-mail address written to the global git config (user.email).
email = 
# Name written to the global git config (user.name); also the namespace part of
# the (commented-out) clone URL.
username = 
# Repository name; only referenced by the (commented-out) clone command.
repo = 
# GitLab project id, interpolated into the MLflow tracking URI below.
# NOTE(review): this name shadows the Python builtin `id` for the rest of the session.
id = 
# Branch name; only referenced by the (commented-out) clone command.
branch = 
# Experiment identifier handed to mlflow_log in the last cell of the notebook.
experiment =

# Set the git identity with --global, i.e. for every repository of the current user.
!git config --global user.email {email}
!git config --global user.name {username}

import os
# Point MLflow at the GitLab project's MLflow endpoint and pass the token through
# the environment so that later MLflow calls can pick both up.
os.environ['MLFLOW_TRACKING_URI'] = f'https://gitlab.com/api/v4/projects/{id}/ml/mlflow'
os.environ['MLFLOW_TRACKING_TOKEN'] = token

In [None]:
# !git clone -b {branch} https://oauth2:{token}@gitlab.com/{username}/{repo}.git

# Data setup

In [4]:
from pathlib import Path
from fastai.vision.all import *
import albumentations
from DLOlympus.training.transforms import AlbumentationsTransform
from DLOlympus.training.utils import get_model

In [None]:
# Paths
root = 'path_to_main_folder/'
images_folder = root/'images/'
save_path = root/
other_paths = root/'declare_other_paths/'

In [None]:
# Hyperparameters

# Target image size; reused for IMG_SIZE and for the final crop transform below.
h, w = 200, 200

# Single source of truth for the run configuration. The same dict is passed to
# get_model() and to mlflow_log() later in the notebook; an 'LR' entry is added
# to it after the learning-rate finder has been run.
hyperparameters = {
    # Batch size handed to block.dataloaders(bs=...)
    'BS': 16,
    # Number of epochs handed to learn.fine_tune(...)
    'EPOCHS': 30,
    'IMG_SIZE': (h, w),      # (height, width)
    # Weight decay handed to vision_learner(wd=...)
    'WD': 0.15,
    # Albumentations augmentations; wrapped in albumentations.Compose and applied
    # as an item transform after the resize.
    'TRANSFORMS': [
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
        albumentations.Rotate(p=0.5),
        albumentations.Sharpen(p=0.5),
        albumentations.ColorJitter(brightness=0.3, contrast=0.5, saturation=0.5, hue=0.0, p=0.5),
        albumentations.RGBShift(p=0.5),
        albumentations.GaussianBlur(p=0.5),
        albumentations.GaussNoise(p=0.5),
        # Always applied (p=1.0). The first argument is the (min, max) range
        # int(0.75*h)..h; h and w are passed positionally after it.
        # NOTE(review): the positional (height, width) form depends on the installed
        # albumentations version - confirm against the pinned release.
        albumentations.RandomSizedCrop((int(0.75*h),h), h, w, p=1.0)
        ],
    # Architecture selection; presumably read by get_model(hyperparameters) - verify there.
    'ARCH': 'resnet50',
    'ARCH_TYPE': 'torchvision',
    # Seed used by set_seed() and to reseed the dataloaders' RNG
    'SEED': 18,
}

# Metrics and callbacks
metrics = [accuracy, F1Score(average='macro')]
# The checkpoint callback monitors 'f1_score' (the macro F1 metric above is the
# presumable source of that name - TODO confirm) and stores optimizer state too.
callbacks = [SaveModelCallback(monitor='f1_score', with_opt=True), ShowGraphCallback]

In [None]:
import pandas as pd
from sklearn.model_selection import StratifiedGroupKFold

def get_data():
    """Collect the raw inputs for the dataset (template: fill in the three blanks).

    Returns:
        tuple: ``(image_files, labels, groups)``. The three values are passed
        unchanged to ``create_df``, which stores them in the ``file_path``,
        ``label`` and ``groups`` columns and feeds them to
        ``StratifiedGroupKFold.split``; they are therefore expected to be
        equal-length sequences with one entry per image.
    """
    image_files = 
    labels = 
    groups = 
    return image_files, labels, groups

def create_df(image_files, labels, groups, n_splits=10, n_valid=2):
    """Build the dataframe read by the DataBlock, including the validation flag.

    The samples are partitioned into ``n_splits`` folds with
    ``StratifiedGroupKFold`` (stratified on ``labels``, grouped by ``groups``).
    Rows that fall into the first ``n_valid`` folds are flagged as validation
    data; all remaining rows are training data.

    Args:
        image_files: One entry per sample; stored in the ``file_path`` column.
        labels: One label per sample; stored in the ``label`` column.
        groups: One group id per sample; stored in the ``groups`` column.
        n_splits: Number of folds to create.
        n_valid: How many of those folds are used for validation.

    Returns:
        pandas.DataFrame: columns ``file_path``, ``label``, ``groups`` and the
        boolean ``is_valid``.

    Raises:
        ValueError: If ``n_valid`` is negative or larger than ``n_splits``.
    """
    if not 0 <= n_valid <= n_splits:
        raise ValueError(
            f'n_valid must be between 0 and n_splits ({n_splits}), got {n_valid}'
        )

    # Initiate dataframe
    df = pd.DataFrame()
    df['file_path'] = image_files
    df['label'] = labels
    df['groups'] = groups

    # Fold id of every row, kept outside the dataframe so no temporary columns
    # have to be added and deleted again; -1 means "not assigned to any fold".
    fold = pd.Series(-1, index=df.index)
    cv = StratifiedGroupKFold(n_splits=n_splits)
    for fold_id, (_, valid_idxs) in enumerate(cv.split(image_files, labels, groups)):
        # split() yields positional indices, so write by position (.iloc); a
        # label-based lookup breaks when the inputs carry a non-default index.
        fold.iloc[valid_idxs] = fold_id

    # The first n_valid folds form the validation set
    df['is_valid'] = (fold >= 0) & (fold < n_valid)
    return df

In [None]:
# Seed before anything random is created.
# NOTE(review): the second positional argument is presumably fastai's
# `reproducible` switch - confirm against the installed fastai version.
set_seed(hyperparameters['SEED'], True)

# Datablock
# Inputs are images read from the 'file_path' column, targets are categories read
# from the 'label' column, and the train/valid split follows the boolean
# 'is_valid' column produced by create_df.
block = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_x=ColReader('file_path'),
    get_y=ColReader('label'),
    splitter=ColSplitter(col='is_valid'),
    # Per-item pipeline: resize to IMG_SIZE with the 'squish' method, then apply
    # the composed albumentations augmentations from the hyperparameters.
    item_tfms=[
        Resize(hyperparameters['IMG_SIZE'], method='squish'), 
        AlbumentationsTransform(albumentations.Compose(hyperparameters['TRANSFORMS']))])

# Dataframe
image_files, labels, groups = get_data()
df = create_df(image_files, labels, groups)

# Dataloaders
dls = block.dataloaders(df, bs=hyperparameters['BS'], shuffle=True)
# Reseed the dataloaders' own RNG with the configured seed.
dls.rng.seed(hyperparameters['SEED'])

# Sanity check
# Print the number of classes and the vocabulary as seen by the dataloaders.
num_classes = dls.c
classes = dls.vocab
print('Number of clases: ', num_classes)
print('Names of classes: ', classes)

In [None]:
# Show batch: visual sanity check of up to 16 samples from the training loader
dls.train.show_batch(max_n=16, figsize=(10,8))

In [None]:
# Show transforms: same call as the batch preview but with unique=True, which
# presumably repeats a single item so that only the random augmentations differ
# between the tiles (verify against the fastai show_batch documentation)
dls.train.show_batch(max_n=16, unique=True, figsize=(10,8))

In [None]:
# Learner
# Built in two steps: first the vision learner on top of whatever
# get_model(hyperparameters) returns, then the switch to mixed precision.
learn = vision_learner(
    dls,
    get_model(hyperparameters),
    normalize=True,
    pretrained=True,
    opt_func=Adam,
    metrics=metrics,
    wd=hyperparameters['WD'],
)
learn = learn.to_fp16()

# Training

In [None]:
# Find LR: run the learning-rate finder; its result is meant to guide the value
# entered for hyperparameters['LR'] in the next cell
learn.lr_find()

In [None]:
# Set LR (template blank: fill in the chosen learning rate).
# The value is used as base_lr by the training cell below.
hyperparameters['LR'] = 

In [None]:
# Train: fine-tune for the configured number of epochs at the chosen base
# learning rate, with the checkpoint/graph callbacks defined alongside the
# hyperparameters
learn.fine_tune(hyperparameters['EPOCHS'], base_lr=hyperparameters['LR'], cbs=callbacks)

# Results and logs

In [None]:
# Persist the trained learner: exported pickle plus a weights checkpoint
learn.export(f'{save_path}/model.pkl')
learn.save(f'{save_path}/model')

# Training curves are plotted first; see the warning on the get_preds line.
from DLOlympus.training.plots import plot_confusion_matrix, plot_losses, plot_metrics
_ = plot_losses(learn, save_path)
_ = plot_metrics(learn, save_path)
probs, ground_truths = learn.get_preds(ds_idx=1)        # DO NOT PREDICT BEFORE PLOTTING LOSSES AND METRICS
# Predicted class index = position of the highest probability in each row
predictions = np.argmax(probs, axis=1)
_ = plot_confusion_matrix(ground_truths, predictions, learn.dls.vocab, save_path)

# Per-sample prediction tables for both loaders
from DLOlympus.training.tables import get_predictions_table
train_table = get_predictions_table(learn, learn.dls.train)
valid_table = get_predictions_table(learn, learn.dls.valid)
# Use an explicit '/' separator, like the model paths above: a pathlib.Path has
# no trailing slash, so f'{save_path}train_table.csv' would glue the file name
# onto the directory name and write outside save_path.
train_table.to_csv(f'{save_path}/train_table.csv', index=False)
valid_table.to_csv(f'{save_path}/valid_table.csv', index=False)

# Final metrics (computed with with_tta=True); logged to MLflow in the next cell
from DLOlympus.training.utils import get_metrics
results = get_metrics(learn, with_tta=True)

In [None]:
from DLOlympus.training.mlflow import mlflow_log

# Log the run: passes the output folder, the hyperparameters dict, the computed
# metrics and the experiment identifier from the setup cell to mlflow_log.
# The MLflow tracking URI and token were exported as environment variables in
# the setup cell.
mlflow_log(save_path, hyperparameters, results, experiment)