# Data setup

In [None]:
from pathlib import Path
from fastai.vision.all import *
import albumentations

In [None]:
# Paths
# All locations are plain strings built from `root`; the folder strings keep
# their trailing '/' because later code concatenates file names onto them.
root = '' # change as needed
images_folder = root+'results/synthetic_data/G2.3_ts50/img/'
save_path = root+'results/models/'
target_path = root+'results/synthetic_data/G2.3_ts50/seed0_40k_y.npy'
# Create the output directory from Python rather than via `!mkdir -p`:
# the shell form is unquoted (breaks when `root` contains spaces) and is not portable.
Path(save_path).mkdir(parents=True, exist_ok=True)

In [None]:
# Hyperparameters

# Image height and width; `h` is also used further down as the number of
# regression outputs.
h, w = 80, 50

# Single dictionary holding every tunable setting read by the cells below.
hyperparameters = dict(
    BS=16,                    # batch size
    EPOCHS=30,                # epochs passed to fine_tune
    IMG_SIZE=(h, w),          # (height, width)
    WD=0.0,                   # weight decay
    TRANSFORMS=[              # Albumentations colour augmentations, applied in this order
        albumentations.ColorJitter(brightness=0.3, contrast=0.5, saturation=0.5, hue=0.0, p=0.5),
        albumentations.RGBShift(p=0.5),
    ],
    ARCH='convnext_tiny_in22k',
    SEED=18,
    PRETRAINED=True,
)

# Metrics and callbacks

# (min, max) interval used to normalise the error below.
a_range = (-1,1)

def rmse_a(inp, targ):
    """RMSE between `inp` and `targ`, expressed as a percentage of the width of `a_range`.

    NOTE(review): as a plain function metric this is presumably averaged per
    batch by fastai rather than computed over the whole validation set - confirm.
    """
    lower, upper = a_range
    return rmse(inp, targ)*100/(upper-lower)

metrics = [rmse_a]

# Keep the checkpoint with the lowest `rmse_a` (optimizer state included) and
# plot the losses while training.
callbacks = [
    SaveModelCallback(monitor='rmse_a', comp=np.less, with_opt=True),
    ShowGraphCallback,
]

In [None]:
import pandas as pd
from sklearn.model_selection import KFold

def get_data(images_folder, target_path):
    """Return the ordered image paths and the label array that goes with them.

    The images are addressed as ``0.png``, ``1.png``, ... inside `images_folder`,
    one per image file found there, so that the i-th path lines up with the
    i-th entry of the array stored at `target_path`.

    Args:
        images_folder: directory holding the images (with or without a trailing separator).
        target_path: path of the ``.npy`` file holding the labels.

    Returns:
        A tuple ``(image_files, labels)``: a list of path strings and the loaded array.

    Raises:
        ValueError: if the number of images differs from the number of label rows.
    """
    import os  # local import so the function does not rely on a star-import providing `os`

    n_images = len(get_image_files(images_folder))
    # os.path.join leaves a folder that already ends in a separator untouched and
    # inserts one otherwise, so a missing trailing '/' no longer corrupts the paths.
    image_files = [os.path.join(images_folder, f'{i}.png') for i in range(n_images)]
    labels = np.load(target_path)
    if len(labels) != n_images:
        raise ValueError(
            f'Found {n_images} images in {images_folder!r} but {len(labels)} labels in {target_path!r}')
    return image_files, labels

def create_df(image_files, labels, n_splits=10, n_valid=2):
    """Build the dataframe consumed by the DataBlock, with a train/validation flag.

    The rows are partitioned into `n_splits` folds with an unshuffled `KFold`;
    rows falling in the first `n_valid` folds are marked as validation.

    Args:
        image_files: sequence of image paths, one per row.
        labels: iterable yielding one label per image, in the same order.
        n_splits: number of folds passed to `KFold`.
        n_valid: how many of the leading folds are used for validation.

    Returns:
        DataFrame with columns ``file_path``, ``label`` and boolean ``is_valid``.
    """
    df = pd.DataFrame({'file_path': image_files})
    # One cell per row; for a 2-D label array each cell holds one row of it.
    df['label'] = list(labels)

    # Fold index of every row, kept outside the frame so no temporary columns
    # have to be added and deleted again.
    fold_ids = pd.Series(-1, index=df.index)
    for fold, (_, valid_idxs) in enumerate(KFold(n_splits=n_splits).split(image_files)):
        fold_ids.iloc[valid_idxs] = fold

    df['is_valid'] = (fold_ids >= 0) & (fold_ids < n_valid)
    return df

In [None]:
# Dataframe
# Load the image paths and labels, then tag each row as train or validation
# using the default fold settings of `create_df`.
image_files, labels = get_data(images_folder=images_folder, target_path=target_path)
df = create_df(image_files=image_files, labels=labels)

In [None]:
class AlbumentationsTransform(DisplayedTransform):
    """Adapter that runs an Albumentations pipeline inside a fastai transform list.

    `train_aug` is any callable following the Albumentations convention: it is
    called as ``train_aug(image=<numpy array>)`` and returns a dict whose
    ``'image'`` entry is the augmented array.
    """

    # Per fastai convention split_idx=0 limits the transform to the training
    # subset; order=2 presumably schedules it after Resize - confirm against fastai.
    split_idx = 0
    order = 2

    def __init__(self, train_aug):
        store_attr()

    def encodes(self, img: PILImage):
        """Augment a single image and hand it back as a `PILImage`."""
        pixels = np.array(img)
        augmented = self.train_aug(image=pixels)
        return PILImage.create(augmented['image'])

In [None]:
# Seed the random number generators before the data pipeline is built, so the
# run starts from a fixed state.
set_seed(hyperparameters['SEED'], True)

# Datablock
# Inputs are images read from the 'file_path' column; targets come from the
# 'label' column and are declared as `h` regression outputs. Rows are assigned
# to train/validation by the boolean 'is_valid' column.
block = DataBlock(
    blocks=(ImageBlock, RegressionBlock(n_out=h)),
    get_x=ColReader('file_path'),
    get_y=ColReader('label'),
    splitter=ColSplitter(col='is_valid'),
    # Per-item transforms: squish every image to IMG_SIZE, then run the
    # Albumentations colour augmentations composed from the hyperparameters.
    item_tfms=[
        Resize(hyperparameters['IMG_SIZE'], method='squish'),
        AlbumentationsTransform(albumentations.Compose(hyperparameters['TRANSFORMS']))])

# Dataloaders
dls = block.dataloaders(df, bs=hyperparameters['BS'], shuffle=True)
# Also seed the loader's own RNG so the shuffling order is repeatable.
dls.rng.seed(hyperparameters['SEED'])

# Sanity check
# `dls.c` is the output size the learner will be built with; despite the
# variable name this is a regression task, hence "nodes" in the message.
num_classes = dls.c
print('Number of nodes: ', num_classes)

In [None]:
# Show batch
# Visual check of up to 16 training samples, i.e. after the item transforms
# configured on the DataBlock have been applied.
dls.train.show_batch(max_n=16, figsize=(15,12))

In [None]:
# Learner
# `y_range` bounds the model's predictions. It reuses `a_range` (the interval
# `rmse_a` normalises by) instead of repeating the literal, so the output range
# and the metric scaling cannot drift apart.
# `.to_fp16()` switches training to mixed precision.
learn = vision_learner(dls,
                        hyperparameters['ARCH'],
                        normalize=True,
                        pretrained=hyperparameters['PRETRAINED'],
                        opt_func=Adam,
                        metrics=metrics,
                        y_range=a_range,
                        wd=hyperparameters['WD']).to_fp16()

# Training

In [None]:
# Find LR
# Run fastai's learning-rate finder; its output is presumably what the
# learning rate set in the next cell was chosen from.
learn.lr_find()

In [None]:
# Set LR
# Stored alongside the other hyperparameters so the training call reads every
# setting from one place. NOTE(review): this value is hard-coded, not taken
# programmatically from the LR finder - revisit if data or architecture changes.
hyperparameters['LR'] = 3e-3

In [None]:
# Train model
# Fine-tune for EPOCHS epochs at the chosen base learning rate. The callbacks
# (defined with the metrics) checkpoint the model with the lowest `rmse_a`
# and plot the losses during training.
learn.fine_tune(hyperparameters['EPOCHS'], base_lr=hyperparameters['LR'], cbs=callbacks)

In [None]:
# Save model
# Resolve the output directory to an absolute path first. fastai's `learn.save`
# places relative names under `learn.path/learn.model_dir`, so a relative
# `save_path` would point at a nested directory that was never created; an
# absolute path is used as-is by both `export` and `save`. Joining with `/`
# also avoids the doubled separator from `save_path`'s trailing slash.
save_dir = Path(save_path).resolve()
save_dir.mkdir(parents=True, exist_ok=True)
# Pickled Learner for inference. NOTE(review): loading it elsewhere presumably
# requires `rmse_a` and `AlbumentationsTransform` to be defined in that session.
learn.export(save_dir/'model.pkl')
# Model weights (fastai appends the '.pth' extension).
learn.save(str(save_dir/'model'))