In [None]:
# Reload edited modules automatically before each cell executes
# (autoreload mode 2 covers every imported module).
%load_ext autoreload
%autoreload 2
# Register the TensorBoard notebook extension, which provides the
# %tensorboard magic used further down in this notebook.
%load_ext tensorboard

In [None]:
import os

import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# example dataset
class CustomImageDataset(Dataset):
    """Map-style dataset that serves raw sentences from a delimited text file.

    Despite the class name, no images are involved: only the ``sentence``
    column of the file is kept, and every item is returned as a one-key dict
    ``{'sentence': <value>}``.

    Args:
        sentences_path: Path of the delimited file; it must have a header row
            containing a ``sentence`` column.
        sep: Field separator forwarded to ``pandas.read_csv`` (a tab for TSV).
    """

    def __init__(self, sentences_path, sep):
        self.sentences = pd.read_csv(sentences_path, sep=sep)['sentence']

    def __len__(self):
        """Return the number of sentences loaded from the file."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return ``{'sentence': ...}`` for the row at *position* ``idx``.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        # Positional lookup on purpose. Plain ``Series[idx]`` is label-based:
        # it raises KeyError (not IndexError) past the end, which breaks
        # ordinary iteration over the dataset, rejects negative indices, and
        # returns the wrong row whenever the index is not 0..n-1.
        return {'sentence': self.sentences.iloc[idx]}
# Mini-batch size shared by both loaders.
batch_size = 4

# Sentence datasets read from the train / dev TSV files under klej_polemo2.0-in/.
# Note that the 'test' split below is backed by dev.tsv.
train_dataset = CustomImageDataset('klej_polemo2.0-in/train.tsv', sep='\t')
test_dataset = CustomImageDataset('klej_polemo2.0-in/dev.tsv', sep='\t')

# NOTE(review): both loaders rely on DataLoader's default shuffle=False, so
# training batches follow file order — confirm this is intended.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size)

# Split name -> loader mapping that is later handed to the trainer.
loaders = dict(train=train_loader, test=test_loader)

# Pull a single batch from the dev loader and display it as a sanity check.
batch = next(iter(test_loader))
batch

In [None]:
from trainer.utils import get_teacher_student_tokenizer
# Unpack the three objects returned by the project helper. Judging by the
# names and later use (.to(device), student.parameters()), these are the
# teacher model, the student model and a tokenizer — TODO confirm in trainer.utils.
teacher, student, tokenizer = get_teacher_student_tokenizer()

In [None]:
from trainer.distilTrainer import DistilTrainer

# from accelerate import Accelerator
# accelerator = Accelerator()
# device = accelerator.device
# student, optim, my_training_dataloader = accelerator.prepare(
# my_model, my_optimizer, my_training_dataloader)

# Keyword arguments for DistilTrainer: both models moved to `device`, the
# tokenizer, the split -> loader mapping, three loss modules and the optimizer.
# NOTE(review): the role of each criterionN is decided inside DistilTrainer —
# confirm which loss term each one feeds.
params_trainer = dict(
    teacher=teacher.to(device),
    student=student.to(device),
    tokenizer=tokenizer,
    loaders=loaders,
    criterion1=nn.CrossEntropyLoss().to(device),
    criterion2=nn.CrossEntropyLoss().to(device),
    # criterion2=nn.KLDivLoss(reduction='batchmean').to(device),  # open question: should log_target be used?
    criterion3=nn.CosineEmbeddingLoss().to(device),
    # Only the student's parameters are optimized.
    # TODO: exclude embeddings and batch-normalization parameters from weight decay.
    optim=torch.optim.AdamW(student.parameters(), lr=1e-3, weight_decay=0.0),
    # accelerator=Accelerator(),
    device=device,
)
trainer = DistilTrainer(**params_trainer)

In [None]:
# Open TensorBoard inline, reading event files from the `exps` directory
# (presumably where DistilTrainer writes its logs — TODO confirm).
%tensorboard --logdir=exps

In [None]:
# Settings for one experiment run, forwarded unchanged to DistilTrainer.run_exp.
# NOTE(review): the meaning of each setting is inferred from its name only —
# confirm against the trainer implementation.
params_run = dict(
    epoch_start=0,
    epoch_end=2,
    exp_name='plain_distil',
    save_interval=100,
    fp16=False,
    random_seed=42,
)

trainer.run_exp(**params_run)