In [1]:
# Notebook setup: imports, seeding, warning suppression and Weights & Biases login.
import wandb
import torch

from src.config import ProjectPaths, CFG, seed_everything

# Seeding uses the project-level value CFG.seed and is done *before* importing
# src.experiment -- presumably so that anything random at import time is already
# seeded; TODO confirm the ordering is intentional before regrouping the imports.
seed_everything(CFG.seed)

from src.experiment import experiment
import warnings
# Blanket filter: hides every warning category (including deprecation notices)
# raised after this point.
warnings.filterwarnings("ignore")

# NOTE(review): SEED is not referenced in any visible cell; the seeding call above
# uses CFG.seed instead. Verify whether this constant is still needed.
SEED=42

# Authenticate with Weights & Biases for this session.
wandb.login()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Run selectors: which data fold to use and which pretrained checkpoint id to load.
# (Exact meaning of `fold` is defined by the project code -- presumably the
# cross-validation split index; verify against the dataloader factory.)
fold = 0
model_name = 'microsoft/deberta-v3-small'

# Settings passed to `experiment(...)` in the following cell. Key order is kept
# identical to the original literal.
# NOTE(review): "unfeeze" in the run name looks like a typo for "unfreeze"; it is
# left untouched so the logged run name does not change.
config = dict(
    project_name='EssayScoring_LLM_features',
    run_name=f"{model_name.rsplit('/', 1)[-1]}_cluster3_unfeeze2_fold_{fold}",
    create_dataloaders_func='EssayClassifier',
    data_parameters=dict(
        model_name=model_name,
        batch_size=1,
        fold=fold,
        path_df=ProjectPaths.train_cluster3,
    ),
    loss='CrossEntropy',
    optimizer='AdamW',
    learning_rate=1e-5,
    epochs=10,
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'),
    model='EssayClassifier',
    model_parameters=dict(
        model_name=model_name,
    ),
)

In [4]:
# Launch the run described by `config` (long-running: see the per-epoch timings below).
# What the recorded output of this run shows:
#   * epochs 0-9 were executed; the reported trainable-parameter count was 4614 for
#     epochs 0-1 and 141308934 from epoch 2 onward;
#   * validation loss was lowest at epoch 5 (0.71328) and rose to 1.02369 by epoch 9
#     while training loss kept falling, i.e. the model overfits after epoch 5;
#   * the best validation metric was 0.71500 at epoch 8 (final epoch: 0.68067).
experiment(config)

 ---------------------------------------------------------------------------------------------------- 
   Number of trainable parameters in model:  4614 
 ----------------------------------------------------------------------------------------------------
[1m[38;5;254m[48;5;240m Epoch 0 [0m


  training batch:    mean loss  1.69584     kappa  0.18675: 100%|██████████| 1318/1318 [01:37<00:00, 13.51it/s]
validation batch:    mean loss  1.36372     kappa -0.19048: 100%|██████████| 330/330 [00:21<00:00, 15.14it/s]


	train:       loss  1.69584      metric  0.00922
	validation:  loss  1.36372      metric -0.10293
[1m[38;5;254m[48;5;240m Epoch 1 [0m


  training batch:    mean loss  1.29693     kappa -0.10526: 100%|██████████| 1318/1318 [01:37<00:00, 13.45it/s]
validation batch:    mean loss  1.22975     kappa -0.22378: 100%|██████████| 330/330 [00:22<00:00, 14.87it/s]


	train:       loss  1.29693      metric -0.10742
	validation:  loss  1.22975      metric -0.00961
 ---------------------------------------------------------------------------------------------------- 
   Number of trainable parameters in model:  141308934 
 ----------------------------------------------------------------------------------------------------
[1m[38;5;254m[48;5;240m Epoch 2 [0m


  training batch:    mean loss  0.99705     kappa  0.48980: 100%|██████████| 1318/1318 [06:54<00:00,  3.18it/s]
validation batch:    mean loss  0.87500     kappa  0.57746: 100%|██████████| 330/330 [00:22<00:00, 14.90it/s]


	train:       loss  0.99705      metric  0.43709
	validation:  loss  0.87500      metric  0.50725
[1m[38;5;254m[48;5;240m Epoch 3 [0m


  training batch:    mean loss  0.73417     kappa  0.76316: 100%|██████████| 1318/1318 [06:53<00:00,  3.19it/s]
validation batch:    mean loss  0.76564     kappa  0.71338: 100%|██████████| 330/330 [00:22<00:00, 14.95it/s]


	train:       loss  0.73417      metric  0.69763
	validation:  loss  0.76564      metric  0.65819
[1m[38;5;254m[48;5;240m Epoch 4 [0m


  training batch:    mean loss  0.63746     kappa  0.78049: 100%|██████████| 1318/1318 [06:50<00:00,  3.21it/s]
validation batch:    mean loss  0.74630     kappa  0.74286: 100%|██████████| 330/330 [00:21<00:00, 15.22it/s]


	train:       loss  0.63746      metric  0.76485
	validation:  loss  0.74630      metric  0.69684
[1m[38;5;254m[48;5;240m Epoch 5 [0m


  training batch:    mean loss  0.53372     kappa  0.81670: 100%|██████████| 1318/1318 [06:48<00:00,  3.22it/s]
validation batch:    mean loss  0.71328     kappa  0.79042: 100%|██████████| 330/330 [00:21<00:00, 15.18it/s]


	train:       loss  0.53372      metric  0.81872
	validation:  loss  0.71328      metric  0.71423
[1m[38;5;254m[48;5;240m Epoch 6 [0m


  training batch:    mean loss  0.44287     kappa  0.92116: 100%|██████████| 1318/1318 [06:51<00:00,  3.20it/s]
validation batch:    mean loss  0.75267     kappa  0.80226: 100%|██████████| 330/330 [00:22<00:00, 14.79it/s]


	train:       loss  0.44287      metric  0.85001
	validation:  loss  0.75267      metric  0.70827
[1m[38;5;254m[48;5;240m Epoch 7 [0m


  training batch:    mean loss  0.34015     kappa  0.85437: 100%|██████████| 1318/1318 [06:57<00:00,  3.16it/s]
validation batch:    mean loss  0.83149     kappa  0.79042: 100%|██████████| 330/330 [00:22<00:00, 14.96it/s]


	train:       loss  0.34015      metric  0.89251
	validation:  loss  0.83149      metric  0.69860
[1m[38;5;254m[48;5;240m Epoch 8 [0m


  training batch:    mean loss  0.27052     kappa  0.95816: 100%|██████████| 1318/1318 [06:53<00:00,  3.19it/s]
validation batch:    mean loss  0.86357     kappa  0.77987: 100%|██████████| 330/330 [00:21<00:00, 15.03it/s]


	train:       loss  0.27052      metric  0.92288
	validation:  loss  0.86357      metric  0.71500
[1m[38;5;254m[48;5;240m Epoch 9 [0m


  training batch:    mean loss  0.21492     kappa  0.97778: 100%|██████████| 1318/1318 [06:53<00:00,  3.19it/s]
validation batch:    mean loss  1.02369     kappa  0.79381: 100%|██████████| 330/330 [00:22<00:00, 14.96it/s]


	train:       loss  0.21492      metric  0.93695
	validation:  loss  1.02369      metric  0.68067


0,1
loss_train,█▆▅▃▃▃▂▂▁▁
loss_valid,█▇▃▂▁▁▁▂▃▄
metric_train,▂▁▅▆▇▇▇███
metric_valid,▁▂▆███████

0,1
loss_train,0.21492
loss_valid,1.02369
metric_train,0.93695
metric_valid,0.68067


In [5]:
# Disabled scratch cell: manually scores a saved checkpoint on the validation
# dataloader and prints the quadratic-weighted Cohen's kappa.
# NOTE(review) before re-enabling:
#   * EssayClassifierModel, create_dataloaders, collate_batch and cohen_kappa_score
#     are not imported in any visible cell;
#   * it loads 'microsoft/deberta-v3-xsmall', whereas the run above used
#     'microsoft/deberta-v3-small';
#   * `outputs[b] = output` stores one prediction per batch, so it relies on batch_size=1;
#   * the loop does not call model.eval() or use torch.no_grad().
# from tqdm import tqdm
# import numpy as np

# device = torch.device('cuda')

# checkpoint = torch.load('trained_models/441a4m4s_checkpoint.pth')

# model = EssayClassifierModel('microsoft/deberta-v3-xsmall')
# model.load_state_dict(checkpoint['model_state_dict'])
# model = model.to(device)

# dataloader_train, dataloader_valid = create_dataloaders(ProjectPaths.train, fold=0, model_name='microsoft/deberta-v3-xsmall', batch_size=1)


# outputs = np.empty(len(dataloader_valid), dtype=int)
# targets = np.empty(len(dataloader_valid), dtype=int)

# for b, batch in enumerate(tqdm(dataloader_valid)):
#     output = model(collate_batch(batch['inputs']).to(device)).cpu().detach().numpy().argmax(axis=1)
#     target = batch['labels'].cpu().detach().numpy()
#     outputs[b] = output
#     targets[b] = target

# print('outputs', outputs)
# print('targets', targets)
# score = cohen_kappa_score(targets, outputs, weights='quadratic')
# print('score', score)