In [1]:
from tqdm import tqdm
import numpy as np
import pandas as pd

from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import StratifiedKFold
import torch

from src.config import ProjectPaths, CFG, seed_everything, read_train
from src.ES_model import EssayClassifierModel
from src.ES_dataset import create_dataloaders
from src.training_loop import collate_batch

# Apply the project's seeding helper with the seed stored in the project config.
# NOTE(review): seed_everything is defined in src.config; presumably it seeds the
# Python/NumPy/torch RNGs — confirm there.
seed_everything(CFG.seed)

# Install a filter that ignores all warnings from this point on. It is registered
# after the imports at the top of the cell, so warnings raised while those modules
# were being imported are not suppressed.
import warnings
warnings.filterwarnings("ignore")

# NOTE(review): SEED is not referenced in the visible cells, and the seeding call
# above uses CFG.seed rather than this constant — confirm whether it is still needed.
SEED=42

  from .autonotebook import tqdm as notebook_tqdm


# Out-of-fold predictions for deberta-v3-small model on fold 0 (out of 5 folds)

In [3]:
# Cluster assignments per essay (kept as a full frame under its original name).
df_clusters = pd.read_csv('data/train_clusters.csv')

# Fold-0 out-of-fold predictions, with each essay's `clusters7` id attached.
# The join is an inner join on `essay_id`, which is what `merge` does by default.
df_fold0 = pd.read_csv('data/train_deberta_small_oof_fold0.csv').merge(
    df_clusters.loc[:, ['essay_id', 'clusters7']],
    how='inner',
    on='essay_id',
)
df_fold0

Unnamed: 0,essay_id,label,deberta,clusters7
0,001bdc0,3,2,2
1,0033037,1,1,5
2,0066c7c,1,2,0
3,0072128,3,3,4
4,0079f2a,1,0,2
...,...,...,...,...
3457,ff74f94,4,5,1
3458,ff988c9,2,2,4
3459,ffcb061,2,2,3
3460,fffb49b,0,0,2


# Relative cluster sizes for fold 0 (fraction of essays in each cluster)

In [4]:
# Fraction of fold-0 rows that fall in each cluster, ordered by cluster id.
cluster_counts = df_fold0['clusters7'].value_counts()
cluster_counts.div(len(df_fold0)).sort_index()

clusters7
0    0.202773
1    0.122761
2    0.182265
3    0.086944
4    0.112652
5    0.170422
6    0.122184
Name: count, dtype: float64

# OOF metric (quadratic weighted kappa) for each cluster

In [10]:
# Quadratic-weighted Cohen's kappa between the true labels and the DeBERTa
# out-of-fold predictions: first over the whole fold, then within each cluster.
overall_score = cohen_kappa_score(df_fold0['label'], df_fold0['deberta'], weights='quadratic')
print(f'OOF score for fold 0:    {overall_score:,.4f}\n')

# Iterate over the cluster ids actually present in the data instead of a
# hard-coded range(7): groupby yields the groups in ascending id order, never
# produces an empty group for an id missing from this fold, and picks up any
# id outside 0..6 without further edits.
for cl, cluster_rows in df_fold0.groupby('clusters7'):
    cluster_score = cohen_kappa_score(
        cluster_rows['label'], cluster_rows['deberta'], weights='quadratic'
    )
    print(f'OOF score for fold 0 cluster {cl}:    {cluster_score:,.4f}')

OOF score for fold 0:    0.8198

OOF score for fold 0 cluster 0:    0.7857
OOF score for fold 0 cluster 1:    0.8359
OOF score for fold 0 cluster 2:    0.8299
OOF score for fold 0 cluster 3:    0.6991
OOF score for fold 0 cluster 4:    0.7795
OOF score for fold 0 cluster 5:    0.8472
OOF score for fold 0 cluster 6:    0.7831
