# Technical part

In [None]:
# Install the third-party packages this notebook relies on (-q keeps pip quiet).
!pip install -q sentencepiece
import sentencepiece
!pip -q install datasets
!pip install -q transformers
!pip install -q comet_ml
# comet_ml is imported right after installation, i.e. ahead of the torch
# imports in the next cell.
# NOTE(review): presumably so Comet can hook into torch for auto-logging — confirm.
import comet_ml
!pip install -q pytorch-lightning
# Fetch the project code that the next cell imports as the JB_internship package.
!git clone -q https://github.com/DanilDmitriev1999/JB_internship

In [None]:
import warnings
warnings.filterwarnings('ignore')

from datasets import load_dataset

import re

import numpy as np
from pprint import pprint
from typing import List
import collections
import seaborn as sns
import matplotlib.pyplot as plt
import random

import math
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset

from transformers import AutoTokenizer, AutoModel, AdamW


import pytorch_lightning as pl
from pytorch_lightning.loggers import CometLogger
from pytorch_lightning import Trainer, seed_everything

from JB_internship.DataModule.CastomDataset import *
from JB_internship.DataModule.DataPrepare import *
from JB_internship.models.DebertaLayerCat import *
from JB_internship.models.RobertaLayerCat import *
from JB_internship.models.Roberta import *
from JB_internship.utils.trainer import *
from JB_internship.loss.FocalLoss import *


# Seed through Lightning's seed_everything with a fixed value so that runs
# start from the same random state.
seed_everything(294)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

# Data

In [None]:
# Build the train/validation/test splits with the project's DataExplorer and
# reuse its tokenizer for every dataset.
dt = DataExplorer(model_name='microsoft/deberta-base', undersampling=True)
train_data, valid_data, test_data = dt.train_val_test_split()
tokenizer = dt.tokenizer

# Only the training loader shuffles; the test loader uses a larger batch size.
train_dataset = CastomDataModule(train_data, tokenizer)
train_iter = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

valid_dataset = CastomDataModule(valid_data, tokenizer)
val_iter = DataLoader(dataset=valid_dataset, batch_size=32)

test_dataset = CastomDataModule(test_data, tokenizer)
test_iter = DataLoader(dataset=test_dataset, batch_size=64)

# Model

In [None]:
# Training hyper-parameters.
N_EPOCHS = 5          # upper bound on training epochs passed to the Trainer
n_layers = [6, 7, 8]  # handed to DebertaLayerCat
                      # NOTE(review): presumably the encoder layers whose outputs are concatenated — confirm
CLIP = 1              # gradient-clipping value passed to the Trainer

deberta = DebertaLayerCat('microsoft/deberta-base', n_layers)
criterion = FocalLoss().to(device)

# ModelTrainer (project class) wraps the network together with its loss.
model = ModelTrainer(deberta,
                     criterion,
                     ).to(device)

# Request a GPU only when CUDA is actually available, in line with how
# `device` is chosen earlier in the notebook; a hard-coded gpus=1 makes
# Trainer construction fail on CPU-only machines.
trainer = Trainer(max_epochs=N_EPOCHS,
                  gpus=1 if torch.cuda.is_available() else 0,
                  gradient_clip_val=CLIP,
                  progress_bar_refresh_rate=1,
                  log_every_n_steps=3,
                  )

In [None]:
# Fit the model, passing the training loader and the validation loader.
trainer.fit(model, train_iter, val_iter)

# Test

In [None]:
def report(l, p):
    """Print sklearn's classification report for nested labels and predictions.

    Args:
        l: Iterable of iterables with the true labels; it is flattened one
            level before scoring.
        p: Iterable of iterables with the predicted labels, flattened the
            same way.
    """
    labels = [item for group in l for item in group]
    preds = [item for group in p for item in group]

    print(classification_report(labels, preds))

def confis_mtrx(l, p):
    """Print sklearn's confusion matrix for nested labels and predictions.

    Args:
        l: Iterable of iterables with the true labels; it is flattened one
            level before scoring.
        p: Iterable of iterables with the predicted labels, flattened the
            same way.
    """
    labels = [item for group in l for item in group]
    preds = [item for group in p for item in group]

    print(confusion_matrix(labels, preds))

# Run the Trainer's test loop for the model over the test loader.
trainer.test(model, test_iter)

In [None]:
# Read the labels and predictions stored on the model.
# NOTE(review): presumably filled in by the preceding trainer.test run — confirm.
pr = model.res
l, p = pr['label'], pr['pred']

report(l, p)
confis_mtrx(l, p)

# Reset the stored results to empty lists.
model.res = {'pred': [], 'label': []}

              precision    recall  f1-score   support

           0       0.99      0.98      0.99      5945
           1       0.79      0.81      0.80       448

    accuracy                           0.97      6393
   macro avg       0.89      0.90      0.89      6393
weighted avg       0.97      0.97      0.97      6393

[[5851   94]
 [  84  364]]


# Save Model

In [None]:
from comet_ml import Experiment

In [None]:
# Base file name of the checkpoint that is saved and then uploaded below.
name = 'DeBERTa'

In [None]:
import os

# Create the target directory first so saving does not depend on the
# checkpoint writer creating missing parent directories itself.
ckpt_path = f"/content/save_models/{name}.ckpt"
os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
trainer.save_checkpoint(ckpt_path)

In [None]:
import os

# The Comet API key is a secret and must not live in the notebook source.
# It is read from the COMET_API_KEY environment variable instead; when the
# variable is unset, None is passed and Comet falls back to its own
# configuration lookup.
# NOTE(review): a key that was previously committed here should be treated
# as compromised and revoked in the Comet account settings.
experiment = Experiment(
    api_key=os.getenv('COMET_API_KEY'),
    project_name='jetbrainsinternship',
    workspace='danildmitriev1999',
)

In [None]:
# Attach the saved checkpoint file to the Comet experiment under the model name "DeBERTa-JB".
experiment.log_model("DeBERTa-JB", f"/content/save_models/{name}.ckpt")