In [None]:
!pip install --no-cache-dir transformers sentencepiece

In [None]:
import numpy as np
import pandas as pd
import random
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch import autocast

import transformers
from transformers import TrainingArguments, Trainer
from transformers import AutoTokenizer, AutoConfig, AutoModel
import os

In [None]:
# Colab-only step: mount Google Drive at /content/drive. The model directory and
# the checkpoint loaded further down both live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def seed_everything(seed: int):
    """Seed the Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

    Args:
        seed: Value handed to every random number generator seeded here; it is
            also written (as text) to the PYTHONHASHSEED environment variable.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = f'{seed}'

    # Remaining generators all take the seed as their single argument.
    for reseed in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        reseed(seed)

    # Ask cuDNN for deterministic kernels and switch off its auto-tuner.
    cudnn_flags = {'deterministic': True, 'benchmark': False}
    for flag, value in cudnn_flags.items():
        setattr(torch.backends.cudnn, flag, value)

# Fix every global RNG before any model code runs.
seed_everything(0)

# Stand-alone torch generator seeded with the same value. No other cell visible
# in this notebook references `g` (presumably a leftover from the training
# notebook's DataLoader setup - TODO confirm or remove).
g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x7e6c07dc0590>

In [None]:
class MeanHead(nn.Module):
    """Classification head: attention-masked mean pooling, then dropout + linear.

    Args:
        hidden_size: Size of the last dimension of the backbone's hidden states;
            used as the input width of the linear layer.
        num_hidden_layers: Accepted for interface compatibility with the caller;
            this head does not use it.
        num_classes: Number of output logits. Defaults to 3.
        dropout: Dropout probability applied to the pooled features. Defaults to 0.2.
    """

    def __init__(self, hidden_size: int, num_hidden_layers: int,
                 num_classes: int = 3, dropout: float = 0.2):
        super().__init__()

        # Keep the Sequential layout (index 0 = dropout, index 1 = linear) so the
        # parameter names stay `linear_output.1.weight` / `linear_output.1.bias`
        # and previously saved state dicts still load.
        self.linear_output = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, head_inputs: dict):
        """Pool the backbone output and project it to ``num_classes`` logits.

        Args:
            head_inputs: Dict with the keys read by ``get_features``.

        Returns:
            Tensor of logits with one row per pooled feature vector.
        """
        return self.linear_output(self.get_features(head_inputs))

    def get_features(self, head_inputs: dict):
        """Average the last hidden state over dimension 1, ignoring masked positions.

        Args:
            head_inputs: Dict holding ``'output_model'`` (a sequence whose first
                item is the last hidden state) and ``'attention_mask'``.
                Assumes hidden state is (batch, seq, hidden) and the mask is
                (batch, seq) with 1 for real tokens - TODO confirm against caller.

        Returns:
            Tensor with dimension 1 reduced away, i.e. one vector per sequence.
        """
        last_hidden_state = head_inputs['output_model'][0]

        # (batch, seq) -> (batch, seq, 1); broadcasting covers the hidden dimension.
        mask = head_inputs['attention_mask'].unsqueeze(-1).float()

        summed = (last_hidden_state * mask).sum(dim=1)
        # Clamp so a fully masked row divides by 1e-9 instead of 0 (yields zeros, not NaN).
        counts = mask.sum(dim=1).clamp(min=1e-9)

        return summed / counts

In [None]:
class CustomModel(nn.Module):
    """Pretrained transformer backbone followed by a ``MeanHead`` classifier.

    Args:
        model_path: Path or identifier passed to ``AutoConfig.from_pretrained``
            and ``AutoModel.from_pretrained``.
        layers_freeze: Number of leading encoder layers to freeze. ``0`` (or a
            negative value) freezes nothing; a value equal to the backbone's
            ``num_hidden_layers`` freezes the whole backbone.
    """

    def __init__(self, model_path: str, layers_freeze: int):
        super().__init__()

        # Load the config first so both dropout probabilities are zeroed before
        # the backbone is instantiated from it.
        self.config_model = AutoConfig.from_pretrained(model_path)
        self.config_model.attention_probs_dropout_prob = 0
        self.config_model.hidden_dropout_prob = 0

        self.model = AutoModel.from_pretrained(model_path, config=self.config_model)
        self.hidden_size = self.config_model.hidden_size
        self.num_hidden_layers = self.config_model.num_hidden_layers

        if layers_freeze > 0:
            if layers_freeze == self.num_hidden_layers:
                print(f'Freezing all model')
                self.model.requires_grad_(False)
            else:
                # NOTE: values larger than num_hidden_layers also land here.
                print(f'Freezing the first {layers_freeze} layers')
                self.freeze_layers(layers_freeze)

        self.head = MeanHead(self.hidden_size, self.num_hidden_layers)

    def freeze_layers(self, layers: int):
        """Disable gradients for the embeddings and the first ``layers`` encoder layers.

        Requires the backbone to expose ``embeddings`` and ``encoder.layer``.
        """
        self.model.embeddings.requires_grad_(False)
        self.model.encoder.layer[:layers].requires_grad_(False)

    def take_features(self, inputs):
        """Run the backbone and return the inputs together with its output.

        Args:
            inputs: Mapping of keyword arguments for the backbone (the visible
                caller supplies ``input_ids`` and ``attention_mask``).

        Returns:
            A new dict with every entry of ``inputs`` plus ``'output_model'``,
            the tuple returned by the backbone (``return_dict=False``).
        """
        output_model = self.model(**inputs, return_dict=False, output_hidden_states=False)

        # Build a fresh dict instead of writing into `inputs`: mutating the
        # caller's dict would make a second forward pass with the same object
        # send `output_model=` to the backbone as an unexpected keyword.
        return {**inputs, 'output_model': output_model}

    def forward(self, inputs):
        """Return the head's output for ``inputs`` (the mapping is left untouched)."""
        return self.head(self.take_features(inputs))

In [None]:
def tokenize_samples(samples, CFG):
  """Tokenize ``samples`` and return the model inputs as long tensors.

  Args:
    samples: Texts handed to the tokenizer as-is.
    CFG: Dict providing 'TOKENIZER' (the callable tokenizer) and 'MAX_LEN'
      (length every sequence is padded / truncated to).

  Returns:
    Dict with exactly two entries, 'input_ids' and 'attention_mask', each a
    ``torch.long`` tensor built from the tokenizer output. Any other field the
    tokenizer returns is dropped.
  """
  encode = CFG['TOKENIZER']
  encoding = encode(
      samples,
      add_special_tokens=True,
      max_length=CFG['MAX_LEN'],
      padding='max_length',
      truncation=True,
      return_attention_mask=True,
      verbose=False,
  )

  wanted_fields = ('input_ids', 'attention_mask')
  return {field: torch.tensor(encoding[field], dtype=torch.long) for field in wanted_fields}

In [None]:
def inference(samples, model, CFG):

  inputs = tokenize_samples(samples, CFG)

  inputs = {k:inputs[k].to(device=CFG['DEVICE']) for k in inputs.keys()}

  model.eval()
  with torch.no_grad():
    with autocast(device_type=CFG['DEVICE'], dtype=torch.float16):
      output = model(inputs)

  preds = torch.argmax(F.softmax(output, 1), 1).cpu().detach().numpy()

  return preds

In [None]:
# Directory on the mounted Drive holding the pretrained backbone files; used to
# load the tokenizer in the next cell.
model_path = '/content/drive/MyDrive/PLN - Projeto/DebertaV3'

In [None]:
# use_fast=False requests the slow (Python) tokenizer implementation
# (presumably the reason sentencepiece is installed in the first cell - TODO confirm).
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

In [None]:
# Settings shared by tokenize_samples / inference and the model-loading cell.
Configs = {
    # Length every tokenized sample is padded / truncated to.
    'MAX_LEN':  256,
    # Reuse model_path so the tokenizer and the backbone always load from the
    # same directory instead of two copies of the literal that can drift apart.
    'MODEL_PATH': model_path,
    # Fall back to CPU when no GPU is attached to the runtime.
    'DEVICE': 'cuda' if torch.cuda.is_available() else 'cpu',
    'TOKENIZER': tokenizer
}

In [None]:
# Build the architecture with nothing frozen (layers_freeze=0) and move it to
# the configured device before restoring the fine-tuned weights.
model = CustomModel(Configs['MODEL_PATH'], 0)
model.to(Configs['DEVICE'])

# map_location remaps the checkpoint's tensors onto the configured device, so a
# checkpoint saved from a GPU run still loads when this notebook falls back to CPU.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
model.load_state_dict(
    torch.load('/content/drive/MyDrive/PLN - Projeto/deberta_best.pth',
               map_location=Configs['DEVICE'])
)

<All keys matched successfully>

In [None]:
# Texts to classify; every list item is scored independently.
samples = ['exemplo']

In [None]:
# Displays a NumPy array with the predicted class index for each entry of `samples`.
inference(samples, model, Configs)