In [1]:
!pip install transformers
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from transformers import RobertaModel, RobertaTokenizer
from sklearn.metrics import roc_auc_score, f1_score, hamming_loss
from transformers import EvalPrediction
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report



In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Names of the 14 target columns selected from the data frame (note: no 'K').
labels = list('ABCDEFGHIJLMNZ')

In [3]:
class RoBERTaClass(torch.nn.Module):
    """RoBERTa encoder with a two-layer classification head producing raw logits.

    The submodule attribute names (`roberta_model`, `pre_classifier`,
    `classifier`) are part of the saved state-dict keys and must not be renamed,
    otherwise previously saved checkpoints stop loading.

    Args:
        num_labels: Number of output logits (one per target label). Defaults to 14.
        dropout: Dropout probability applied inside the head. Defaults to 0.3.
        pretrained_name: Checkpoint name passed to `RobertaModel.from_pretrained`.
    """

    def __init__(self, num_labels=14, dropout=0.3, pretrained_name="roberta-base"):
        super().__init__()
        self.roberta_model = RobertaModel.from_pretrained(pretrained_name, return_dict=True)
        # Take the head width from the encoder instead of hard-coding 768, so a
        # differently sized checkpoint still yields matching layer shapes.
        hidden_size = self.roberta_model.config.hidden_size
        self.pre_classifier = torch.nn.Linear(hidden_size, hidden_size)
        self.dropout = torch.nn.Dropout(dropout)
        self.activation = torch.nn.Tanh()
        self.classifier = torch.nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attn_mask, token_type_ids=None):
        """Return the classification logits for a batch.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attn_mask: Attention mask forwarded to the encoder as `attention_mask`.
            token_type_ids: Optional segment ids forwarded to the encoder.

        Returns:
            The output of the final linear layer (no sigmoid is applied here).
        """
        encoded = self.roberta_model(
            input_ids,
            attention_mask=attn_mask,
            token_type_ids=token_type_ids,
        )
        # The pooler weights are not part of the roberta-base checkpoint (they are
        # reported as newly initialised on load), so meaningful values for them
        # have to come from training or from a fine-tuned state dict.
        hidden = self.pre_classifier(encoded.pooler_output)
        # NOTE(review): dropout is applied before the tanh; the order is kept
        # as-is so behaviour matches whatever the saved weights were trained with.
        hidden = self.dropout(hidden)
        hidden = self.activation(hidden)
        return self.classifier(hidden)

# Build the classifier and place it on the selected device. Module.to returns
# the module itself, so construction and placement can be chained.
model = RoBERTaClass().to(device)

# Tokenizer for the same roberta-base checkpoint as the encoder.
# NOTE(review): `truncation` and `do_lower_case` are passed through unchanged;
# confirm whether RobertaTokenizer actually honours them at load time.
tokenizer = RobertaTokenizer.from_pretrained(
    'roberta-base',
    truncation=True,
    do_lower_case=True,
)

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [4]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text row and returns its label vector.

    Args:
        df: DataFrame holding the text column and one column per label.
        tokenizer: Object exposing `encode_plus` (e.g. a Hugging Face tokenizer).
        max_len: Length every encoded sequence is padded / truncated to.
        label_cols: Names of the label columns. When omitted, the notebook-level
            `labels` list is used.
        text_col: Name of the text column. Defaults to 'Text'.
    """

    def __init__(self, df, tokenizer, max_len, label_cols=None, text_col='Text'):
        self.tokenizer = tokenizer
        self.df = df
        if label_cols is None:
            # Fall back to the module-level label list defined earlier in the notebook.
            label_cols = labels
        self.text = df[text_col]
        self.targets = self.df[list(label_cols)].values
        self.max_len = max_len

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.text)

    def __getitem__(self, index):
        """Return the encoded text and float targets for the row at position `index`."""
        # Positional lookup: `self.targets` is a plain array indexed by position,
        # so the text must be too. A label-based `self.text[index]` breaks (or
        # mismatches rows) as soon as the frame's index is not 0..n-1.
        title = " ".join(str(self.text.iloc[index]).split())

        inputs = self.tokenizer.encode_plus(
            title,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        # The tokenizer returns tensors with a leading batch dimension of 1;
        # flatten them so the DataLoader can stack samples into a batch.
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'token_type_ids': inputs["token_type_ids"].flatten(),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

In [5]:
def output_labels(outputs, threshold=0.5):
  """Convert raw logits into a 0/1 NumPy array by thresholding their sigmoid.

  Entries whose sigmoid is greater than or equal to `threshold` become 1.0, all
  others 0.0. The result is a float64 array with the same shape as `outputs`.
  """
  probabilities = torch.sigmoid(outputs).cpu()
  # Do the comparison in torch, then hand the boolean mask over to NumPy.
  above_threshold = (probabilities >= threshold).numpy()
  return above_threshold.astype(np.float64)

def calculate_accuracy(preds, targets):
    """Count the rows of `preds` that equal `targets` in every position.

    Despite the name, the return value is the number of exactly matching rows,
    not a ratio; divide by the number of rows to obtain an accuracy.

    Args:
        preds: 2-D NumPy array of predictions.
        targets: Tensor of reference values; it is moved to the CPU and
            converted to NumPy before comparison.
    """
    reference = targets.cpu().numpy()
    # A row counts only if all of its entries agree with the reference row.
    exact_rows = np.equal(preds, reference).all(axis=1)
    return np.sum(exact_rows)

def metrics(predictions, labels):
    """Print per-class, micro- and macro-averaged precision, recall and F1.

    Args:
        predictions: Predicted indicator rows, one per sample.
        labels: Ground-truth indicator rows, aligned with `predictions`.
            (Presumably 0/1 rows with one entry per class, as built by the
            inference loop; confirm against the caller.)

    Returns:
        A dict with keys 'per_class', 'micro' and 'macro', each mapping to a
        dict holding 'precision', 'recall' and 'f1'. The same values are printed.
    """
    # Imported here so the function works regardless of which notebook cell
    # imported scikit-learn's helper (it is otherwise only imported further down).
    from sklearn.metrics import precision_recall_fscore_support

    y_true = labels
    y_pred = predictions
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average=None)

    # Calculate micro average
    micro_precision, micro_recall, micro_f1, _ = precision_recall_fscore_support(y_true, y_pred, average='micro')

    # Calculate macro average
    macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')

    # Print precision, recall, and F1-score for each class
    for i in range(len(precision)):
        print(f"Class {i}:")
        print(f"  Precision: {precision[i]}")
        print(f"  Recall: {recall[i]}")
        print(f"  F1-Score: {f1[i]}")
        print()

    # Print aggregate metrics
    print("Aggregate Metrics:")
    print(f"  Micro Average:")
    print(f"    Precision: {micro_precision}")
    print(f"    Recall: {micro_recall}")
    print(f"    F1-Score: {micro_f1}")
    print()
    print(f"  Macro Average:")
    print(f"    Precision: {macro_precision}")
    print(f"    Recall: {macro_recall}")
    print(f"    F1-Score: {macro_f1}")

    # Hand the numbers back as well, so callers can log or compare them
    # instead of parsing the printed text.
    return {
        'per_class': {'precision': precision, 'recall': recall, 'f1': f1},
        'micro': {'precision': micro_precision, 'recall': micro_recall, 'f1': micro_f1},
        'macro': {'precision': macro_precision, 'recall': macro_recall, 'f1': macro_f1},
    }

In [9]:
def test_inference(test_data_loader, model, threshold=0.5):
    """Run `model` over every batch of `test_data_loader` and print metrics.

    Args:
        test_data_loader: Iterable of batches; each batch is a dict with the keys
            'input_ids', 'attention_mask', 'token_type_ids' and 'targets'.
        model: Module called as `model(ids, mask, token_type_ids)` returning logits.
        threshold: Sigmoid cut-off used to turn logits into 0/1 predictions.
            Defaults to 0.5.
    """
    total_targets = []
    total_outputs = []
    model.eval()
    # Gradients are never needed for evaluation, so disable them once for the whole loop.
    with torch.no_grad():
        # Iterate the loader directly: wrapping it in enumerate() hides its length
        # from tqdm, which then cannot show a total or an ETA.
        for data in tqdm(test_data_loader):
            ids = data['input_ids'].to(device, dtype=torch.long)
            mask = data['attention_mask'].to(device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)

            outputs = model(ids, mask, token_type_ids)
            # Targets are only collected for the metrics, so they stay on the host.
            total_targets.extend(data['targets'].to(dtype=torch.float).tolist())
            total_outputs.extend(output_labels(outputs, threshold))
    metrics(total_outputs, total_targets)
          

In [10]:
from sklearn.metrics import precision_recall_fscore_support

# Evaluation inputs and settings, gathered in one place so they are easy to change.
TEST_CSV_PATH = '/kaggle/input/test-data/test1.csv'
MODEL_WEIGHTS_PATH = '/kaggle/input/val-model/best_val_roberta_model (1).pth'
MAX_LEN = 256
BATCH_SIZE = 32

df_test = pd.read_csv(TEST_CSV_PATH)
test_dataset = Dataset(df_test, tokenizer, MAX_LEN)
# Evaluation does not need shuffling; a fixed order keeps runs reproducible.
test_data_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)
# map_location remaps the stored tensors onto the active device, so a checkpoint
# saved on a GPU still loads when the notebook falls back to the CPU.
model.load_state_dict(torch.load(MODEL_WEIGHTS_PATH, map_location=device))
test_inference(test_data_loader, model)

157it [00:53,  2.92it/s]


Class 0:
  Precision: 0.788637266587207
  Recall: 0.8508358336905272
  F1-Score: 0.8185567010309278

Class 1:
  Precision: 0.9706131078224102
  Recall: 0.9826626712328768
  F1-Score: 0.9766007232503723

Class 2:
  Precision: 0.8913286192780052
  Recall: 0.9000375798571966
  F1-Score: 0.8956619296933434

Class 3:
  Precision: 0.9316965690903368
  Recall: 0.9340485957715368
  F1-Score: 0.9328710999054524

Class 4:
  Precision: 0.8316628701594533
  Recall: 0.933282208588957
  F1-Score: 0.8795470970850398

Class 5:
  Precision: 0.8070776255707762
  Recall: 0.7881828316610925
  F1-Score: 0.7975183305132544

Class 6:
  Precision: 0.8124833288877034
  Recall: 0.9114302812687014
  F1-Score: 0.8591171908052461

Class 7:
  Precision: 0.4962686567164179
  Recall: 0.24181818181818182
  F1-Score: 0.32518337408312953

Class 8:
  Precision: 0.7721179624664879
  Recall: 0.5207956600361664
  F1-Score: 0.6220302375809935

Class 9:
  Precision: 0.6486988847583643
  Recall: 0.6221033868092691
  F1-Score: 

In [11]:
# Display the test DataFrame as this cell's output.
df_test

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,L,M,N,Z,Text
0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,Infertility in a Maltese poodle as a result of...
1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,Conebulization of surfactant and urokinase res...
2,0,1,0,0,1,1,1,1,1,0,0,0,1,0,A pilot study of the mental workload of object...
3,0,1,1,0,1,1,1,1,0,0,1,1,1,0,Validation of a psychophysiological waking ere...
4,1,1,0,1,1,0,1,0,0,0,1,0,0,0,Met-Gly-Cys motif from G-protein alpha subunit...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,1,1,1,1,1,0,1,0,0,0,0,1,0,0,The effects on synovial permeability and synov...
4996,0,1,1,1,1,0,1,0,0,0,0,0,0,0,[Characteristics of the mitotic cycle of cells...
4997,0,1,1,0,1,1,0,0,1,1,0,1,1,0,Initial learning curve for robot-assisted part...
4998,1,1,0,1,0,0,1,0,0,0,0,0,0,0,Action of fatty acids on the exocrine pancreat...
