# Clone the <b>simpletransformers</b> library 

In [1]:
! git clone https://github.com/ThilinaRajapakse/simpletransformers.git
%cd simpletransformers

Cloning into 'simpletransformers'...
remote: Enumerating objects: 89, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 8489 (delta 51), reused 65 (delta 32), pack-reused 8400[K
Receiving objects: 100% (8489/8489), 19.50 MiB | 40.09 MiB/s, done.
Resolving deltas: 100% (5551/5551), done.
/content/simpletransformers


# The function for Focal Loss

In [2]:
%%writefile /content/simpletransformers/simpletransformers/classification/transformer_models/focal_loss.py

from typing import Optional, Sequence

import torch
from torch import Tensor
from torch import nn
from torch.nn import functional as F


class FocalLoss(nn.Module):
    """ Focal Loss, as described in https://arxiv.org/abs/1708.02002.
    It is essentially an enhancement to cross entropy loss and is
    useful for classification tasks when there is a large class imbalance.
    x is expected to contain raw, unnormalized scores for each class.
    y is expected to contain class labels.
    Shape:
        - x: (batch_size, C) or (batch_size, C, d1, d2, ..., dK), K > 0.
        - y: (batch_size,) or (batch_size, d1, d2, ..., dK), K > 0.
    """

    def __init__(self,
                 alpha: Optional[Tensor] = None,
                 gamma: float = 0.,
                 reduction: str = 'mean',
                 ignore_index: int = -100):
        """Constructor.
        Args:
            alpha (Tensor, optional): Weights for each class. Defaults to None.
            gamma (float, optional): A constant, as described in the paper.
                Defaults to 0.
            reduction (str, optional): 'mean', 'sum' or 'none'.
                Defaults to 'mean'.
            ignore_index (int, optional): class label to ignore.
                Defaults to -100.
        """
        if reduction not in ('mean', 'sum', 'none'):
            raise ValueError(
                'Reduction must be one of: "mean", "sum", "none".')

        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_index = ignore_index
        self.reduction = reduction

        self.nll_loss = nn.NLLLoss(
            weight=alpha, reduction='none', ignore_index=ignore_index)

    def __repr__(self):
        arg_keys = ['alpha', 'gamma', 'ignore_index', 'reduction']
        arg_vals = [self.__dict__[k] for k in arg_keys]
        arg_strs = [f'{k}={v}' for k, v in zip(arg_keys, arg_vals)]
        arg_str = ', '.join(arg_strs)
        return f'{type(self).__name__}({arg_str})'

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        if x.ndim > 2:
            # (N, C, d1, d2, ..., dK) --> (N * d1 * ... * dK, C)
            c = x.shape[1]
            x = x.permute(0, *range(2, x.ndim), 1).reshape(-1, c)
            # (N, d1, d2, ..., dK) --> (N * d1 * ... * dK,)
            y = y.view(-1)

        unignored_mask = y != self.ignore_index
        y = y[unignored_mask]
        if len(y) == 0:
            return 0.
        x = x[unignored_mask]

        # compute weighted cross entropy term: -alpha * log(pt)
        # (alpha is already part of self.nll_loss)
        log_p = F.log_softmax(x, dim=-1)
        ce = self.nll_loss(log_p, y)

        # get true class column from each row
        all_rows = torch.arange(len(x))
        log_pt = log_p[all_rows, y]

        # compute focal term: (1 - pt)^gamma
        pt = log_pt.exp()
        focal_term = (1 - pt)**self.gamma

        # the full loss: -alpha * ((1 - pt)^gamma) * log(pt)
        loss = focal_term * ce

        if self.reduction == 'mean':
            loss = loss.mean()
        elif self.reduction == 'sum':
            loss = loss.sum()

        return loss


def focal_loss(alpha: Optional[Sequence] = None,
               gamma: float = 0.,
               reduction: str = 'mean',
               ignore_index: int = -100,
               device='cpu',
               dtype=torch.float32) -> FocalLoss:
    """Factory function for FocalLoss.
    Args:
        alpha (Sequence, optional): Weights for each class. Will be converted
            to a Tensor if not None. Defaults to None.
        gamma (float, optional): A constant, as described in the paper.
            Defaults to 0.
        reduction (str, optional): 'mean', 'sum' or 'none'.
            Defaults to 'mean'.
        ignore_index (int, optional): class label to ignore.
            Defaults to -100.
        device (str, optional): Device to move alpha to. Defaults to 'cpu'.
        dtype (torch.dtype, optional): dtype to cast alpha to.
            Defaults to torch.float32.
    Returns:
        A FocalLoss object
    """
    if alpha is not None:
        if not isinstance(alpha, Tensor):
            alpha = torch.tensor(alpha)
        alpha = alpha.to(device=device, dtype=dtype)

    fl = FocalLoss(
        alpha=alpha,
        gamma=gamma,
        reduction=reduction,
        ignore_index=ignore_index)
    return fl

Writing /content/simpletransformers/simpletransformers/classification/transformer_models/focal_loss.py


## Overwriting the default DistilmBERT file to accomodate the new Focal loss 

In [3]:
%%writefile /content/simpletransformers/simpletransformers/classification/transformer_models/distilbert_model.py


import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from .focal_loss import focal_loss
from transformers.models.distilbert.modeling_distilbert import DistilBertModel, DistilBertPreTrainedModel


class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
    r"""
        **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
            Labels for computing the sequence classification/regression loss.
            Indices should be in ``[0, ..., config.num_labels - 1]``.
            If ``config.num_labels == 1`` a regression loss is computed (Mean-Square loss),
            If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy).
    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
            Classification (or regression if config.num_labels==1) loss.
        **logits**: ``torch.FloatTensor`` of shape ``(batch_size, config.num_labels)``
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
            of shape ``(batch_size, sequence_length, hidden_size)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
    Examples::
        tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
        model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
        labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids, labels=labels)
        loss, logits = outputs[:2]
    """  # noqa: ignore flake8"

    def __init__(self, config, weight=None):
        super(DistilBertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.weight = weight

        self.distilbert = DistilBertModel(config)
        self.pre_classifier = nn.Linear(config.dim, config.dim)
        self.classifier = nn.Linear(config.dim, config.num_labels)
        self.dropout = nn.Dropout(config.seq_classif_dropout)

        self.init_weights()

    def forward(
        self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None, labels=None, class_weights=None,
    ):
        distilbert_output = self.distilbert(input_ids=input_ids, attention_mask=attention_mask, head_mask=head_mask)
        hidden_state = distilbert_output[0]  # (bs, seq_len, dim)
        pooled_output = hidden_state[:, 0]  # (bs, dim)
        pooled_output = self.pre_classifier(pooled_output)  # (bs, dim)
        pooled_output = nn.ReLU()(pooled_output)  # (bs, dim)
        pooled_output = self.dropout(pooled_output)  # (bs, dim)
        logits = self.classifier(pooled_output)  # (bs, dim)

        outputs = (logits,) + distilbert_output[1:]
        if labels is not None:
            if self.num_labels == 1:
                loss_fct = nn.MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                if self.weight is not None:
                    weight = self.weight.to(labels.device)
                else:
                    weight = None
                # loss_fct = CrossEntropyLoss(weight=weight)
                # loss_fct2 = focal_loss(alpha=weight, device=labels.device)
                loss_fct2 = focal_loss(device=labels.device)
                loss = loss_fct2(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)

Overwriting /content/simpletransformers/simpletransformers/classification/transformer_models/distilbert_model.py


# Define the requirements file

In [4]:
%%writefile req.txt

numpy
requests
tqdm>=4.47.0
regex
transformers>=4.0.0
scipy
scikit-learn
seqeval
tensorboardx
pandas
tokenizers==0.9.4
wandb
streamlit
sentencepiece

Writing req.txt


# Download and install the requirements(restart runtime if using Google Colab)

In [None]:
! pip install -r req.txt

In [None]:
%cd simpletransformers/
! python setup.py install

# Download the datasets(files converted to xlsx)

In [2]:
## Tamil
! gdown https://drive.google.com/uc?id=10pPg_WI0Qzgvi-qwxcyWbtfqYda0DsoM
! gdown https://drive.google.com/uc?id=1iF4sZ1XFL4pG6YVGrPWEsN1NMiPDQ0cA
! gdown https://drive.google.com/uc?id=1CgoMCL-ZKda6G8xfrVeNPPiSt2S6hBio


## Malayalam
! gdown https://drive.google.com/uc?id=1aA-cxg_iRtM83NgCDSluBIL7fsNUiJg_
! gdown https://drive.google.com/uc?id=1y50Xnd685oCoziVytpVtBcJ8CZizE5Nt
! gdown https://drive.google.com/uc?id=1h7vrLgccRuEanpDipHMFv2q4ptX65HlW


## Kannada
! gdown https://drive.google.com/uc?id=1k6on-7xMJ6zyaFpCxrV3CZii4y1UD4KP
! gdown https://drive.google.com/uc?id=1Dx-TByQ2gIjvHmmNJTA-Aj8rqwGjHo7i
! gdown https://drive.google.com/uc?id=1zG-K2hdpx4n-Geqpww7s5bd8CYCgeJBH

Downloading...
From: https://drive.google.com/uc?id=10pPg_WI0Qzgvi-qwxcyWbtfqYda0DsoM
To: /content/simpletransformers/tamil_offensive_full_train.xlsx
100% 1.72M/1.72M [00:00<00:00, 27.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=1iF4sZ1XFL4pG6YVGrPWEsN1NMiPDQ0cA
To: /content/simpletransformers/tamil_offensive_full_dev.xlsx
100% 221k/221k [00:00<00:00, 14.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=1CgoMCL-ZKda6G8xfrVeNPPiSt2S6hBio
To: /content/simpletransformers/tamil_offensive_full_test_with_labels.xlsx
100% 218k/218k [00:00<00:00, 7.04MB/s]
Downloading...
From: https://drive.google.com/uc?id=1aA-cxg_iRtM83NgCDSluBIL7fsNUiJg_
To: /content/simpletransformers/mal_full_offensive_train.xlsx
100% 629k/629k [00:00<00:00, 9.96MB/s]
Downloading...
From: https://drive.google.com/uc?id=1y50Xnd685oCoziVytpVtBcJ8CZizE5Nt
To: /content/simpletransformers/mal_full_offensive_dev.xlsx
100% 101k/101k [00:00<00:00, 3.20MB/s]
Downloading...
From: https://drive.google.com/uc?i

# Import required modules

In [3]:
import pandas as pd
import numpy as np
from simpletransformers.classification import ClassificationModel
from sklearn.metrics import f1_score, accuracy_score, classification_report, matthews_corrcoef
from sklearn.metrics import precision_recall_fscore_support as score
import os
import tarfile
import warnings 

warnings.filterwarnings('ignore')

# Choose language 

In [5]:
lang = input('Choose language: 1 for tamil, 2 for malayalam, 3 for kannada: ')
class_list = ['Not_offensive',
 'Offensive_Targeted_Insult_Group',
 'Offensive_Targeted_Insult_Individual',
 'Offensive_Targeted_Insult_Other',
 'Offensive_Untargetede']


train_file_name = None
dev_file_name = None

if lang == '1': 
    lang = 'Tamil'
    class_list.append('not-Tamil')
    train_file_name = 'tamil_offensive_full_train.xlsx'
    dev_file_name = 'tamil_offensive_full_dev.xlsx'
    test_file_name = 'tamil_offensive_full_test_with_labels.xlsx'
elif lang == '2': 
    lang = 'malayalam'
    # As it contains only 5 classes ('Offensive_Targeted_Insult_Other' is not present)
    class_list.append('not-malayalam')
    class_list.remove('Offensive_Targeted_Insult_Other')

    train_file_name = 'mal_full_offensive_train.xlsx'
    dev_file_name = 'mal_full_offensive_dev.xlsx'
    test_file_name = 'mal_full_offensive_test_with_labels.xlsx'
elif lang == '3': 
    lang = 'Kannada'
    class_list.append('not-Kannada')
    train_file_name = 'kannada_offensive_train.xlsx'
    dev_file_name = 'kannada_offensive_dev.xlsx'
    test_file_name = 'kannada_offensive_test_with_labels.xlsx'

Choose language: 1 for tamil, 2 for malayalam, 3 for kannada: 3


# Load data into dataframes

In [6]:
train_df = pd.read_excel(train_file_name, header=None)
train_df.columns = ['Input', 'Label']
train_df = train_df.dropna().drop_duplicates().reset_index(drop=True, inplace=False)

dev_df = pd.read_excel(dev_file_name, header=None)
dev_df.columns = ['Input', 'Label']

test_df = pd.read_excel(test_file_name, header=None, engine='openpyxl')
test_df.columns = ['Input', 'Label']

# Labels mapped to integers
train_df['Label'] = train_df.apply(lambda x:  class_list.index(x['Label']),axis=1)
dev_df['Label'] = dev_df.apply(lambda x:  class_list.index(x['Label']),axis=1)
test_df['Label'] = test_df.apply(lambda x:  class_list.index(x['Label']),axis=1)


print(f'Number of exmaples in the train set: {train_df.shape[0]}')
print(f'Number of exmaples in the validation set: {dev_df.shape[0]}')
print(f'Number of exmaples in the test set: {test_df.shape[0]}')

Number of exmaples in the train set: 5936
Number of exmaples in the validation set: 777
Number of exmaples in the test set: 778


# How sample data looks like.

In [7]:
test_df.head(10)

Unnamed: 0,Input,Label
0,ಜೈ ಮೋರಿ ಜೈ ರೋಗಿ ಇಸ್ಲಾಂ ಸಾಯಿಸಿ ಇದೆ ಇವರ ಗುರಿ,1
1,Anna nim e vedio nodinu mathe chaina apps use ...,1
2,Super anna yenu thappila yela sari agi hellidi...,0
3,Super sir super super super super,5
4,AGT-KELU (KANNADA RAP 2019),5
5,Kuladalli keelyavudo huchappa,2
6,ಶೇಕಡ 100(%) ರಷ್ಟು viruses na China ಉತ್ಪನ್ನ ಮಾಡ...,1
7,Aagle tv li haaaktidaaralla guru.....,0
8,Hindi video song . Yavaga release,0
9,@Nandi Parthasarathi ನಿ ಮುಂಡೆ,2


# Class-weighting with inverse of #samples in the class

In [8]:
inverse_weights = np.array(train_df['Label'].value_counts().sort_index())
weights = np.sum(inverse_weights) / inverse_weights

# Load pre-trained model

In [9]:
# define hyperparameters
train_args ={"reprocess_input_data": True,
             "overwrite_output_dir": True,
             "fp16":False,
             "num_train_epochs": 10, # run for 10 epochs
             "no_save": True, # don't save the weights after each iteration as it exceeds the runtime memory allowed
             }

class_weighting = input('Do you want to use class weighting:\nPress\nY for Yes\nN for No: ')

if class_weighting.lower() == 'n':
    # Create a Classification Model (WITHOUT USING CLASS-WEIGHTING)
    print('Model not using class-weighting')
    model = ClassificationModel(
        "distilbert", "distilbert-base-multilingual-cased",
        num_labels=len(class_list),
        args=train_args
)

else:
    # Create a Classification Model (USING CLASS-WEIGHTING)
    print('Model using class-weighting')
    model = ClassificationModel(
        "distilbert", "distilbert-base-multilingual-cased",
        num_labels=len(class_list),
        weight=list(weights),
        args=train_args
    )

Do you want to use class weighting:
Press
Y for Yes
N for No: Y
Model using class-weighting


Downloading:   0%|          | 0.00/466 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/542M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-multilingual-cased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['pre_classifier.weight', 'pre_cla

Downloading:   0%|          | 0.00/996k [00:00<?, ?B/s]

# Train the model

In [10]:
def to_str(text):
    tt = text['Input']
    if not isinstance(tt, str):
        tt = str(tt)
    return pd.Series([tt, text['Label']])

model.train_model(train_df.apply(lambda row: to_str(row), axis=1))

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/742 [00:00<?, ?it/s]

(742, 0.9540320838879383)

# Check performance over test set

## Metrics

In [11]:
# precision_recall_fscore_support
def scr(labels, preds):
    return score(labels, preds, average='macro')

# f1 score
def f1_multiclass(labels, preds):
    return f1_score(labels, preds, average='micro')
    
# classification report
def classification_rprt(labels, preds):
    return classification_report(labels, preds, output_dict=True, target_names=class_list)

# matthews correlation coefficient 
def mathews_coff(labels, pred):
    return matthews_corrcoef(labels, pred)

## Evaluate over the validation set

In [12]:
result, model_outputs, wrong_predictions = model.eval_model(dev_df.apply(lambda x: to_str(x), axis=1), f1=f1_multiclass, acc=accuracy_score, 
                                                            cls_report=classification_rprt, mathews_coff = mathews_coff, score=scr)

print(f"Val acc: {result['acc']}\nEval_loss: {result['eval_loss']}\nf1 score: {result['f1']}\nMatthews corrcoeff: {result['mathews_coff']}")

Running Evaluation:   0%|          | 0/98 [00:00<?, ?it/s]

Val acc: 0.7117117117117117
Eval_loss: 0.8932422527245113
f1 score: 0.7117117117117117
Matthews corrcoeff: 0.50141418261308


## The Classification report

In [13]:
pd.DataFrame(result['cls_report']).transpose()

Unnamed: 0,precision,recall,f1-score,support
Not_offensive,0.714563,0.86385,0.782147,426.0
Offensive_Targeted_Insult_Group,0.0,0.0,0.0,45.0
Offensive_Targeted_Insult_Individual,0.659091,0.439394,0.527273,66.0
Offensive_Targeted_Insult_Other,0.0,0.0,0.0,16.0
Offensive_Untargetede,0.0,0.0,0.0,33.0
not-Kannada,0.715596,0.816754,0.762836,191.0
accuracy,0.711712,0.711712,0.711712,0.711712
macro avg,0.348208,0.353333,0.345376,777.0
weighted avg,0.623659,0.711712,0.661128,777.0


## The precision, recall and f1 scores over the dev-set



In [14]:
print(f"Precision: \t{result['score'][0]} \nRecall: \t{result['score'][1]} \nF1-score: \t{result['score'][2]}")

Precision: 	0.34820839102704254 
Recall: 	0.35333293855895437 
F1-score: 	0.34537592759819025


# Make a dataframe to write the results to an excel file

In [15]:
if lang == 'Kannada' or lang == 'Tamil':
    result_df = pd.concat([pd.DataFrame({'Input': [test_df.iloc[i]['Input']],
                            'Correct Label': [class_list[test_df.iloc[i]['Label']]], 
                            'Predicted Label': [class_list[np.argmax(model_outputs[i])]],
                            'Score_Not_offensive': [model_outputs[i][0]],
                            'Score_Offensive_Targeted_Insult_Group': [model_outputs[i][1]],
                            'Score_Offensive_Targeted_Insult_Individual': [model_outputs[i][2]],
                            'Score_Offensive_Targeted_Insult_Other': [model_outputs[i][3]],
                            'Score_Offensive_Untargetede': [model_outputs[i][4]],
                            f'Score_not-{lang}': [model_outputs[i][5]]
                            
                            }) for i, j in enumerate(model_outputs)],  ignore_index=True)
    
else:
    result_df = pd.concat([pd.DataFrame({'Input': [test_df.iloc[i]['Input']],
                            'Correct Label': [class_list[test_df.iloc[i]['Label']]], 
                            'Predicted Label': [class_list[np.argmax(model_outputs[i])]],
                            'Score_Not_offensive': [model_outputs[i][0]],
                            'Score_Offensive_Targeted_Insult_Group': [model_outputs[i][1]],
                            'Score_Offensive_Targeted_Insult_Individual': [model_outputs[i][2]],
                            'Score_Offensive_Untargetede': [model_outputs[i][3]],
                            f'Score_not-{lang}': [model_outputs[i][4]]
                            
                            }) for i, j in enumerate(model_outputs)],  ignore_index=True)

## Normalize the scores

In [16]:
# To normalize the scores between 0 and 1 (both inclusive)
req_cols = [col for col in result_df.columns if col.startswith('Score')]

def normalize(row):
    vals = np.array([row[col] for col in req_cols])
    req_vals = np.exp(vals)/sum(np.exp(vals))
    for i,col in enumerate(req_cols):
        row[col] = req_vals[i]

    return pd.Series(row)

# Write results to file

In [17]:
result_df = result_df.apply(lambda row: normalize(row), axis=1)
result_df.to_excel(f'mbert_val_{lang}_without_pseudo-labelling_focalloss.xlsx', encoding='utf-8')

# Pseudo-labelling

### 1) Obtain predictions over test-set

In [18]:
test_df['Input'] = test_df['Input'].apply(lambda val: str(val))
predictions, raw_outputs = model.predict(test_df['Input'])

  0%|          | 0/98 [00:00<?, ?it/s]

### 2) Combine the test-set inputs and predictions obtained with the train-set

In [19]:
test_df['Label'] = predictions
pseudo_label_df = pd.concat([train_df, test_df], axis=0).reset_index(drop=True)

### 3) Train the model again with pseudo-labelled data included 

In [20]:
model.train_model(pseudo_label_df)

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/840 [00:00<?, ?it/s]

(840, 0.7296889090467067)

# Check performance over dev-set with the new model

### Evaluate over the validation set

In [21]:
result, model_outputs, wrong_predictions = model.eval_model(dev_df.apply(lambda x: to_str(x), axis=1), f1=f1_multiclass, acc=accuracy_score, 
                                                            cls_report=classification_rprt, mathews_coff = mathews_coff, score=scr)

print(f"Val acc: {result['acc']}\nEval_loss: {result['eval_loss']}\nf1 score: {result['f1']}\nMatthews corrcoeff: {result['mathews_coff']}")

Running Evaluation:   0%|          | 0/98 [00:00<?, ?it/s]

Val acc: 0.7168597168597168
Eval_loss: 0.8988328568485319
f1 score: 0.7168597168597168
Matthews corrcoeff: 0.5143396166147888


## The Classification Report

In [22]:
pd.DataFrame(result['cls_report']).transpose()

Unnamed: 0,precision,recall,f1-score,support
Not_offensive,0.72619,0.859155,0.787097,426.0
Offensive_Targeted_Insult_Group,0.4,0.088889,0.145455,45.0
Offensive_Targeted_Insult_Individual,0.653846,0.515152,0.576271,66.0
Offensive_Targeted_Insult_Other,0.0,0.0,0.0,16.0
Offensive_Untargetede,0.0,0.0,0.0,33.0
not-Kannada,0.725118,0.801047,0.761194,191.0
accuracy,0.71686,0.71686,0.71686,0.71686
macro avg,0.417526,0.377374,0.378336,777.0
weighted avg,0.655095,0.71686,0.676024,777.0


## The precision, recall and f1 scores over the dev-set

In [23]:
print(f"Precision: {result['score'][0]} \nRecall: {result['score'][1]} \nF1-score: {result['score'][2]}")

Precision: 0.417525852241492 
Recall: 0.3773737423394528 
F1-score: 0.378336089323253


# Make a dataframe to write the results to an excel file

In [24]:
if lang == 'Kannada' or lang == 'Tamil':
    result_df = pd.concat([pd.DataFrame({'Input': [dev_df.iloc[i]['Input']],
                            'Correct Label': [class_list[dev_df.iloc[i]['Label']]], 
                            'Predicted Label': [class_list[np.argmax(model_outputs[i])]],
                            'Score_Not_offensive': [model_outputs[i][0]],
                            'Score_Offensive_Targeted_Insult_Group': [model_outputs[i][1]],
                            'Score_Offensive_Targeted_Insult_Individual': [model_outputs[i][2]],
                            'Score_Offensive_Targeted_Insult_Other': [model_outputs[i][3]],
                            'Score_Offensive_Untargetede': [model_outputs[i][4]],
                            f'Score_not-{lang}': [model_outputs[i][5]]
                            
                            }) for i, j in enumerate(model_outputs)],  ignore_index=True)
    
else:
    result_df = pd.concat([pd.DataFrame({'Input': [dev_df.iloc[i]['Input']],
                            'Correct Label': [class_list[dev_df.iloc[i]['Label']]], 
                            'Predicted Label': [class_list[np.argmax(model_outputs[i])]],
                            'Score_Not_offensive': [model_outputs[i][0]],
                            'Score_Offensive_Targeted_Insult_Group': [model_outputs[i][1]],
                            'Score_Offensive_Targeted_Insult_Individual': [model_outputs[i][2]],
                            'Score_Offensive_Untargetede': [model_outputs[i][3]],
                            f'Score_not-{lang}': [model_outputs[i][4]]
                            
                            }) for i, j in enumerate(model_outputs)],  ignore_index=True)

## Write results to file

In [25]:
result_df = result_df.apply(lambda row: normalize(row), axis=1)
result_df.to_excel(f'mbert_val_{lang}_with_pseudo-labelling_focalloss.xlsx', encoding='utf-8')