In [11]:
# Torch Imports
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm

# HF Transformers
import transformers
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import DataCollatorWithPadding
from transformers import TrainingArguments, Trainer

# Data handling
import numpy as np
import pandas as pd
import datasets
# NOTE: this rebinds `Dataset`, shadowing torch.utils.data.Dataset imported above.
from datasets import Dataset
from datasets import load_metric,list_metrics

# GPU Flushing
import gc

# Evaluation
import seaborn as sns
import sklearn
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [12]:
# Shell out to nvidia-smi to record the GPU, driver/CUDA version and current memory usage.
!nvidia-smi

Sat Feb 26 16:33:10 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 496.13       Driver Version: 496.13       CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:25:00.0  On |                  N/A |
| 29%   30C    P8     7W / 120W |   2984MiB /  3072MiB |      9%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [13]:
# Report whether PyTorch can see a CUDA device, and which one.
print(torch.cuda.is_available())
# Only query the device name when CUDA is present: get_device_name(0) raises
# on a CPU-only machine and would abort a Restart & Run All.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device detected")

True
NVIDIA GeForce GTX 1060 3GB


In [14]:
# Load the dataset previously saved to disk; its "train" and "test" splits
# are what the trainer is given further down.
train_test_ds = datasets.load_from_disk(
    dataset_path="../data/processed_data",
)

In [15]:
# Tokenizer belonging to the same DistilBERT checkpoint that is fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="distilbert-base-uncased",
)

In [16]:

# Checkpoint identifier, kept in one variable so it is spelled only once in this cell.
bert_model = 'distilbert-base-uncased'
# num_labels=3 sizes the newly initialised classification head for the three target classes.
model = AutoModelForSequenceClassification.from_pretrained(bert_model, num_labels=3)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier

In [17]:


# Batch collator built from the tokenizer; it pads the examples of a batch so
# they can be stacked (see the transformers DataCollatorWithPadding docs for
# the padding options).
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [18]:
# Free memory before training. Order matters: collect Python garbage first so
# unreferenced objects are dropped, then release PyTorch's cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [19]:
class CustomTrainer(Trainer):
    """Trainer whose loss is a class-weighted cross-entropy.

    The weights are indexed by label id; with the defaults, errors on label 0
    cost ten times as much as errors on labels 1 and 2. Override
    ``class_weights`` in a subclass (or on an instance) to change them.
    """

    # Per-class loss weights, one entry per label id.
    class_weights = (10.0, 1.0, 1.0)

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the weighted cross-entropy loss for one batch.

        Args:
            model: The model to run the forward pass with.
            inputs: Batch mapping passed to the model; must contain ``"labels"``.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: Accepted for compatibility with newer Trainer
                versions, which pass it as a keyword; unused here.

        Returns:
            The scalar loss, or a ``(loss, outputs)`` tuple if
            ``return_outputs`` is true.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Build the weights on the same device as the logits so the loss works
        # on CPU as well as on whichever GPU the batch was placed on.
        weight = torch.tensor(self.class_weights, device=logits.device)
        loss_fct = nn.CrossEntropyLoss(weight=weight)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [None]:
def compute_metrics(eval_preds):
    """Compute accuracy and macro-averaged precision/recall/F1 for the Trainer.

    Args:
        eval_preds: Pair ``(logits, labels)``. The index of the largest logit
            along the last axis is taken as the predicted class.

    Returns:
        dict mapping ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"f1"``
        to plain floats, so each entry can be logged as a scalar.
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # A single call yields all three macro-averaged scores; nothing has to be
    # loaded or downloaded at evaluation time.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="macro"
    )
    return {
        "accuracy": float(accuracy_score(labels, predictions)),
        "precision": float(precision),
        "recall": float(recall),
        "f1": float(f1),
    }

# Fine-tuning configuration: outputs are written to ./results and evaluation
# runs once per epoch.
training_args = TrainingArguments(
    output_dir="./results",
    # schedule
    num_train_epochs=5,
    evaluation_strategy="epoch",
    # batching
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # optimisation
    learning_rate=2e-5,
    weight_decay=0.01,
)

# Assemble the trainer from the model, data, tokenizer, collator and metric
# function created in the cells above.
trainer = CustomTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=train_test_ds["train"],
    eval_dataset=train_test_ds["test"],
)

In [None]:
# Run the fine-tuning loop with the configuration assembled above.
trainer.train()

In [None]:
# Run inference on the test split, then sanity-check the shapes of the
# returned predictions and label ids.
predictions = trainer.predict(test_dataset=train_test_ds["test"])
print(f"{predictions.predictions.shape} {predictions.label_ids.shape}")

In [None]:
# Print the whole object returned by trainer.predict for inspection.
print(predictions)

In [None]:
# Predicted class id per example: position of the largest score along the last axis.
y_preds = predictions.predictions.argmax(axis=-1)

In [5]:

# Names of the three target classes.
classes = (
    "hate",
    "offensive",
    "neither",
)


In [None]:
# Per-class precision/recall/F1 on the test split. Rows carry the class names
# instead of bare label ids, in the same id order (0, 1, 2) that the
# confusion-matrix cell uses. `labels` is pinned so the report keeps all
# three rows even if a class is absent from the split.
print(classification_report(
    y_true=predictions.label_ids,
    y_pred=y_preds,
    labels=list(range(len(classes))),
    target_names=classes,
))

In [None]:

# Row-normalised confusion matrix of the test predictions: each row is divided
# by the number of true samples of that class.
target_names = ("hate","offensive","neither")
# Pin the label set so the matrix always has one row/column per tick label,
# even if a class never occurs in the labels or the predictions.
cm = confusion_matrix(
    y_true=predictions.label_ids,
    y_pred=y_preds,
    labels=list(range(len(target_names))),
)
# Guard the division: a class without true samples has a zero row total,
# which would otherwise turn its whole row into NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cmn = np.divide(
    cm.astype('float'),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)
fig, ax = plt.subplots(figsize=(10,10))
# Draw on the axes created above instead of relying on pyplot's current axes.
sns.heatmap(cmn, annot=True, fmt='.2f', xticklabels=target_names, yticklabels=target_names, ax=ax)
ax.set_title('Normalised confusion matrix')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()