# Utility Functions

In [1]:
import os
os.add_dll_directory("C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.5/bin")
import numpy as np

from sklearn.model_selection import train_test_split
from tensorflow.test import is_gpu_available

from scripts.utils import load_data
from scripts.model import recall_m, precision_m, f1_m, get_model_and_data

WANDB_PROJECT_NAME = os.getenv("WANDB_PROJECT_NAME") or "[NLP] lab-04 | misogyny classification"

# Load Data

In [2]:
df = load_data()
df.head()

Unnamed: 0,file_name,misogynous,shaming,stereotype,objectification,violence,Text Transcription
0,28.jpg,0,0,0,0,0,"not now, dad. We should burn Jon Snow. stop it..."
1,30.jpg,0,0,0,0,0,there may have been a mixcommunication with th...
2,33.jpg,0,0,0,0,0,i shouldn't have sold my boat
3,58.jpg,1,0,0,0,1,"Bitches be like, It was my fault i made him mad"
4,89.jpg,0,0,0,0,0,find a picture of 4 girls together on FB make ...


In [None]:
df.describe()

In [None]:
df.info()

# Data-preprocessing

In [3]:
df = df.drop(columns=["file_name"])
df.head()

Unnamed: 0,misogynous,shaming,stereotype,objectification,violence,Text Transcription
0,0,0,0,0,0,"not now, dad. We should burn Jon Snow. stop it..."
1,0,0,0,0,0,there may have been a mixcommunication with th...
2,0,0,0,0,0,i shouldn't have sold my boat
3,1,0,0,0,1,"Bitches be like, It was my fault i made him mad"
4,0,0,0,0,0,find a picture of 4 girls together on FB make ...


## Train-Test Split

In [4]:
X = df['Text Transcription']
y_task1 = df['misogynous']
y_task2 = df[["shaming", "stereotype", "objectification", "violence"]]

In [5]:

X_train, X_test, y_train_task1, y_test_task1 = train_test_split(X, y_task1, test_size=0.2, random_state=42)
_, _, y_train_task2, y_test_task2 = train_test_split(X, y_task2, test_size=0.2, random_state=42)


# Define Models

In [6]:
from wandb.keras import WandbCallback
from transformers import TFBertForSequenceClassification, TFAlbertForSequenceClassification, TFRobertaForSequenceClassification, TFDistilBertForSequenceClassification

## Task 1

Weights and Biases is going to be used for logging model training and hyperparameter tuning. The project is available at [[NLP] lab-04 | misogyny classification](https://wandb.ai/aleksandar1932/[NLP]%20lab-04%20%7C%20misogyny%20classification?workspace=user-aleksandar1932).

### Bert Model

In [9]:
from wandb import wandb
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy, hinge

run = wandb.init(project=WANDB_PROJECT_NAME, job_type="training")

In [8]:
model, train_input_ids, train_attention_masks, test_input_ids, test_attention_masks = get_model_and_data(TFBertForSequenceClassification, 2, X_train, X_test)

Creating TFBertForSequenceClassification-bert-base-cased with 2 labels
Tokenizing data with BertTokenizerFast


100%|██████████| 78/78 [00:00<00:00, 3722.08it/s]
100%|██████████| 20/20 [00:00<00:00, 3967.18it/s]
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
model.summary()
model.compile(optimizer=Adam(learning_rate=0.01),
                                  loss=hinge,
                                  metrics=['accuracy', f1_m,precision_m, recall_m])

Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  108310272 
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 108,311,810
Trainable params: 108,311,810
Non-trainable params: 0
_________________________________________________________________


In [11]:
if not is_gpu_available():
    print("No GPU found. Using CPU")

model.fit([np.array(train_input_ids), np.array(train_attention_masks)],
          np.array(y_train_task1),batch_size=10, epochs=20, verbose=2,
          callbacks=[WandbCallback()]
          )


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
Epoch 1/20
8/8 - 17s - loss: 1.9672 - accuracy: 0.4359 - f1_m: 0.2755 - precision_m: 0.2208 - recall_m: 0.5500 - 17s/epoch - 2s/step
Epoch 2/20
8/8 - 2s - loss: 1.3417 - accuracy: 0.5128 - f1_m: 0.3250 - precision_m: 0.3145 - recall_m: 0.6562 - 2s/epoch - 250ms/step
Epoch 3/20
8/8 - 2s - loss: 1.1743 - accuracy: 0.4744 - f1_m: 0.5598 - precision_m: 0.4563 - recall_m: 0.8250 - 2s/epoch - 250ms/step
Epoch 4/20
8/8 - 2s - loss: 1.7785 - accuracy: 0.4231 - f1_m: 0.4595 - precision_m: 0.3406 - recall_m: 0.8125 - 2s/epoch - 253ms/step
Epoch 5/20
8/8 - 2s - loss: 1.8016 - accuracy: 0.5256 - f1_m: 0.2681 - precision_m: 0.2125 - recall_m: 0.3750 - 2s/epoch - 258ms/step
Epoch 6/20
8/8 - 2s - loss: 1.4309 - accuracy: 0.5385 - f1_m: 0.3009 - precision_m: 0.2401 - recall_m: 0.6458 - 2s/epoch - 257ms/step
Epoch 7/20
8/8 - 2s - loss: 0.9910 - accuracy: 0.4872 - f1_m: 0.4542 - precision_m: 0.3375 - recall_m: 0.7188 - 2s/e

<keras.callbacks.History at 0x1fb80825cd0>

In [12]:
model.evaluate([np.array(test_input_ids), np.array(
    test_attention_masks)], np.array(y_test_task1), batch_size=2, verbose=2)
run.finish()

10/10 - 2s - loss: 0.8911 - accuracy: 0.3500 - f1_m: 0.0000e+00 - precision_m: 0.0000e+00 - recall_m: 0.0000e+00 - 2s/epoch - 181ms/step


0,1
accuracy,▃▅▄▃▆▆▅▆▅▅▆█▅▁▇▂█▄▆▃
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
f1_m,▄▅█▇▄▄▇▇▄▄▅▄▅▅▆▁▄▄▄▆
loss,█▃▂▇▇▄▁▂▂▄▂▁▂▆█▂▁▃▂▂
precision_m,▄▆█▆▄▄▆▇▄▃▄▆▆▄▅▁▅▅▄▆
recall_m,▅▅▇▇▃▅▆▇▄▆▆▂▅▇█▁▄▅▆▆

0,1
accuracy,0.4359
epoch,19.0
f1_m,0.38677
loss,1.12885
precision_m,0.30365
recall_m,0.76071


### Albert Model

In [14]:
run = wandb.init(project=WANDB_PROJECT_NAME, job_type="training")
model, train_input_ids, train_attention_masks, test_input_ids, test_attention_masks = get_model_and_data(TFAlbertForSequenceClassification, 2, X_train, X_test)

model.summary()
model.compile(optimizer=Adam(learning_rate=0.01),
                                  loss=hinge,
                                  metrics=['accuracy', f1_m,precision_m, recall_m])
                                  
if not is_gpu_available():
    print("No GPU found. Using CPU")

model.fit([np.array(train_input_ids), np.array(train_attention_masks)],
          np.array(y_train_task1),batch_size=10, epochs=20, verbose=2,
          callbacks=[WandbCallback()]
          )

model.evaluate([np.array(test_input_ids), np.array(test_attention_masks)], np.array(y_test_task1), batch_size=2, verbose=2)
run.finish()

Creating TFAlbertForSequenceClassification-albert-base-v2 with 2 labels
Tokenizing data with AlbertTokenizerFast


100%|██████████| 78/78 [00:00<00:00, 2982.03it/s]
100%|██████████| 20/20 [00:00<00:00, 2849.20it/s]
All model checkpoint layers were used when initializing TFAlbertForSequenceClassification.

Some layers of TFAlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_albert_for_sequence_classification_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 albert (TFAlbertMainLayer)  multiple                  11683584  
                                                                 
 dropout_47 (Dropout)        multiple                  0         
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 11,685,122
Trainable params: 11,685,122
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
8/8 - 12s - loss: 2.2776 - accuracy: 0.5000 - f1_m: 0.3200 - precision_m: 0.2094 - recall_m: 0.7500 - 12s/epoch - 2s/step
Epoch 2/20
8/8 - 1s - loss: 1.6479 - accuracy: 0.5000 - f1_m: 0.4106 - precision_m: 0.3089 - recall_m: 0.7250 - 1s/epoch - 180ms/step
Epoch 3/20
8

0,1
accuracy,▄▄▁▄▅▃▅▅▅▂▄▃▄█▂█▃▂▄▅
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
f1_m,▃▅█▆▂▄▃▃▂▅▆▁▅▂▃▅▁▆▂▆
loss,█▅▃▅▂▂▂▂▂▃▄▂▂▁▃▂▄▂▂▃
precision_m,▃▅█▅▃▆█▄▄▅▅▁▇▂▂▇▁▇▄▅
recall_m,▅▅▆▇▁▂▂▄▄██▃▆▂▅▄▅▅▃▆

0,1
accuracy,0.55128
epoch,19.0
f1_m,0.43189
loss,1.23849
precision_m,0.30729
recall_m,0.8625


### Roberta Model

In [10]:
run = wandb.init(project=WANDB_PROJECT_NAME, job_type="training")
model, train_input_ids, train_attention_masks, test_input_ids, test_attention_masks = get_model_and_data(TFRobertaForSequenceClassification, 2, X_train, X_test)

model.summary()
model.compile(optimizer=Adam(learning_rate=0.01),
                                  loss=hinge,
                                  metrics=['accuracy', f1_m,precision_m, recall_m])
                                  
if not is_gpu_available():
    print("No GPU found. Using CPU")

model.fit([np.array(train_input_ids), np.array(train_attention_masks)],
          np.array(y_train_task1),batch_size=10, epochs=20, verbose=2,
          callbacks=[WandbCallback()]
          )

model.evaluate([np.array(test_input_ids), np.array(test_attention_masks)], np.array(y_test_task1), batch_size=2, verbose=2)
run.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33maleksandar1932[0m (use `wandb login --relogin` to force relogin)
  warn("The `IPython.html` package has been deprecated since IPython 4.0. "


Creating TFRobertaForSequenceClassification-roberta-base with 2 labels
Tokenizing data with RobertaTokenizerFast


100%|██████████| 78/78 [00:00<00:00, 3397.43it/s]
100%|██████████| 20/20 [00:00<00:00, 3307.94it/s]
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_roberta_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 roberta (TFRobertaMainLayer  multiple                 124055040 
 )                                                               
                                                                 
 classifier (TFRobertaClassi  multiple                 592130    
 ficationHead)                                                   
                                                                 
Total params: 124,647,170
Trainable params: 124,647,170
Non-trainable params: 0
_________________________________________________________________
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
Epoch 1/20
8/8 - 17s - loss: 1.6940 - accuracy: 0.4744 - f1_m: 0.2184 - precision_m: 0.2750 - recall_m: 0.4271 - 17s/epoch - 2s/step
Epoch 2/20
8/8 - 2s - loss: 1.1995 - accuracy: 0.5000 - f1_m: 0.

0,1
accuracy,▄▅▄▂▂▇▄▄▄▃█▇▄▆▄▁▄▅▆▄
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
f1_m,▄▆▅▅▅▄▆▅▅▅▁█▅▇▅▇▇▇▄▄
loss,█▃▄▆▄▃▄▄▄▆▂▁▇▄▁▁▃▅▃▅
precision_m,▅▅▄▄▃▄▅▃▄▅▁█▄▇▆▇▆▅▃▂
recall_m,▄▇▆▆▆▄█▇▆▇▁█▆▆▆▇▇█▄▇

0,1
accuracy,0.47436
epoch,19.0
f1_m,0.27151
loss,1.4429
precision_m,0.16806
recall_m,0.75


### DistilBert Model

In [11]:
run = wandb.init(project=WANDB_PROJECT_NAME, job_type="training")
model, train_input_ids, train_attention_masks, test_input_ids, test_attention_masks = get_model_and_data(TFDistilBertForSequenceClassification, 2, X_train, X_test)

model.summary()
model.compile(optimizer=Adam(learning_rate=0.01),
                                  loss=hinge,
                                  metrics=['accuracy', f1_m,precision_m, recall_m])
                                  
if not is_gpu_available():
    print("No GPU found. Using CPU")

model.fit([np.array(train_input_ids), np.array(train_attention_masks)],
          np.array(y_train_task1),batch_size=10, epochs=20, verbose=2,
          callbacks=[WandbCallback()]
          )

model.evaluate([np.array(test_input_ids), np.array(test_attention_masks)], np.array(y_test_task1), batch_size=2, verbose=2)
run.finish()

Creating TFDistilBertForSequenceClassification-distilbert-base-cased with 2 labels
Tokenizing data with DistilBertTokenizerFast


100%|██████████| 78/78 [00:00<00:00, 3781.27it/s]
100%|██████████| 20/20 [00:00<00:00, 3971.50it/s]
Some layers from the model checkpoint at distilbert-base-cased were not used when initializing TFDistilBertForSequenceClassification: ['activation_13', 'vocab_projector', 'vocab_transform', 'vocab_layer_norm']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['dropout_57', 'pre_classifie

Model: "tf_distil_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 distilbert (TFDistilBertMai  multiple                 65190912  
 nLayer)                                                         
                                                                 
 pre_classifier (Dense)      multiple                  590592    
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
 dropout_57 (Dropout)        multiple                  0         
                                                                 
Total params: 65,783,042
Trainable params: 65,783,042
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
8/8 - 9s - loss: 4.4850 - accuracy: 0.5256 - f1_m: 0.3460 - precisi

0,1
accuracy,▅▆▅▁▄▃▄▄▄▅▅▁▇▄▆▅▅▇▄█
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
f1_m,█▅▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃
loss,█▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
precision_m,█▄▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃
recall_m,█▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃

0,1
accuracy,0.60256
epoch,19.0
f1_m,0.11765
loss,0.96282
precision_m,0.08333
recall_m,0.2


### Conclusion

In [19]:
from IPython.display import IFrame
IFrame('https://wandb.ai/aleksandar1932/[NLP]%20lab-04%20%7C%20misogyny%20classification/reports/Task-1--VmlldzoxMzg3OTU0', width="100%", height=500)

## Task 2

In [None]:
num_classes = 4

### Bert Model

In [None]:
run = wandb.init(project=WANDB_PROJECT_NAME, job_type="training")
model, train_input_ids, train_attention_masks, test_input_ids, test_attention_masks = get_model_and_data(
    TFBertForSequenceClassification, num_classes, X_train, X_test)

In [None]:
model.compile(optimizer=Adam(learning_rate=0.01), loss="categorical_crossentropy",
              metrics=["accuracy", f1_m, precision_m, recall_m])

In [None]:
model.fit([np.array(train_input_ids), np.array(train_attention_masks)],
          np.array(y_train_task2), batch_size=70, epochs=1, verbose=2,
          callbacks=[WandbCallback()])


In [None]:
model.evaluate([np.array(test_input_ids), np.array(
    test_attention_masks)], np.array(y_test_task2), batch_size=70, verbose=2)
run.finish()


### Albert Model

In [None]:
# TODO

### Roberta Model

In [None]:
# TODO

### DistilBert Model

In [None]:
# TODO