In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, ConvBertForSequenceClassification, ConvBertTokenizer
from transformers import AutoTokenizer

# Load the cleaned, tab-separated test splits that the evaluation cells score against.
df_olidtest = pd.read_csv('../datasets/cleaned_OLID_test.tsv', sep="\t")
df_solid = pd.read_csv('../datasets/cleaned_SOLIDtest6K_trainer.tsv', sep='\t')
df_troff = pd.read_csv('../datasets/cleaned_tr_offenseval_test.tsv', sep='\t')
df_hso = pd.read_csv('../datasets/cleaned_hatespeech_offensive_test.tsv', sep='\t')

# Each file is read through its 'tweet' (text) and 'label' (target) columns.
tweets_olid = df_olidtest['tweet'].values
labels_df_olid = df_olidtest['label'].values

tweets_solid = df_solid['tweet'].values
labels_df_solid = df_solid['label'].values

tweets_troff = df_troff['tweet'].values
labels_df_troff = df_troff['label'].values

tweets_hso = df_hso['tweet'].values
labels_df_hso = df_hso['label'].values

# NOTE(review): the tokenizer comes from a ConvBERT checkpoint, while the models
# evaluated later in this notebook are loaded as bert-base-uncased fine-tunes.
# Confirm that both share the same vocabulary and lower-casing behaviour.
full_model_name = 'YituTech/conv-bert-base'

tokenizer = AutoTokenizer.from_pretrained(full_model_name)

# This tokenizer reports no predefined maximum length, so truncation=True on its
# own is silently ignored (see the warning this cell used to print). Passing the
# limit explicitly makes truncation effective.
# Assumes 512 positions, the bert-base-uncased limit -- adjust if the checkpoints differ.
MAX_SEQ_LENGTH = 512

# padding=True pads every example to the longest sequence of its own dataset.
encodings_olid = tokenizer(tweets_olid.tolist(), truncation=True, padding=True, max_length=MAX_SEQ_LENGTH)
encodings_solid = tokenizer(tweets_solid.tolist(), truncation=True, padding=True, max_length=MAX_SEQ_LENGTH)
encodings_troff = tokenizer(tweets_troff.tolist(), truncation=True, padding=True, max_length=MAX_SEQ_LENGTH)
encodings_hso = tokenizer(tweets_hso.tolist(), truncation=True, padding=True, max_length=MAX_SEQ_LENGTH)



Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [3]:
import torch

class TweetDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with their labels.

    Args:
        encodings: mapping (anything exposing ``.items()``) from feature name to
            a per-example indexable sequence, e.g. the output of a tokenizer call.
        labels: indexable sequence holding one target per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The label sequence determines how many examples the dataset exposes.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the features of example ``idx`` as tensors, plus its ``'labels'`` tensor."""
        sample = {}
        for feature_name, column in self.encodings.items():
            sample[feature_name] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap every tokenized test set together with its label array.
val_dataset_olid = TweetDataset(encodings_olid, labels_df_olid)
val_dataset_solid = TweetDataset(encodings_solid, labels_df_solid)
val_dataset_troff = TweetDataset(encodings_troff, labels_df_troff)
val_dataset_hso = TweetDataset(encodings_hso, labels_df_hso)

# shuffle=False keeps batches in file order, so predictions collected from a
# loader line up index-for-index with the matching labels_df_* array.
_eval_loader_kwargs = dict(batch_size=12, shuffle=False)

val_loader_olid = torch.utils.data.DataLoader(val_dataset_olid, **_eval_loader_kwargs)
val_loader_solid = torch.utils.data.DataLoader(val_dataset_solid, **_eval_loader_kwargs)
val_loader_troff = torch.utils.data.DataLoader(val_dataset_troff, **_eval_loader_kwargs)
val_loader_hso = torch.utils.data.DataLoader(val_dataset_hso, **_eval_loader_kwargs)

In [6]:
import torch
from torch import optim
from transformers import BertForSequenceClassification, ConvBertForSequenceClassification
import time
# Base architecture name; it is the shared prefix of every checkpoint directory below.
model_name = "bert-base-uncased"

# Run on the GPU when one is available; the evaluation cells move models to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fine-tuned checkpoints, one per training-set combination (suffix = training data).
model_olid = BertForSequenceClassification.from_pretrained(f"models/{model_name}_olid")
model_solid = BertForSequenceClassification.from_pretrained(f"models/{model_name}_solid")
model_olidsolid = BertForSequenceClassification.from_pretrained(f"models/{model_name}_olid_solid")
model_solidtroff = BertForSequenceClassification.from_pretrained(f"models/{model_name}_solid_tr")
model_hso = BertForSequenceClassification.from_pretrained(f"models/{model_name}_hso")

# NOTE(review): no visible cell in this notebook steps this optimizer; it looks
# like a leftover from the training notebook -- confirm before removing.
optimizer = optim.AdamW(model_olid.parameters(), lr=1e-5)



In [7]:
## olid model on olid test

import numpy as np
import time
from sklearn.metrics import classification_report, confusion_matrix

# Score the OLID-trained model on the OLID test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_olid.to(device)
model_olid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_olid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("olid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_olid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid dataset test time:  2.151456356048584  seconds
Validation Accuracy: 0.8476744186046512
              precision    recall  f1-score   support

           0       0.89      0.90      0.89       620
           1       0.73      0.71      0.72       240

    accuracy                           0.85       860
   macro avg       0.81      0.81      0.81       860
weighted avg       0.85      0.85      0.85       860

True Positives (TP): 171
True Negatives (TN): 558
False Positives (FP): 62
False Negatives (FN): 69


In [8]:
## olid model on solid test

import numpy as np
import time
from sklearn.metrics import classification_report, confusion_matrix

# Score the OLID-trained model on the SOLID test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_olid.to(device)
model_olid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_olid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("solid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_solid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  12.404554843902588  seconds
Validation Accuracy: 0.9209077256799599
              precision    recall  f1-score   support

           0       0.99      0.85      0.91      2991
           1       0.87      0.99      0.93      3002

    accuracy                           0.92      5993
   macro avg       0.93      0.92      0.92      5993
weighted avg       0.93      0.92      0.92      5993

True Positives (TP): 2978
True Negatives (TN): 2541
False Positives (FP): 450
False Negatives (FN): 24


In [9]:
## olid model on hso test

from sklearn.metrics import confusion_matrix

# Score the OLID-trained model on the HSO test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_olid.to(device)
model_olid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_olid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("hso dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_hso, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  11.079460620880127  seconds
Validation Accuracy: 0.8384103288279201
              precision    recall  f1-score   support

           0       0.52      0.67      0.58       835
           1       0.93      0.87      0.90      4122

    accuracy                           0.84      4957
   macro avg       0.72      0.77      0.74      4957
weighted avg       0.86      0.84      0.85      4957

True Positives (TP): 3596
True Negatives (TN): 560
False Positives (FP): 275
False Negatives (FN): 526


In [10]:
## solid model on olid test

from sklearn.metrics import confusion_matrix

# Score the SOLID-trained model on the OLID test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_solid.to(device)
model_solid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_solid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("olid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_olid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid dataset test time:  2.1589205265045166  seconds
Validation Accuracy: 0.8383720930232558
              precision    recall  f1-score   support

           0       0.89      0.88      0.89       620
           1       0.71      0.72      0.71       240

    accuracy                           0.84       860
   macro avg       0.80      0.80      0.80       860
weighted avg       0.84      0.84      0.84       860

True Positives (TP): 173
True Negatives (TN): 548
False Positives (FP): 72
False Negatives (FN): 67


In [11]:
## solid model on solid test

from sklearn.metrics import confusion_matrix

# Score the SOLID-trained model on the SOLID test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_solid.to(device)
model_solid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_solid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("solid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_solid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  12.400338172912598  seconds
Validation Accuracy: 0.9182379442683131
              precision    recall  f1-score   support

           0       0.99      0.85      0.91      2991
           1       0.87      0.99      0.92      3002

    accuracy                           0.92      5993
   macro avg       0.93      0.92      0.92      5993
weighted avg       0.93      0.92      0.92      5993

True Positives (TP): 2969
True Negatives (TN): 2534
False Positives (FP): 457
False Negatives (FN): 33


In [12]:
## solid model on hso test

from sklearn.metrics import confusion_matrix

# Score the SOLID-trained model on the HSO test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_solid.to(device)
model_solid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_solid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("hso dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_hso, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  11.110952615737915  seconds
Validation Accuracy: 0.802703247932217
              precision    recall  f1-score   support

           0       0.44      0.68      0.54       835
           1       0.93      0.83      0.87      4122

    accuracy                           0.80      4957
   macro avg       0.69      0.75      0.71      4957
weighted avg       0.85      0.80      0.82      4957

True Positives (TP): 3410
True Negatives (TN): 569
False Positives (FP): 266
False Negatives (FN): 712


In [13]:
## olid + solid model on olid test

from sklearn.metrics import confusion_matrix

# Score the OLID+SOLID-trained model on the OLID test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_olidsolid.to(device)
model_olidsolid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_olidsolid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("olid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_olid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")


start
end
olid dataset test time:  2.1608002185821533  seconds
Validation Accuracy: 0.8244186046511628
              precision    recall  f1-score   support

           0       0.90      0.85      0.87       620
           1       0.66      0.76      0.71       240

    accuracy                           0.82       860
   macro avg       0.78      0.81      0.79       860
weighted avg       0.83      0.82      0.83       860

True Positives (TP): 183
True Negatives (TN): 526
False Positives (FP): 94
False Negatives (FN): 57


In [14]:
## olid + solid model on solid test

from sklearn.metrics import confusion_matrix

# Score the OLID+SOLID-trained model on the SOLID test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_olidsolid.to(device)
model_olidsolid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_olidsolid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("solid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_solid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  12.41598916053772  seconds
Validation Accuracy: 0.9172367762389454
              precision    recall  f1-score   support

           0       0.99      0.84      0.91      2991
           1       0.86      0.99      0.92      3002

    accuracy                           0.92      5993
   macro avg       0.93      0.92      0.92      5993
weighted avg       0.93      0.92      0.92      5993

True Positives (TP): 2979
True Negatives (TN): 2518
False Positives (FP): 473
False Negatives (FN): 23


In [15]:
## olid + solid model on hso test

from sklearn.metrics import confusion_matrix

# Score the OLID+SOLID-trained model on the HSO test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_olidsolid.to(device)
model_olidsolid.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_olidsolid.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("hso dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_hso, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  11.135472297668457  seconds
Validation Accuracy: 0.8178333669558201
              precision    recall  f1-score   support

           0       0.47      0.64      0.54       835
           1       0.92      0.85      0.89      4122

    accuracy                           0.82      4957
   macro avg       0.70      0.75      0.71      4957
weighted avg       0.85      0.82      0.83      4957

True Positives (TP): 3516
True Negatives (TN): 538
False Positives (FP): 297
False Negatives (FN): 606


In [16]:
## solid + troff model on olid test

from sklearn.metrics import confusion_matrix

# Score the SOLID+TR-OffensEval-trained model on the OLID test loader and report
# wall-clock time, accuracy, the per-class report and the confusion-matrix counts.
model_solidtroff.to(device)
model_solidtroff.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_solidtroff.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("olid datset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_olid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid datset test time:  2.1643564701080322  seconds
Validation Accuracy: 0.8337209302325581
              precision    recall  f1-score   support

           0       0.85      0.93      0.89       620
           1       0.76      0.59      0.66       240

    accuracy                           0.83       860
   macro avg       0.81      0.76      0.78       860
weighted avg       0.83      0.83      0.83       860

True Positives (TP): 141
True Negatives (TN): 576
False Positives (FP): 44
False Negatives (FN): 99


In [17]:

## solid + troff model on solid test
# (header previously read "olid + troff"; the model scored here is model_solidtroff)

from sklearn.metrics import confusion_matrix

# Score the SOLID+TR-OffensEval-trained model on the SOLID test loader and report
# wall-clock time, accuracy, the per-class report and the confusion-matrix counts.
model_solidtroff.to(device)
model_solidtroff.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_solidtroff.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("solid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_solid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  12.433947086334229  seconds
Validation Accuracy: 0.9175704989154013
              precision    recall  f1-score   support

           0       0.95      0.88      0.91      2991
           1       0.89      0.96      0.92      3002

    accuracy                           0.92      5993
   macro avg       0.92      0.92      0.92      5993
weighted avg       0.92      0.92      0.92      5993

True Positives (TP): 2878
True Negatives (TN): 2621
False Positives (FP): 370
False Negatives (FN): 124


In [18]:

## solid + troff model on hso test
# (header previously read "olid + troff"; the model scored here is model_solidtroff)

from sklearn.metrics import confusion_matrix

# Score the SOLID+TR-OffensEval-trained model on the HSO test loader and report
# wall-clock time, accuracy, the per-class report and the confusion-matrix counts.
model_solidtroff.to(device)
model_solidtroff.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_solidtroff.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("hso test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_hso, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso test time:  11.139912605285645  seconds
Validation Accuracy: 0.7730482146459552
              precision    recall  f1-score   support

           0       0.41      0.76      0.53       835
           1       0.94      0.78      0.85      4122

    accuracy                           0.77      4957
   macro avg       0.67      0.77      0.69      4957
weighted avg       0.85      0.77      0.80      4957

True Positives (TP): 3196
True Negatives (TN): 636
False Positives (FP): 199
False Negatives (FN): 926


In [19]:
## solid + troff model on troff test

from sklearn.metrics import confusion_matrix

# Score the SOLID+TR-OffensEval-trained model on the TR-OffensEval test loader and
# report wall-clock time, accuracy, the per-class report and the confusion-matrix counts.
model_solidtroff.to(device)
model_solidtroff.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_troff:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Presumably moved back to free GPU memory for the next model.
model_solidtroff.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("troff test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_troff, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_troff, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
troff test time:  38.539230823516846  seconds
Validation Accuracy: 0.8512091038406828
              precision    recall  f1-score   support

           0       0.85      0.99      0.91      2804
           1       0.90      0.30      0.45       711

    accuracy                           0.85      3515
   macro avg       0.87      0.64      0.68      3515
weighted avg       0.86      0.85      0.82      3515

True Positives (TP): 211
True Negatives (TN): 2781
False Positives (FP): 23
False Negatives (FN): 500


In [20]:
## hso model on olid test

from sklearn.metrics import confusion_matrix

# Score the HSO-trained model on the OLID test loader and report wall-clock
# time, accuracy, the per-class report and the confusion-matrix counts.
model_hso.to(device)
model_hso.eval()  # inference behaviour for the module (e.g. dropout off)

correct = 0
total = 0
predicted_labels = []  # plain list; converted to an array once after the loop

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_labels.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Move the model that was just evaluated off the device. The previous code
# moved model_solidtroff instead, which left model_hso on the GPU.
model_hso.to('cpu')

# Build the array in one step: np.append inside the loop re-copied the whole
# array on every batch and stored the integer class ids as float64.
prediction_list = np.asarray(predicted_labels, dtype=np.int64)

print("olid test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins the
# matrix to 2x2 even if a class never occurs, so the unpacking cannot fail.
cm = confusion_matrix(labels_df_olid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid test time:  2.1615843772888184  seconds
Validation Accuracy: 0.8395348837209302
              precision    recall  f1-score   support

           0       0.85      0.94      0.89       620
           1       0.78      0.59      0.67       240

    accuracy                           0.84       860
   macro avg       0.82      0.76      0.78       860
weighted avg       0.83      0.84      0.83       860

True Positives (TP): 141
True Negatives (TN): 581
False Positives (FP): 39
False Negatives (FN): 99


In [21]:
## hsomodel on solid test
# Evaluate `model_hso` on the SOLID test loader and report wall-clock time,
# accuracy, the per-class classification report and the confusion-matrix counts.

from sklearn.metrics import confusion_matrix

model_hso.to(device)
model_hso.eval()  # evaluation mode for inference

correct = 0
total = 0
batch_predictions = []  # one array of predicted class ids per batch
with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        batch_predictions.append(predictions.cpu().numpy())
    print('end')

    test_end = time.time()

# Join the per-batch arrays once instead of re-copying the whole array with
# np.append on every batch; this also keeps the predictions as integers.
prediction_list = (
    np.concatenate(batch_predictions) if batch_predictions else np.array([], dtype=int)
)

# Release the model that was just evaluated from the device.
model_hso.to('cpu')

print("solid test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
# NOTE(review): comparing against `labels_df_solid` assumes `val_loader_solid`
# yields samples in the same order as the dataframe (no shuffling) -- confirm.
print(classification_report(labels_df_solid, prediction_list))

# True labels are `labels_df_solid`, predicted labels are `prediction_list`.
# labels=[0, 1] pins the matrix to 2x2 even if one class is absent; for that
# layout ravel() yields the counts in the order TN, FP, FN, TP.
cm = confusion_matrix(labels_df_solid, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid test time:  12.5849027633667  seconds
Validation Accuracy: 0.9154013015184381
              precision    recall  f1-score   support

           0       0.95      0.88      0.91      2991
           1       0.89      0.95      0.92      3002

    accuracy                           0.92      5993
   macro avg       0.92      0.92      0.92      5993
weighted avg       0.92      0.92      0.92      5993

True Positives (TP): 2864
True Negatives (TN): 2622
False Positives (FP): 369
False Negatives (FN): 138


In [22]:
## hsomodel on hso test
# Evaluate `model_hso` on the HSO test loader and report wall-clock time,
# accuracy, the per-class classification report and the confusion-matrix counts.

from sklearn.metrics import confusion_matrix

model_hso.to(device)
model_hso.eval()  # evaluation mode for inference

correct = 0
total = 0
batch_predictions = []  # one array of predicted class ids per batch
with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        batch_predictions.append(predictions.cpu().numpy())
    print('end')

    test_end = time.time()

# Join the per-batch arrays once instead of re-copying the whole array with
# np.append on every batch; this also keeps the predictions as integers.
prediction_list = (
    np.concatenate(batch_predictions) if batch_predictions else np.array([], dtype=int)
)

# Release the model that was just evaluated from the device.
model_hso.to('cpu')

# This cell times the HSO test set, so the label says "hso".
print("hso test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
# NOTE(review): comparing against `labels_df_hso` assumes `val_loader_hso`
# yields samples in the same order as the dataframe (no shuffling) -- confirm.
print(classification_report(labels_df_hso, prediction_list))

# True labels are `labels_df_hso`, predicted labels are `prediction_list`.
# labels=[0, 1] pins the matrix to 2x2 even if one class is absent; for that
# layout ravel() yields the counts in the order TN, FP, FN, TP.
cm = confusion_matrix(labels_df_hso, prediction_list, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid+solid test time:  11.167417764663696  seconds
Validation Accuracy: 0.7734516844865846
              precision    recall  f1-score   support

           0       0.41      0.76      0.53       835
           1       0.94      0.78      0.85      4122

    accuracy                           0.77      4957
   macro avg       0.67      0.77      0.69      4957
weighted avg       0.85      0.77      0.80      4957

True Positives (TP): 3196
True Negatives (TN): 638
False Positives (FP): 197
False Negatives (FN): 926
