In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, ConvBertForSequenceClassification, ConvBertTokenizer
from transformers import AutoTokenizer

# Load dataset
df_olidtest = pd.read_csv('../datasets/cleaned_OLID_test.tsv', sep="\t")
# df = pd.read_csv('datasets/cleaned_OLID.tsv', sep="\t")
df_solid = pd.read_csv('../datasets/cleaned_SOLIDtest6K_trainer.tsv', sep='\t')
df_troff = pd.read_csv('../datasets/cleaned_tr_offenseval_test.tsv', sep='\t')
df_hso = pd.read_csv('../datasets/cleaned_hatespeech_offensive_test.tsv', sep='\t')

# Assuming your columns are named 'tweet' and 'class', change accordingly
tweets_olid = df_olidtest['tweet'].values
labels_df_olid = df_olidtest['label'].values

tweets_solid = df_solid['tweet'].values
labels_df_solid = df_solid['label'].values

tweets_troff = df_troff['tweet'].values
labels_df_troff = df_troff['label'].values

tweets_hso = df_hso['tweet'].values
labels_df_hso = df_hso['label'].values

full_model_name = 'YituTech/conv-bert-base'

tokenizer = AutoTokenizer.from_pretrained(full_model_name)

# tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-emotion")

# Tokenize and encode the training and validation texts
# train_encodings = tokenizer(train_texts.tolist(), truncation=True, padding=True)
encodings_olid = tokenizer(tweets_olid.tolist(), truncation=True, padding=True)
encodings_solid = tokenizer(tweets_solid.tolist(), truncation=True, padding=True)
encodings_troff = tokenizer(tweets_troff.tolist(), truncation=True, padding=True)
encodings_hso = tokenizer(tweets_hso.tolist(), truncation=True, padding=True)



Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [4]:
import torch

class TweetDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# train_dataset = TweetDataset(train_encodings, train_labels)
val_dataset_olid = TweetDataset(encodings_olid, labels_df_olid)

# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=12, shuffle=True)
val_loader_olid = torch.utils.data.DataLoader(val_dataset_olid, batch_size=12, shuffle=False)


val_dataset_solid = TweetDataset(encodings_solid, labels_df_solid)
val_loader_solid = torch.utils.data.DataLoader(val_dataset_solid, batch_size=12, shuffle=False)

val_dataset_troff = TweetDataset(encodings_troff, labels_df_troff)
val_loader_troff = torch.utils.data.DataLoader(val_dataset_troff, batch_size=12, shuffle=False)

val_dataset_hso = TweetDataset(encodings_hso, labels_df_hso)
val_loader_hso = torch.utils.data.DataLoader(val_dataset_hso, batch_size=12, shuffle=False)

In [5]:
import torch
from torch import optim
from transformers import BertForSequenceClassification, ConvBertForSequenceClassification
import time
model_name = "conv-bert-base"

model_olid = ConvBertForSequenceClassification.from_pretrained(f"models/{model_name}_olid")
model_solid = ConvBertForSequenceClassification.from_pretrained(f"models/{model_name}_solid")
model_olidsolid = ConvBertForSequenceClassification.from_pretrained(f"models/{model_name}_olid_solid")
model_solidtroff = ConvBertForSequenceClassification.from_pretrained(f"models/{model_name}_solid_tr")
model_hso = ConvBertForSequenceClassification.from_pretrained(f"models/{model_name}_hso")

# Define optimizer and learning rate
optimizer = optim.AdamW(model_olid.parameters(), lr=1e-5)

# Training loop
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')



In [6]:
## olid model on olid test

import numpy as np
import time
from sklearn.metrics import classification_report

# Evaluation
model_olid.to(device)
model_olid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])

with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_olid.to('cpu')

print("olid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

from sklearn.metrics import confusion_matrix

# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid dataset test time:  2.590994358062744  seconds
Validation Accuracy: 0.8395348837209302
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       620
           1       0.70      0.73      0.72       240

    accuracy                           0.84       860
   macro avg       0.80      0.81      0.80       860
weighted avg       0.84      0.84      0.84       860

True Positives (TP): 176
True Negatives (TN): 546
False Positives (FP): 74
False Negatives (FN): 64


In [7]:
## olid on solid test

import numpy as np
import time
from sklearn.metrics import classification_report

# Evaluation
model_olid.to(device)
model_olid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])

with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_olid.to('cpu')

print("solid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

from sklearn.metrics import confusion_matrix

# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_solid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  13.694557428359985  seconds
Validation Accuracy: 0.9215751710328717
              precision    recall  f1-score   support

           0       1.00      0.85      0.92      2991
           1       0.87      1.00      0.93      3002

    accuracy                           0.92      5993
   macro avg       0.93      0.92      0.92      5993
weighted avg       0.93      0.92      0.92      5993

True Positives (TP): 2993
True Negatives (TN): 2530
False Positives (FP): 461
False Negatives (FN): 9


In [8]:
## olid model on hso test

# Evaluation
model_olid.to(device)
model_olid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])

with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_olid.to('cpu')

print("hso dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

from sklearn.metrics import confusion_matrix

# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_hso, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  12.042570352554321  seconds
Validation Accuracy: 0.892071817631632
              precision    recall  f1-score   support

           0       0.70      0.62      0.66       835
           1       0.93      0.95      0.94      4122

    accuracy                           0.89      4957
   macro avg       0.81      0.79      0.80      4957
weighted avg       0.89      0.89      0.89      4957

True Positives (TP): 3901
True Negatives (TN): 521
False Positives (FP): 314
False Negatives (FN): 221


In [9]:
## solid model on olid test

# Evaluation
###
model_solid.to(device)
model_solid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])

with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_olid: ###
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask) ###
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solid.to('cpu') ###

print("olid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list)) ###

from sklearn.metrics import confusion_matrix

# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_olid, prediction_list) ###

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid dataset test time:  2.401766538619995  seconds
Validation Accuracy: 0.8418604651162791
              precision    recall  f1-score   support

           0       0.87      0.92      0.89       620
           1       0.75      0.65      0.70       240

    accuracy                           0.84       860
   macro avg       0.81      0.78      0.79       860
weighted avg       0.84      0.84      0.84       860

True Positives (TP): 155
True Negatives (TN): 569
False Positives (FP): 51
False Negatives (FN): 85


In [10]:
## solid model on solid test

# Evaluation
###
model_solid.to(device)
model_solid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])


with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_solid: ###
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask) ###
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solid.to('cpu') ###

print("solid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list)) ###

from sklearn.metrics import confusion_matrix

# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_solid, prediction_list) ###

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  13.657119512557983  seconds
Validation Accuracy: 0.9170699149007175
              precision    recall  f1-score   support

           0       0.97      0.86      0.91      2991
           1       0.87      0.98      0.92      3002

    accuracy                           0.92      5993
   macro avg       0.92      0.92      0.92      5993
weighted avg       0.92      0.92      0.92      5993

True Positives (TP): 2929
True Negatives (TN): 2567
False Positives (FP): 424
False Negatives (FN): 73


In [11]:
## solid model on hso test

# Evaluation
###
model_solid.to(device)
model_solid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])

with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_hso: ###
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask) ###
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solid.to('cpu') ###

print("hso dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list)) ###

from sklearn.metrics import confusion_matrix

# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_hso, prediction_list) ###

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  12.246433019638062  seconds
Validation Accuracy: 0.8176316320355054
              precision    recall  f1-score   support

           0       0.47      0.78      0.59       835
           1       0.95      0.83      0.88      4122

    accuracy                           0.82      4957
   macro avg       0.71      0.80      0.74      4957
weighted avg       0.87      0.82      0.83      4957

True Positives (TP): 3401
True Negatives (TN): 652
False Positives (FP): 183
False Negatives (FN): 721


In [12]:
## olid + solid model on olid test

# Evaluation

model_olidsolid.to(device)

model_olidsolid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_olidsolid.to('cpu')

print("olid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")


start
end
olid dataset test time:  2.4198782444000244  seconds
Validation Accuracy: 0.8104651162790698
              precision    recall  f1-score   support

           0       0.91      0.82      0.86       620
           1       0.63      0.78      0.70       240

    accuracy                           0.81       860
   macro avg       0.77      0.80      0.78       860
weighted avg       0.83      0.81      0.82       860

True Positives (TP): 187
True Negatives (TN): 510
False Positives (FP): 110
False Negatives (FN): 53


In [13]:
## olid + solid model on solid test

# Evaluation

model_olidsolid.to(device)

model_olidsolid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_olidsolid.to('cpu')

print("solid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_solid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  13.695028305053711  seconds
Validation Accuracy: 0.9170699149007175
              precision    recall  f1-score   support

           0       0.99      0.84      0.91      2991
           1       0.86      0.99      0.92      3002

    accuracy                           0.92      5993
   macro avg       0.93      0.92      0.92      5993
weighted avg       0.93      0.92      0.92      5993

True Positives (TP): 2983
True Negatives (TN): 2513
False Positives (FP): 478
False Negatives (FN): 19


In [14]:
## olid + solid model on hso test

# Evaluation

model_olidsolid.to(device)

model_olidsolid.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_olidsolid.to('cpu')

print("hso dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_hso, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  12.262091875076294  seconds
Validation Accuracy: 0.8301391970950172
              precision    recall  f1-score   support

           0       0.50      0.69      0.58       835
           1       0.93      0.86      0.89      4122

    accuracy                           0.83      4957
   macro avg       0.71      0.78      0.74      4957
weighted avg       0.86      0.83      0.84      4957

True Positives (TP): 3537
True Negatives (TN): 578
False Positives (FP): 257
False Negatives (FN): 585


In [15]:
## solid + troff model on olid test


# Evaluation

model_solidtroff.to(device)

model_solidtroff.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solidtroff.to('cpu')

print("olid datset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid datset test time:  2.473078489303589  seconds
Validation Accuracy: 0.8406976744186047
              precision    recall  f1-score   support

           0       0.84      0.97      0.90       620
           1       0.86      0.51      0.64       240

    accuracy                           0.84       860
   macro avg       0.85      0.74      0.77       860
weighted avg       0.84      0.84      0.83       860

True Positives (TP): 123
True Negatives (TN): 600
False Positives (FP): 20
False Negatives (FN): 117


In [16]:

## olid + troff model on solid test

# Evaluation

model_solidtroff.to(device)

model_solidtroff.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solidtroff.to('cpu')

print("solid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_solid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  13.583001613616943  seconds
Validation Accuracy: 0.9123977974303354
              precision    recall  f1-score   support

           0       0.92      0.90      0.91      2991
           1       0.90      0.93      0.91      3002

    accuracy                           0.91      5993
   macro avg       0.91      0.91      0.91      5993
weighted avg       0.91      0.91      0.91      5993

True Positives (TP): 2780
True Negatives (TN): 2688
False Positives (FP): 303
False Negatives (FN): 222


In [17]:

## olid + troff model on hso test

# Evaluation

model_solidtroff.to(device)

model_solidtroff.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solidtroff.to('cpu')

print("hso test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_hso, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso test time:  12.239339590072632  seconds
Validation Accuracy: 0.7924147669961671
              precision    recall  f1-score   support

           0       0.44      0.91      0.60       835
           1       0.98      0.77      0.86      4122

    accuracy                           0.79      4957
   macro avg       0.71      0.84      0.73      4957
weighted avg       0.89      0.79      0.82      4957

True Positives (TP): 3170
True Negatives (TN): 758
False Positives (FP): 77
False Negatives (FN): 952


In [18]:
## solid + troff model on troff test


# Evaluation

model_solidtroff.to(device)

model_solidtroff.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_troff:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solidtroff.to('cpu')

print("troff test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_troff, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_troff, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
troff test time:  40.16333270072937  seconds
Validation Accuracy: 0.8463726884779517
              precision    recall  f1-score   support

           0       0.84      0.99      0.91      2804
           1       0.89      0.28      0.42       711

    accuracy                           0.85      3515
   macro avg       0.87      0.63      0.67      3515
weighted avg       0.85      0.85      0.81      3515

True Positives (TP): 196
True Negatives (TN): 2779
False Positives (FP): 25
False Negatives (FN): 515


In [20]:
## hsomodel on olid test


# Evaluation

model_hso.to(device)

model_hso.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solidtroff.to('cpu')

print("olid test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid test time:  2.4205334186553955  seconds
Validation Accuracy: 0.8430232558139535
              precision    recall  f1-score   support

           0       0.86      0.94      0.90       620
           1       0.80      0.59      0.68       240

    accuracy                           0.84       860
   macro avg       0.83      0.76      0.79       860
weighted avg       0.84      0.84      0.83       860

True Positives (TP): 141
True Negatives (TN): 584
False Positives (FP): 36
False Negatives (FN): 99


In [21]:
## hsomodel on solid test


# Evaluation

model_hso.to(device)

model_hso.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solidtroff.to('cpu')

print("solid test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_solid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid test time:  13.575922012329102  seconds
Validation Accuracy: 0.913565826797931
              precision    recall  f1-score   support

           0       0.94      0.88      0.91      2991
           1       0.89      0.95      0.92      3002

    accuracy                           0.91      5993
   macro avg       0.92      0.91      0.91      5993
weighted avg       0.92      0.91      0.91      5993

True Positives (TP): 2838
True Negatives (TN): 2637
False Positives (FP): 354
False Negatives (FN): 164


In [22]:
## hsomodel on hso test


# Evaluation

model_hso.to(device)

model_hso.eval()

# Perform evaluation on validation set and calculate metrics as needed
# Example: calculate accuracy
correct = 0
total = 0
i = 0
prediction_list = np.array([])
with torch.no_grad():
    test_start  = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        prediction_list = np.append(prediction_list, predictions.detach().cpu().numpy())
    print('end')
    
    test_end = time.time()

model_solidtroff.to('cpu')

print("olid+solid test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

from sklearn.metrics import confusion_matrix
# Assuming you have the true labels in `val_labels` and the predicted labels in `prediction_list`
cm = confusion_matrix(labels_df_hso, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid+solid test time:  12.279685974121094  seconds
Validation Accuracy: 0.7964494654024612
              precision    recall  f1-score   support

           0       0.45      0.86      0.59       835
           1       0.96      0.78      0.86      4122

    accuracy                           0.80      4957
   macro avg       0.71      0.82      0.73      4957
weighted avg       0.88      0.80      0.82      4957

True Positives (TP): 3232
True Negatives (TN): 716
False Positives (FP): 119
False Negatives (FN): 890
