In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, ConvBertForSequenceClassification, ConvBertTokenizer
from transformers import AutoTokenizer

def _load_split(tsv_path):
    """Read one tab-separated dataset file into a DataFrame."""
    return pd.read_csv(tsv_path, sep="\t")


def _encode(texts):
    """Tokenize an array of tweets with truncation and padding enabled."""
    return tokenizer(texts.tolist(), truncation=True, padding=True)


# Test splits used for evaluation.
# (A previous version read 'datasets/cleaned_OLID.tsv' instead of the OLID test file.)
df_olidtest = _load_split('../datasets/cleaned_OLID_test.tsv')
df_solid = _load_split('../datasets/cleaned_SOLIDtest6K_trainer.tsv')
df_troff = _load_split('../datasets/cleaned_tr_offenseval_test.tsv')
df_hso = _load_split('../datasets/cleaned_hatespeech_offensive_test.tsv')

# Each file is read through its 'tweet' (text) and 'label' (target) columns;
# adjust the column names here if a dataset uses different ones.
tweets_olid, labels_df_olid = df_olidtest['tweet'].values, df_olidtest['label'].values
tweets_solid, labels_df_solid = df_solid['tweet'].values, df_solid['label'].values
tweets_troff, labels_df_troff = df_troff['tweet'].values, df_troff['label'].values
tweets_hso, labels_df_hso = df_hso['tweet'].values, df_hso['label'].values

# The tokenizer checkpoint (and the model checkpoints loaded further down) change
# per experiment, and the imports have to change accordingly.
# Alternative used in other runs: "cardiffnlp/twitter-roberta-base-emotion".
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

# Tokenize and encode the tweets of every test split.
encodings_olid = _encode(tweets_olid)
encodings_solid = _encode(tweets_solid)
encodings_troff = _encode(tweets_troff)
encodings_hso = _encode(tweets_hso)





In [29]:
import torch

class TweetDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name (e.g. 'input_ids') to a sequence
            holding one entry per example.
        labels: Sequence with one label per example.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example `idx` as a dict of tensors, with its label under 'labels'."""
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

def _eval_loader(dataset):
    """Wrap a dataset in a DataLoader that yields batches of 12 without shuffling."""
    return torch.utils.data.DataLoader(dataset, batch_size=12, shuffle=False)


# One dataset per test split; no training dataset or loader is built in this cell.
val_dataset_olid = TweetDataset(encodings_olid, labels_df_olid)
val_dataset_solid = TweetDataset(encodings_solid, labels_df_solid)
val_dataset_troff = TweetDataset(encodings_troff, labels_df_troff)
val_dataset_hso = TweetDataset(encodings_hso, labels_df_hso)

# Unshuffled loaders, so predictions come out in the same order as the label arrays.
val_loader_olid = _eval_loader(val_dataset_olid)
val_loader_solid = _eval_loader(val_dataset_solid)
val_loader_troff = _eval_loader(val_dataset_troff)
val_loader_hso = _eval_loader(val_dataset_hso)

In [30]:
import torch
from torch import optim
from transformers import BertForSequenceClassification, DistilBertForSequenceClassification
import time
model_name = "distilbert-base-uncased"

# Every fine-tuned checkpoint is loaded from the local models/ directory.
_load_checkpoint = DistilBertForSequenceClassification.from_pretrained

model_olid = _load_checkpoint(f"models/{model_name}_olid")
model_solid = _load_checkpoint(f"models/{model_name}_solid")
model_olidsolid = _load_checkpoint(f"models/{model_name}_olid_solid")
model_solidtroff = _load_checkpoint(f"models/{model_name}_solid_tr")
model_hso = _load_checkpoint(f"models/{model_name}_hso")

# AdamW optimizer over the OLID model's parameters.
# NOTE(review): none of the evaluation cells visible here use it - confirm whether it is still needed.
optimizer = optim.AdamW(model_olid.parameters(), lr=1e-5)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



In [48]:
## olid model on olid test

import numpy as np
import time
from sklearn.metrics import classification_report, confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_olid.to(device)
model_olid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_olid.to('cpu')

print("olid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid dataset test time:  1.2041773796081543  seconds
Validation Accuracy: 0.8465116279069768
              precision    recall  f1-score   support

           0       0.89      0.90      0.89       620
           1       0.73      0.70      0.72       240

    accuracy                           0.85       860
   macro avg       0.81      0.80      0.81       860
weighted avg       0.84      0.85      0.85       860

True Positives (TP): 169
True Negatives (TN): 559
False Positives (FP): 61
False Negatives (FN): 71


In [50]:
## olid model on solid test

import numpy as np
import time
from sklearn.metrics import classification_report, confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_olid.to(device)
model_olid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_olid.to('cpu')

print("solid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_solid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  6.376095533370972  seconds
Validation Accuracy: 0.9187385282829968
              precision    recall  f1-score   support

           0       0.99      0.85      0.91      2991
           1       0.87      0.99      0.92      3002

    accuracy                           0.92      5993
   macro avg       0.93      0.92      0.92      5993
weighted avg       0.93      0.92      0.92      5993

True Positives (TP): 2974
True Negatives (TN): 2532
False Positives (FP): 459
False Negatives (FN): 28


In [51]:
## olid model on hso test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_olid.to(device)
model_olid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_olid.to('cpu')

print("hso dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_hso, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  5.6520795822143555  seconds
Validation Accuracy: 0.8107726447448054
              precision    recall  f1-score   support

           0       0.46      0.67      0.55       835
           1       0.93      0.84      0.88      4122

    accuracy                           0.81      4957
   macro avg       0.69      0.76      0.71      4957
weighted avg       0.85      0.81      0.82      4957

True Positives (TP): 3457
True Negatives (TN): 562
False Positives (FP): 273
False Negatives (FN): 665


In [52]:
## solid model on olid test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_solid.to(device)
model_solid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_solid.to('cpu')

print("olid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid dataset test time:  2.665764331817627  seconds
Validation Accuracy: 0.827906976744186
              precision    recall  f1-score   support

           0       0.89      0.87      0.88       620
           1       0.68      0.72      0.70       240

    accuracy                           0.83       860
   macro avg       0.79      0.80      0.79       860
weighted avg       0.83      0.83      0.83       860

True Positives (TP): 173
True Negatives (TN): 539
False Positives (FP): 81
False Negatives (FN): 67


In [53]:
## solid model on solid test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_solid.to(device)
model_solid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_solid.to('cpu')

print("solid dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_solid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  15.686945915222168  seconds
Validation Accuracy: 0.9140664108126147
              precision    recall  f1-score   support

           0       0.98      0.85      0.91      2991
           1       0.86      0.98      0.92      3002

    accuracy                           0.91      5993
   macro avg       0.92      0.91      0.91      5993
weighted avg       0.92      0.91      0.91      5993

True Positives (TP): 2947
True Negatives (TN): 2531
False Positives (FP): 460
False Negatives (FN): 55


In [54]:
## solid model on hso test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_solid.to(device)
model_solid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_solid.to('cpu')

print("hso dataset test time: ", test_end - test_start, " seconds")
accuracy = correct / total
print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_hso, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  13.469242334365845  seconds
Validation Accuracy: 0.8022997780915877
              precision    recall  f1-score   support

           0       0.45      0.72      0.55       835
           1       0.94      0.82      0.87      4122

    accuracy                           0.80      4957
   macro avg       0.69      0.77      0.71      4957
weighted avg       0.85      0.80      0.82      4957

True Positives (TP): 3372
True Negatives (TN): 605
False Positives (FP): 230
False Negatives (FN): 750


In [55]:
## olid + solid model on olid test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_olidsolid.to(device)
model_olidsolid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_olidsolid.to('cpu')

print("olid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")


start
end
olid dataset test time:  2.7018203735351562  seconds
Validation Accuracy: 0.8151162790697675
              precision    recall  f1-score   support

           0       0.89      0.85      0.87       620
           1       0.65      0.72      0.69       240

    accuracy                           0.82       860
   macro avg       0.77      0.79      0.78       860
weighted avg       0.82      0.82      0.82       860

True Positives (TP): 173
True Negatives (TN): 528
False Positives (FP): 92
False Negatives (FN): 67


In [56]:
## olid + solid model on solid test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_olidsolid.to(device)
model_olidsolid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_olidsolid.to('cpu')

print("solid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_solid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  15.448938846588135  seconds
Validation Accuracy: 0.9138995494743868
              precision    recall  f1-score   support

           0       0.99      0.84      0.91      2991
           1       0.86      0.99      0.92      3002

    accuracy                           0.91      5993
   macro avg       0.92      0.91      0.91      5993
weighted avg       0.92      0.91      0.91      5993

True Positives (TP): 2973
True Negatives (TN): 2504
False Positives (FP): 487
False Negatives (FN): 29


In [57]:
## olid + solid model on hso test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_olidsolid.to(device)
model_olidsolid.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_olidsolid(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_olidsolid.to('cpu')

print("hso dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_hso, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
hso dataset test time:  13.50259518623352  seconds
Validation Accuracy: 0.8059310066572524
              precision    recall  f1-score   support

           0       0.45      0.67      0.54       835
           1       0.93      0.83      0.88      4122

    accuracy                           0.81      4957
   macro avg       0.69      0.75      0.71      4957
weighted avg       0.85      0.81      0.82      4957

True Positives (TP): 3435
True Negatives (TN): 560
False Positives (FP): 275
False Negatives (FN): 687


In [58]:
## solid + troff model on olid test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_solidtroff.to(device)
model_solidtroff.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_solidtroff.to('cpu')

print("olid datset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid datset test time:  2.6847922801971436  seconds
Validation Accuracy: 0.8395348837209302
              precision    recall  f1-score   support

           0       0.87      0.92      0.89       620
           1       0.75      0.64      0.69       240

    accuracy                           0.84       860
   macro avg       0.81      0.78      0.79       860
weighted avg       0.83      0.84      0.84       860

True Positives (TP): 153
True Negatives (TN): 569
False Positives (FP): 51
False Negatives (FN): 87


In [59]:

## solid + troff model on solid test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_solidtroff.to(device)
model_solidtroff.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_solidtroff.to('cpu')

print("solid dataset test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_solid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_solid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid dataset test time:  15.760691165924072  seconds
Validation Accuracy: 0.9147338561655265
              precision    recall  f1-score   support

           0       0.96      0.86      0.91      2991
           1       0.88      0.96      0.92      3002

    accuracy                           0.91      5993
   macro avg       0.92      0.91      0.91      5993
weighted avg       0.92      0.91      0.91      5993

True Positives (TP): 2896
True Negatives (TN): 2586
False Positives (FP): 405
False Negatives (FN): 106


In [42]:

## solid + troff model on hso test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_solidtroff.to(device)
model_solidtroff.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_solidtroff.to('cpu')

print("hso test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_hso, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_hso, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid+solid test time:  5.710323095321655  seconds
Validation Accuracy: 0.7926165019164817
              precision    recall  f1-score   support

           0       0.44      0.78      0.56       835
           1       0.95      0.80      0.86      4122

    accuracy                           0.79      4957
   macro avg       0.69      0.79      0.71      4957
weighted avg       0.86      0.79      0.81      4957

True Positives (TP): 3280
True Negatives (TN): 649
False Positives (FP): 186
False Negatives (FN): 842


In [60]:
## solid + troff model on troff test

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_solidtroff.to(device)
model_solidtroff.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_troff:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_solidtroff.to('cpu')

print("troff test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_troff, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_troff, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
troff test time:  21.207704067230225  seconds
Validation Accuracy: 0.8588904694167852
              precision    recall  f1-score   support

           0       0.86      0.98      0.92      2804
           1       0.82      0.38      0.52       711

    accuracy                           0.86      3515
   macro avg       0.84      0.68      0.72      3515
weighted avg       0.85      0.86      0.84      3515

True Positives (TP): 273
True Negatives (TN): 2746
False Positives (FP): 58
False Negatives (FN): 438


In [61]:
## solid + troff model on olid test
# NOTE(review): this cell evaluates the same model on the same OLID loader as the
# earlier "solid + troff model on olid test" cell - confirm whether the repeat is intended.

from sklearn.metrics import confusion_matrix

# Put the model on the evaluation device and switch it to eval mode.
model_solidtroff.to(device)
model_solidtroff.eval()

# Running accuracy counters plus the predicted class of every example, in loader order.
correct = 0
total = 0
predicted_classes = []

with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_solidtroff(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        predicted_classes.extend(predictions.cpu().tolist())
    print('end')

    test_end = time.time()

# Build the array once; calling np.append per batch copies the whole array each time.
prediction_list = np.array(predicted_classes)

# Move the evaluated model back to the CPU.
model_solidtroff.to('cpu')

print("olid test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
print(classification_report(labels_df_olid, prediction_list))

# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(labels_df_olid, prediction_list)

# Extract TP, TN, FP, FN from the confusion matrix (class 1 is treated as positive).
TP = cm[1, 1]
TN = cm[0, 0]
FP = cm[0, 1]
FN = cm[1, 0]

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid test time:  1.2061831951141357  seconds
Validation Accuracy: 0.8395348837209302
              precision    recall  f1-score   support

           0       0.87      0.92      0.89       620
           1       0.75      0.64      0.69       240

    accuracy                           0.84       860
   macro avg       0.81      0.78      0.79       860
weighted avg       0.83      0.84      0.84       860

True Positives (TP): 153
True Negatives (TN): 569
False Positives (FP): 51
False Negatives (FN): 87


In [63]:
## hsomodel on olid test


# Evaluation

model_hso.to(device)

# Switch the module to evaluation mode before running inference.
model_hso.eval()

# Running counters for accuracy, plus the per-batch predictions that are
# concatenated once after the loop (cheaper than np.append on every batch,
# and it keeps the integer dtype produced by argmax).
correct = 0
total = 0
batch_predictions = []
with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_olid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        # Predicted class = index of the largest logit for each example.
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        batch_predictions.append(predictions.detach().cpu().numpy())
    print('end')

    test_end = time.time()

prediction_list = np.concatenate(batch_predictions)

# Move the model that was just evaluated (model_hso) back to the CPU.
# Moving a different model here would leave model_hso on the device.
model_hso.to('cpu')

print("olid test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
# NOTE(review): this compares predictions with labels_df_olid position by
# position, so it assumes val_loader_olid yields examples in the same order
# as the dataframe (i.e. the loader does not shuffle) -- TODO confirm.
print(classification_report(labels_df_olid, prediction_list))

from sklearn.metrics import confusion_matrix
# True labels come from `labels_df_olid`, predicted labels from `prediction_list`.
# Passing labels=[0, 1] pins the matrix to 2x2 even if one class is absent.
cm = confusion_matrix(labels_df_olid, prediction_list, labels=[0, 1])

# Rows are true classes, columns are predicted classes, so the flattened
# 2x2 matrix reads TN, FP, FN, TP.
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid test time:  1.206183671951294  seconds
Validation Accuracy: 0.8372093023255814
              precision    recall  f1-score   support

           0       0.86      0.92      0.89       620
           1       0.75      0.62      0.68       240

    accuracy                           0.84       860
   macro avg       0.81      0.77      0.79       860
weighted avg       0.83      0.84      0.83       860

True Positives (TP): 149
True Negatives (TN): 571
False Positives (FP): 49
False Negatives (FN): 91


In [64]:
## hsomodel on solid test


# Evaluation

model_hso.to(device)

# Switch the module to evaluation mode before running inference.
model_hso.eval()

# Running counters for accuracy, plus the per-batch predictions that are
# concatenated once after the loop (cheaper than np.append on every batch,
# and it keeps the integer dtype produced by argmax).
correct = 0
total = 0
batch_predictions = []
with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_solid:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        # Predicted class = index of the largest logit for each example.
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        batch_predictions.append(predictions.detach().cpu().numpy())
    print('end')

    test_end = time.time()

prediction_list = np.concatenate(batch_predictions)

# Move the model that was just evaluated (model_hso) back to the CPU.
# Moving a different model here would leave model_hso on the device.
model_hso.to('cpu')

print("solid test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
# NOTE(review): this compares predictions with labels_df_solid position by
# position, so it assumes val_loader_solid yields examples in the same order
# as the dataframe (i.e. the loader does not shuffle) -- TODO confirm.
print(classification_report(labels_df_solid, prediction_list))

from sklearn.metrics import confusion_matrix
# True labels come from `labels_df_solid`, predicted labels from `prediction_list`.
# Passing labels=[0, 1] pins the matrix to 2x2 even if one class is absent.
cm = confusion_matrix(labels_df_solid, prediction_list, labels=[0, 1])

# Rows are true classes, columns are predicted classes, so the flattened
# 2x2 matrix reads TN, FP, FN, TP.
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
solid test time:  6.338438987731934  seconds
Validation Accuracy: 0.9098948773569164
              precision    recall  f1-score   support

           0       0.95      0.87      0.91      2991
           1       0.88      0.95      0.91      3002

    accuracy                           0.91      5993
   macro avg       0.91      0.91      0.91      5993
weighted avg       0.91      0.91      0.91      5993

True Positives (TP): 2857
True Negatives (TN): 2596
False Positives (FP): 395
False Negatives (FN): 145


In [65]:
## hsomodel on hso test


# Evaluation

model_hso.to(device)

# Switch the module to evaluation mode before running inference.
model_hso.eval()

# Running counters for accuracy, plus the per-batch predictions that are
# concatenated once after the loop (cheaper than np.append on every batch,
# and it keeps the integer dtype produced by argmax).
correct = 0
total = 0
batch_predictions = []
with torch.no_grad():
    test_start = time.time()
    print('start')
    for batch in val_loader_hso:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model_hso(input_ids, attention_mask=attention_mask)
        # Predicted class = index of the largest logit for each example.
        predictions = torch.argmax(outputs.logits, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        batch_predictions.append(predictions.detach().cpu().numpy())
    print('end')

    test_end = time.time()

prediction_list = np.concatenate(batch_predictions)

# Move the model that was just evaluated (model_hso) back to the CPU.
# Moving a different model here would leave model_hso on the device.
model_hso.to('cpu')

# The timing label names the test set this cell actually iterates (HSO).
print("hso test time: ", test_end - test_start, " seconds")

accuracy = correct / total

print(f'Validation Accuracy: {accuracy}')
# NOTE(review): this compares predictions with labels_df_hso position by
# position, so it assumes val_loader_hso yields examples in the same order
# as the dataframe (i.e. the loader does not shuffle) -- TODO confirm.
print(classification_report(labels_df_hso, prediction_list))

from sklearn.metrics import confusion_matrix
# True labels come from `labels_df_hso`, predicted labels from `prediction_list`.
# Passing labels=[0, 1] pins the matrix to 2x2 even if one class is absent.
cm = confusion_matrix(labels_df_hso, prediction_list, labels=[0, 1])

# Rows are true classes, columns are predicted classes, so the flattened
# 2x2 matrix reads TN, FP, FN, TP.
TN, FP, FN, TP = cm.ravel()

print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

start
end
olid+solid test time:  5.676846027374268  seconds
Validation Accuracy: 0.7811176114585435
              precision    recall  f1-score   support

           0       0.42      0.79      0.55       835
           1       0.95      0.78      0.86      4122

    accuracy                           0.78      4957
   macro avg       0.68      0.78      0.70      4957
weighted avg       0.86      0.78      0.80      4957

True Positives (TP): 3213
True Negatives (TN): 659
False Positives (FP): 176
False Negatives (FN): 909
