In [1]:
import json
import torch

from sklearn.metrics import classification_report

# Import required libraries
import torch
from sklearn.metrics import accuracy_score, f1_score

import random


In [2]:
def read_jsonl_file(file_name):
    """Read a JSON Lines file into a list of decoded objects.

    Args:
        file_name: Path of a UTF-8 text file holding one JSON document per line.

    Returns:
        list: The decoded value of every non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record; json.loads("") would raise on them, so skip them.
            if not line:
                continue
            data.append(json.loads(line))
    return data

# Load the two test splits.
# NOTE(review): judging by the file names these are human-written WebText
# samples and GPT-2 XL (1542M, top-k 40) generations -- confirm against the dataset.
file1_data = read_jsonl_file('../dataset/webtext.test.jsonl')
file2_data = read_jsonl_file('../dataset/xl-1542M-k40.test.jsonl')

# Tag every record from the first file with "fake" = 0 ...
file1_data = [{**record, 'fake': 0} for record in file1_data]

# ... and every record from the second file with "fake" = 1.
file2_data = [{**record, 'fake': 1} for record in file2_data]

# Single labelled pool: first-file records followed by second-file records.
data = file1_data + file2_data

In [3]:
# Keep only the two fields the evaluation cells read: the text and its "fake" label.
# `.get` yields None for a record that lacks either key.
modified_data = [
    {"text": record.get("text"), "fake": record.get("fake")} for record in data
]

# Shuffle with a dedicated, seeded generator so that Restart & Run All visits the
# samples in the same order every time, without touching the global `random` state.
random.Random(42).shuffle(modified_data)

# RoBERTa

In [5]:
# Import required libraries
import numpy as np
import torch
from transformers import RobertaForSequenceClassification, RobertaTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "roberta-base"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_roberta.pt'


def load_model(model_path):
    """Build a RoBERTa sequence classifier on the GPU and load fine-tuned weights.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the ``roberta-base`` tokenizer and the
        CUDA-resident model after ``load_state_dict``.
    """
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaForSequenceClassification.from_pretrained("roberta-base").cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_roberta.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Accuracy: 0.5803
F1 score: 0.6930


# RoBERTa short

In [6]:
# Import required libraries
import numpy as np
import torch
from transformers import RobertaForSequenceClassification, RobertaTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "roberta-base"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_roberta_short.pt'


def load_model(model_path):
    """Build a RoBERTa sequence classifier on the GPU and load fine-tuned weights.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the ``roberta-base`` tokenizer and the
        CUDA-resident model after ``load_state_dict``.
    """
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaForSequenceClassification.from_pretrained("roberta-base").cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_roberta_short.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Accuracy: 0.6110
F1 score: 0.5528


# RoBERTa mixed

In [7]:
# Import required libraries
import numpy as np
import torch
from transformers import RobertaForSequenceClassification, RobertaTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "roberta-base"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_roberta_mixed.pt'


def load_model(model_path):
    """Build a RoBERTa sequence classifier on the GPU and load fine-tuned weights.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the ``roberta-base`` tokenizer and the
        CUDA-resident model after ``load_state_dict``.
    """
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaForSequenceClassification.from_pretrained("roberta-base").cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_roberta_mixed.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Accuracy: 0.6819
F1 score: 0.7184


# ELECTRA

In [8]:
# Import required libraries
import numpy as np
import torch
from transformers import ElectraForSequenceClassification, ElectraTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "google/electra-base-discriminator"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_electra.pt'


def load_model(model_path):
    """Build an ELECTRA sequence classifier on the GPU and load fine-tuned weights.

    The tokenizer and base model come from the module-level ``check_point``,
    which is read when the function is called.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the tokenizer and the CUDA-resident
        model after ``load_state_dict``.
    """
    tokenizer = ElectraTokenizer.from_pretrained(check_point)
    model = ElectraForSequenceClassification.from_pretrained(check_point).cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_electra.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.d

Accuracy: 0.5586
F1 score: 0.2450


# ELECTRA mixed

In [9]:
# Import required libraries
import numpy as np
import torch
from transformers import ElectraForSequenceClassification, ElectraTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "google/electra-base-discriminator"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_electra_mixed.pt'


def load_model(model_path):
    """Build an ELECTRA sequence classifier on the GPU and load fine-tuned weights.

    The tokenizer and base model come from the module-level ``check_point``,
    which is read when the function is called.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the tokenizer and the CUDA-resident
        model after ``load_state_dict``.
    """
    tokenizer = ElectraTokenizer.from_pretrained(check_point)
    model = ElectraForSequenceClassification.from_pretrained(check_point).cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_electra_mixed.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.d

Accuracy: 0.5538
F1 score: 0.2386


# ELECTRA short

In [10]:
# Import required libraries
import numpy as np
import torch
from transformers import ElectraForSequenceClassification, ElectraTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "google/electra-base-discriminator"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_electra_short.pt'


def load_model(model_path):
    """Build an ELECTRA sequence classifier on the GPU and load fine-tuned weights.

    The tokenizer and base model come from the module-level ``check_point``,
    which is read when the function is called.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the tokenizer and the CUDA-resident
        model after ``load_state_dict``.
    """
    tokenizer = ElectraTokenizer.from_pretrained(check_point)
    model = ElectraForSequenceClassification.from_pretrained(check_point).cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_electra_short.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.d

Accuracy: 0.5690
F1 score: 0.4062


# BERT

In [11]:
# Import required libraries
import numpy as np
import torch
from transformers import BertForSequenceClassification, BertTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "bert-base-uncased"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_bert.pt'


def load_model(model_path):
    """Build a BERT sequence classifier on the GPU and load fine-tuned weights.

    The tokenizer and base model come from the module-level ``check_point``,
    which is read when the function is called.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the tokenizer and the CUDA-resident
        model after ``load_state_dict``.
    """
    tokenizer = BertTokenizer.from_pretrained(check_point)
    model = BertForSequenceClassification.from_pretrained(check_point).cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_bert.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Accuracy: 0.5583
F1 score: 0.2344


# BERT mixed

In [12]:
# Import required libraries
import numpy as np
import torch
from transformers import BertForSequenceClassification, BertTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "bert-base-uncased"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_bert_mixed.pt'


def load_model(model_path):
    """Build a BERT sequence classifier on the GPU and load fine-tuned weights.

    The tokenizer and base model come from the module-level ``check_point``,
    which is read when the function is called.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the tokenizer and the CUDA-resident
        model after ``load_state_dict``.
    """
    tokenizer = BertTokenizer.from_pretrained(check_point)
    model = BertForSequenceClassification.from_pretrained(check_point).cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_bert_mixed.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Accuracy: 0.6024
F1 score: 0.3671


# BERT short

In [13]:
# Import required libraries
import numpy as np
import torch
from transformers import BertForSequenceClassification, BertTokenizer

# Hugging Face identifier of the pre-trained base model for this section.
check_point = "bert-base-uncased"

# File holding the fine-tuned state dict evaluated in this section.
model_path = 'best_model_bert_short.pt'


def load_model(model_path):
    """Build a BERT sequence classifier on the GPU and load fine-tuned weights.

    The tokenizer and base model come from the module-level ``check_point``,
    which is read when the function is called.

    Args:
        model_path: Path of a file containing the fine-tuned state dict, as
            read by ``torch.load``.

    Returns:
        tuple: ``(tokenizer, model)`` -- the tokenizer and the CUDA-resident
        model after ``load_state_dict``.
    """
    tokenizer = BertTokenizer.from_pretrained(check_point)
    model = BertForSequenceClassification.from_pretrained(check_point).cuda()

    # Deserialize onto the CPU first: this succeeds even when the checkpoint was
    # saved from a GPU index that does not exist on this machine. load_state_dict
    # then copies the values into the model's existing CUDA parameters.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    return tokenizer, model


def preprocess_text(tokenizer, input_text, max_length):
    """Encode one text with ``tokenizer`` and move the resulting tensors to the GPU.

    Args:
        tokenizer: Object exposing ``encode_plus``.
        input_text: The text to encode.
        max_length: Length cap forwarded to ``encode_plus``; truncation is
            enabled and special tokens are added.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the encoding, each
        after ``.cuda()`` has been called on it.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
        add_special_tokens=True,
    )

    token_ids = encoding["input_ids"].cuda()
    mask = encoding["attention_mask"].cuda()
    return token_ids, mask


def get_prediction(model, input_ids, attention_mask):
    """Run one forward pass and return the index of the largest logit.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask=...)``
            whose result exposes a ``logits`` tensor.
        input_ids: Token id tensor, passed positionally to ``model``.
        attention_mask: Attention mask tensor, passed by keyword to ``model``.

    Returns:
        The ``np.argmax`` of the logits. The argmax is taken over the flattened
        array, so it equals the class index only for a single-example batch.
    """
    # Inference only: disabling autograd avoids building a graph for every sample.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    predicted_label = np.argmax(outputs.logits.detach().cpu().numpy())
    return predicted_label


def main(modified_data, model_path="best_model_bert_short.pt"):
    """Evaluate a fine-tuned checkpoint on labelled samples and print its metrics.

    Args:
        modified_data: Iterable of dicts with a ``"text"`` entry (the input
            passage) and a ``"fake"`` entry (the ground-truth label).
        model_path: State-dict file handed to ``load_model``. Defaults to the
            checkpoint this section evaluates, so ``main(modified_data)``
            behaves exactly as before.

    Returns:
        None. Accuracy and F1 score (``accuracy_score`` / ``f1_score`` with
        their default settings) are printed with four decimals.
    """
    tokenizer, model = load_model(model_path)

    true_labels = []
    predicted_labels = []

    # One forward pass per sample; max_length=512 is forwarded to preprocess_text.
    for item in modified_data:
        input_ids, attention_mask = preprocess_text(
            tokenizer, item['text'], max_length=512
        )
        predicted_labels.append(get_prediction(model, input_ids, attention_mask))
        true_labels.append(item['fake'])

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 score: {f1:.4f}")


if __name__ == "__main__":
    main(modified_data)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Accuracy: 0.6018
F1 score: 0.5489
