In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
import torch
from transformers import AdamW
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, f1_score
from tqdm import tqdm

# Dataset

In [4]:
data = pd.read_json("Sarcasm_Headlines_Dataset.json", lines=True)
labels = data.is_sarcastic.values
sentences = data.headline.values
data.head()

Unnamed: 0,is_sarcastic,headline,article_link
0,1,thirtysomething scientists unveil doomsday clo...,https://www.theonion.com/thirtysomething-scien...
1,0,dem rep. totally nails why congress is falling...,https://www.huffingtonpost.com/entry/donna-edw...
2,0,eat your veggies: 9 deliciously different recipes,https://www.huffingtonpost.com/entry/eat-your-...
3,1,inclement weather prevents liar from getting t...,https://local.theonion.com/inclement-weather-p...
4,1,mother comes pretty close to using word 'strea...,https://www.theonion.com/mother-comes-pretty-c...


In [5]:
# train_sents,test_sents, train_labels, test_labels  = train_test_split(sentences,labels,test_size=0.15)
data_train = pd.read_json("train_Sarcasm_Headlines_Dataset.json", lines=True)
train_labels = data_train.is_sarcastic.values
train_sents = data_train.headline.values

data_test = pd.read_json("test_Sarcasm_Headlines_Dataset.json", lines=True)
test_labels = data_test.is_sarcastic.values
test_sents = data_test.headline.values


In [6]:
sentences

array(['thirtysomething scientists unveil doomsday clock of hair loss',
       'dem rep. totally nails why congress is falling short on gender, racial equality',
       'eat your veggies: 9 deliciously different recipes', ...,
       'the most beautiful acceptance speech this week came from a queer korean',
       'mars probe destroyed by orbiting spielberg-gates space palace',
       'dad clarifies this not a food stop'], dtype=object)

In [7]:
sentence_list = []

for sentence in sentences:
    sentence_list.append(sentence)

# Comparison of the original BERT-based models

In [8]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from transformers import BertTokenizer, BertModel, RobertaTokenizer, RobertaModel
from tqdm import tqdm

bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
roberta_model = RobertaModel.from_pretrained('roberta-base')

# def get_embeddings(model, tokenizer, sentences, max_length=64):
#     inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)
#     with torch.no_grad():
#         outputs = model(**inputs)
#     embeddings = outputs.last_hidden_state[:, 0, :].numpy()
#     return embeddings

def get_embeddings(model, tokenizer, sentences, batch_size=64):
    num_sentences = len(sentences)
    embeddings = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    for start_index in tqdm(range(0, num_sentences, batch_size)):
        batch_sentences = sentences[start_index:start_index + batch_size]
        inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True, truncation=True)
        inputs = {key: value.to(device) for key, value in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)

        batch_embeddings = outputs.last_hidden_state[:, 0, :].detach().cpu().numpy()
        embeddings.append(batch_embeddings)

    embeddings = np.concatenate(embeddings, axis=0)
    return embeddings

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
bert_embeddings_train = get_embeddings(bert_model, bert_tokenizer, train_sents.tolist())
bert_embeddings_test = get_embeddings(bert_model, bert_tokenizer, test_sents.tolist())

# X_train_bert, X_test_bert, y_train, y_test = train_test_split(bert_embeddings, labels, test_size=0.3, random_state=42)

log_reg_model_bert = LogisticRegression()
log_reg_model_bert.fit(bert_embeddings_train, train_labels)

accuracy_bert = log_reg_model_bert.score(bert_embeddings_test, test_labels)
print("BERT Accuracy:", accuracy_bert)

100%|██████████| 381/381 [00:24<00:00, 15.71it/s]
100%|██████████| 68/68 [00:04<00:00, 14.95it/s]


BERT Accuracy: 0.8581741965533303


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [10]:
np.save("bert_embeddings_train.npy", bert_embeddings_train)
np.save("bert_embeddings_test.npy", bert_embeddings_test)

In [11]:
roberta_embeddings_train = get_embeddings(roberta_model, roberta_tokenizer, train_sents.tolist())
roberta_embeddings_test = get_embeddings(roberta_model, roberta_tokenizer, test_sents.tolist())

# X_train_roberta, X_test_roberta, y_train, y_test = train_test_split(roberta_embeddings, labels, test_size=0.3, random_state=42)

# Fit logistic regression model for RoBERTa
log_reg_model_roberta = LogisticRegression()
log_reg_model_roberta.fit(roberta_embeddings_train, train_labels)

# Evaluate the RoBERTa model
accuracy_roberta = log_reg_model_roberta.score(roberta_embeddings_test, test_labels)
print("RoBERTa Accuracy:", accuracy_roberta)

100%|██████████| 381/381 [00:20<00:00, 18.40it/s]
100%|██████████| 68/68 [00:03<00:00, 17.71it/s]


RoBERTa Accuracy: 0.8497904052165812


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [12]:
from transformers import AlbertTokenizer, AlbertModel

albert_tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
albert_model = AlbertModel.from_pretrained("albert-base-v2")

model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

In [13]:
albert_embeddings_train = get_embeddings(albert_model, albert_tokenizer, train_sents.tolist())
albert_embeddings_test = get_embeddings(albert_model, albert_tokenizer, test_sents.tolist())

# X_train_roberta, X_test_roberta, y_train, y_test = train_test_split(roberta_embeddings, labels, test_size=0.3, random_state=42)

log_reg_model_albert = LogisticRegression(max_iter=1000)
log_reg_model_albert.fit(albert_embeddings_train, train_labels)

accuracy_albert = log_reg_model_albert.score(albert_embeddings_test, test_labels)
print("Albert Accuracy:", accuracy_albert)

100%|██████████| 381/381 [00:24<00:00, 15.24it/s]
100%|██████████| 68/68 [00:04<00:00, 14.21it/s]


Albert Accuracy: 0.8500232883092688


In [14]:
from transformers import ElectraTokenizer, ElectraModel

electra_tokenizer = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')
electra_model = ElectraModel.from_pretrained("google/electra-base-discriminator")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


In [15]:
electra_embeddings_train = get_embeddings(electra_model, electra_tokenizer, train_sents.tolist())
electra_embeddings_test = get_embeddings(electra_model, electra_tokenizer, test_sents.tolist())

# X_train_roberta, X_test_roberta, y_train, y_test = train_test_split(roberta_embeddings, labels, test_size=0.3, random_state=42)

log_reg_model_electra = LogisticRegression(max_iter=1000)
log_reg_model_electra.fit(electra_embeddings_train, train_labels)

accuracy_electra = log_reg_model_electra.score(electra_embeddings_test, test_labels)
print("Electra Accuracy:", accuracy_electra)

100%|██████████| 381/381 [00:23<00:00, 15.94it/s]
100%|██████████| 68/68 [00:04<00:00, 14.94it/s]


Electra Accuracy: 0.8663251047973917


# Finetuning of BERT-based models for news headlines

In [16]:
# Dataset class
class SarcasmDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

In [17]:
from transformers import AutoTokenizer, BertForSequenceClassification, RobertaTokenizer, RobertaForSequenceClassification
import matplotlib.pyplot as plt
from transformers import ElectraForSequenceClassification

# Function to train and evaluate a BERT-based model
def train_evaluate_model(model_type, model_path_to_save, train_texts, train_labels, test_texts, test_labels, max_length=128, batch_size=16, num_epochs=3, learning_rate=2e-5):
    # Load pre-trained model and tokenizer
    if "bert" == model_type:
        model_name = "bert-base-uncased"
        model_class = BertForSequenceClassification
        tokenizer_model_name = 'google-bert/bert-base-uncased'
    elif "roberta" == model_type:
        model_name = "roberta-base"
        model_class = RobertaForSequenceClassification
        tokenizer_model_name = 'FacebookAI/roberta-base'
    elif "electra" == model_type:
        model_name = "google/electra-base-discriminator"
        model_class = ElectraForSequenceClassification
        tokenizer_model_name = 'google/electra-base-discriminator'
    else:
        raise ValueError("Unsupported model name")

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_model_name)
    model = model_class.from_pretrained(model_name, num_labels=2)

    train_dataset = SarcasmDataset(train_texts, train_labels, tokenizer, max_length)
    test_dataset = SarcasmDataset(test_texts, test_labels, tokenizer, max_length)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    optimizer = AdamW(model.parameters(), lr=learning_rate)
    criterion = torch.nn.CrossEntropyLoss()

    # Training loop
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_losses = []
    test_f1s = []
    test_accuracies = []

    best_accuracy = 0.0
    best_f1 = 0.0

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            loss.backward()
            optimizer.step()

        # print(f"Average training loss for epoch {epoch + 1}: {total_loss / len(train_loader)}")
        train_loss = total_loss / len(train_loader)
        train_losses.append(train_loss)

        # Evaluation
        model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for batch in tqdm(test_loader, desc="Evaluating"):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()

                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())

        test_accuracy = accuracy_score(all_labels, all_preds)
        test_f1 = f1_score(all_labels, all_preds)

        test_accuracies.append(test_accuracy)
        test_f1s.append(test_f1)

        # Plotting
        # plt.figure(figsize=(12, 6))
        # plt.plot(range(1, epoch + 2), train_losses[:epoch+1], label='Train')
        # plt.title('Loss')
        # plt.xlabel('Epochs')
        # plt.ylabel('Loss')
        # plt.legend()

        # plt.subplot(1, 2, 2)
        # plt.plot(range(1, epoch + 2), test_accuracies[:epoch+1], label='acc')
        # plt.plot(range(1, epoch + 2), test_f1s[:epoch+1], label='f1')
        # plt.title('Accuracy and f1 score')
        # plt.xlabel('Epochs')
        # plt.ylabel('Accuracy and f1')
        # plt.legend()

        # plt.tight_layout()
        # plt.show()

    best_model_path = f"{model_path_to_save}_{num_epochs}_epochs"
    model.save_pretrained(best_model_path)

    print(f"Accuracy of {model_name}: {test_accuracy}")
    print(f"F1-score of {model_name}: {test_f1}")
    print()
    print()

    return model

In [18]:
train_texts = train_sents
train_labels = train_labels
test_texts = test_sents
test_labels = test_labels

models_to_try = ["electra", "roberta", "bert"]

for model_name in models_to_try:
    print(f"Evaluating model: {model_name}")
    num_epochs = 5
    model_path_to_save = model_name
    model = train_evaluate_model(model_name, model_path_to_save,
                                 train_texts, train_labels, 
                                 test_texts, test_labels, 
                                 num_epochs=num_epochs)


Evaluating model: electra


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 1521/1521 [04:08<00:00,  6.11it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.32it/s]
Epoch 2/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.26it/s]
Epoch 3/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.10it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.24it/s]
Epoch 4/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.10it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.24it/s]
Epoch 5/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.10it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.2

Accuracy of google/electra-base-discriminator: 0.9434094084769445
F1-score of google/electra-base-discriminator: 0.940103524771999


Evaluating model: roberta


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 1521/1521 [04:13<00:00,  6.00it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 19.09it/s]
Epoch 2/5: 100%|██████████| 1521/1521 [04:13<00:00,  5.99it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 19.10it/s]
Epoch 3/5: 100%|██████████| 1521/1521 [04:14<00:00,  5.99it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.97it/s]
Epoch 4/5: 100%|██████████| 1521/1521 [04:13<00:00,  6.00it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 19.08it/s]
Epoch 5/5: 100%|██████████| 1521/1521 [04:13<00:00,  5.99it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 19.08it/s]


Accuracy of roberta-base: 0.9392175128085701
F1-score of roberta-base: 0.9365117976161518


Evaluating model: bert


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.04it/s]
Epoch 2/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.07it/s]
Epoch 3/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.10it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.17it/s]
Epoch 4/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.09it/s]
Epoch 5/5: 100%|██████████| 1521/1521 [04:09<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.05it/s]


Accuracy of bert-base-uncased: 0.9280391243595715
F1-score of bert-base-uncased: 0.9246892517669998




In [26]:
# function to save embeddings of best models
def get_embeddings(model, tokenizer, sentences, batch_size=64):
    num_sentences = len(sentences)
    embeddings = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    for start_index in tqdm(range(0, num_sentences, batch_size)):
        batch_sentences = sentences[start_index:start_index + batch_size]
        inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True, truncation=True)
        inputs = {key: value.to(device) for key, value in inputs.items()}
        
        with torch.no_grad():
            outputs = model(**inputs, output_hidden_states=True)
            hidden_states = outputs.hidden_states  # Get all hidden states

        batch_embeddings = hidden_states[-1][:, 0, :].detach().cpu().numpy()
        embeddings.append(batch_embeddings)

    embeddings = np.concatenate(embeddings, axis=0)
    return embeddings

# Finetuning of BERT-based models for news headlines with SYNTHETIC data

In [27]:
# train_sents,test_sents, train_labels, test_labels  = train_test_split(sentences,labels,test_size=0.15)
data_train = pd.read_json("train_Sarcasm_Headlines_Dataset.json", lines=True)
train_labels = data_train.is_sarcastic.values
train_sents = data_train.headline.values

data_test = pd.read_json("test_Sarcasm_Headlines_Dataset.json", lines=True)
test_labels = data_test.is_sarcastic.values
test_sents = data_test.headline.values


In [28]:
data_gen = pd.read_csv("sarcasm_headlines_synthetic__FULL__Llama_3_topp95_temp_7.csv")
labels_gen = data_gen.is_sarcastic.values
sentences_gen = data_gen.generated_sentence.values
data_gen.head()
# sentences_gen

Unnamed: 0.1,Unnamed: 0,generated_sentence,is_sarcastic
0,0,"""Cat Owners Demand Answers After Feline Overlo...",1
1,1,"""Experts Warn: Climate Change Denial to Cost E...",0
2,2,"""Experts Warn of ""Climate Catastrophe"": 10 Ala...",0
3,3,"""Cats Proven to be More Interesting Than Human...",1
4,4,"""Cats Finally Learn to Not Sh*t on the Same Sp...",1


In [30]:
train_texts_gen = np.concatenate((train_sents, sentences_gen))
train_labels_gen = np.concatenate((train_labels, labels_gen))
test_texts_gen = test_sents
test_labels_gen = test_labels

models_to_try = [ "electra"]

for model_name in models_to_try:
    print(f"Evaluating model: {model_name}")
    num_epochs = 5
    model_path_to_save = model_name + "_with_synt"
    model = train_evaluate_model(model_name, model_path_to_save,
                                 train_texts_gen, train_labels_gen,
                                 test_texts_gen, test_labels_gen, 
                                 num_epochs=num_epochs)

    


Evaluating model: electra


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 3309/3309 [09:03<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.20it/s]
Epoch 2/5: 100%|██████████| 3309/3309 [09:03<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.27it/s]
Epoch 3/5: 100%|██████████| 3309/3309 [09:03<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.17it/s]
Epoch 4/5: 100%|██████████| 3309/3309 [09:03<00:00,  6.08it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.19it/s]
Epoch 5/5: 100%|██████████| 3309/3309 [09:03<00:00,  6.09it/s]
Evaluating: 100%|██████████| 269/269 [00:14<00:00, 18.1

Accuracy of google/electra-base-discriminator: 0.9457382394038193
F1-score of google/electra-base-discriminator: 0.9419965148120488




# Cross domain testing

In [31]:
# train_sents,test_sents, train_labels, test_labels  = train_test_split(sentences,labels,test_size=0.15)
data_train = pd.read_json("train_Sarcasm_Headlines_Dataset.json", lines=True)
train_labels = data_train.is_sarcastic.values
train_sents = data_train.headline.values

data_test = pd.read_json("test_Sarcasm_Headlines_Dataset.json", lines=True)
test_labels = data_test.is_sarcastic.values
test_sents = data_test.headline.values


In [32]:
data1 = pd.read_csv("isarcasm_test.csv")
data2 = pd.read_csv("isarcasm_train.csv")

labels_isarc = data1.sarcastic.values
sentences_isarc = data1.text.values

# labels_isarc =  np.concatenate((data1.sarcastic.values,
#                                 data2.sarcastic.values))
# sentences_isarc = np.concatenate((data1.text.values,
#                                   data2.tweet.values))
data1.head()

Unnamed: 0,text,sarcastic
0,"Size on the the Toulouse team, That pack is mo...",0
1,Pinball!,0
2,So the Scottish Government want people to get ...,1
3,villainous pro tip : change the device name on...,0
4,I would date any of these men 🥺,0


In [33]:
train_texts_full = np.concatenate((train_sents, test_sents))
train_labels_full = np.concatenate((train_labels, test_labels))
test_texts_isarc = sentences_isarc
test_labels_isarc = labels_isarc

models_to_try = ["electra"]

for model_name in models_to_try:
    print(f"Evaluating model: {model_name}")
    num_epochs = 5
    model_path_to_save = model_name + "_full"
    model = train_evaluate_model(model_name, model_path_to_save,
                                 train_texts_full, train_labels_full,
                                 test_texts_isarc, test_labels_isarc, 
                                 num_epochs=num_epochs)

    


Evaluating model: electra


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 1789/1789 [04:53<00:00,  6.10it/s]
Evaluating: 100%|██████████| 88/88 [00:04<00:00, 18.21it/s]
Epoch 2/5: 100%|██████████| 1789/1789 [04:53<00:00,  6.10it/s]
Evaluating: 100%|██████████| 88/88 [00:04<00:00, 18.17it/s]
Epoch 3/5: 100%|██████████| 1789/1789 [04:53<00:00,  6.10it/s]
Evaluating: 100%|██████████| 88/88 [00:04<00:00, 18.12it/s]
Epoch 4/5: 100%|██████████| 1789/1789 [04:53<00:00,  6.10it/s]
Evaluating: 100%|██████████| 88/88 [00:04<00:00, 18.26it/s]
Epoch 5/5: 100%|██████████| 1789/1789 [04:53<00:00,  6.10it/s]
Evaluating: 100%|██████████| 88/88 [00:04<00:00, 18.17it/s]


Accuracy of google/electra-base-discriminator: 0.6642857142857143
F1-score of google/electra-base-discriminator: 0.1840277777777778


