In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

In [None]:
!pip install evaluate
import evaluate
import numpy as np
import pandas as pd
from transformers import DataCollatorWithPadding



In [None]:
# Read the JSON file, transpose it so that each example becomes a row
# (see the preview output), and renumber the rows from 0.
df = pd.read_json("rename.json", orient="records").T.reset_index(drop=True)
df.head(10)

# print(df.columns)


Index(['utterance', 'speaker', 'context', 'context_speakers', 'show',
       'sarcasm'],
      dtype='object')


In [None]:
# Identifier of the pretrained checkpoint; both the tokenizer and the
# sequence-classification model are loaded from this path.
model_path = "google-bert/bert-base-uncased"

In [None]:
# Load the tokenizer that belongs to the checkpoint named in `model_path`
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [None]:
# Label vocabulary for the binary task. The reverse mapping is derived from
# the forward one so the two cannot drift apart.
id2label = {0: "NOT_SARCASM", 1: "SARCASM"}
label2id = {label: idx for idx, label in id2label.items()}

# Pretrained encoder with a sequence-classification head sized to the labels
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Freeze the base model except for its pooler: a base-model parameter stays
# trainable only when its name contains 'pooler'.
for name, param in model.base_model.named_parameters():
    param.requires_grad = 'pooler' in name

In [None]:
# Preview the first ten rows again (same view as right after loading)
df.head(10)

Unnamed: 0,utterance,speaker,context,context_speakers,show,sarcasm
0,It's just a privilege to watch your mind at work.,SHELDON,[I never would have identified the fingerprint...,"[LEONARD, SHELDON]",BBT,True
1,I don't think I'll be able to stop thinking ab...,PENNY,[This is one of my favorite places to kick bac...,"[HOWARD, PENNY, HOWARD, HOWARD, HOWARD, PENNY,...",BBT,True
2,"Since it's not bee season, you can have my epi...",SHELDON,"[Here we go. Pad thai, no peanuts., But does i...","[LEONARD, HOWARD, LEONARD]",BBT,False
3,"Lois Lane is falling, accelerating at an initi...",SHELDON,[A marathon? How many Superman movies are ther...,"[PENNY, SHELDON, PENNY, SHELDON, SHELDON, PENN...",BBT,False
4,I'm just inferring this is a couch because the...,SHELDON,"[Great Caesar's ghost, look at this place., So...","[SHELDON, LEONARD, SHELDON, SHELDON, SHELDON, ...",BBT,True
5,"Sheldon, if you were a robot, and I knew and y...",HOWARD,"[On the other hand, no arousal., None?, None.,...","[PERSON, LEONARD, PERSON, LEONARD, PERSON, LEO...",BBT,False
6,If you're compiling a mix CD for a double suic...,SHELDON,"[Oh, good Lord., God, that's a good song.]","[SHELDON, LEONARD]",BBT,True
7,No. The dark crescent-shaped patterns under yo...,SHELDON,"[How do I look?, Could you be more specific?, ...","[LEONARD, SHELDON, LEONARD]",BBT,True
8,"How's this? ""Pleased to meet you, Dr. Gablehau...",SHELDON,"[You know, I am not going to enjoy this party....","[SHELDON, LEONARD, SHELDON, LEONARD, SHELDON, ...",BBT,True
9,"Yeah, my parents felt that naming me Leonard a...",LEONARD,[He switched over to high-energy radiation res...,"[PERSON, PERSON, LEONARD, PERSON, LEONARD, PER...",BBT,True


In [None]:

# Convert the sarcasm flags to integer class ids (False -> 0, True -> 1),
# which lines up with the id2label / label2id mappings.
df["label"] = df["sarcasm"].astype(int)


def tokenize_function(example):
    """Tokenize a batch of utterances, truncating each to at most 128 tokens.

    No padding is applied here. The Trainer is constructed with the tokenizer,
    so its default collator pads every batch to the longest sequence in that
    batch. Padding all examples to 128 tokens up front only adds wasted
    computation on padding positions.

    Args:
        example: batch mapping that contains an "utterance" entry.

    Returns:
        The tokenizer output for the batch (variable-length sequences).
    """
    return tokenizer(example["utterance"], truncation=True, max_length=128)


from datasets import Dataset
dataset = Dataset.from_pandas(df)

# Apply tokenization to the whole dataset in batches
tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/690 [00:00<?, ? examples/s]

In [None]:
from datasets import DatasetDict

# Hold out 20% of the examples for evaluation. The fixed seed makes the split,
# and therefore the reported metrics, reproducible across kernel restarts.
dataset = tokenized_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

In [None]:
# Accuracy metric from the `evaluate` library
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Return the accuracy of the argmax predictions.

    `eval_pred` unpacks into (logits, labels). Note that a later cell defines
    `compute_metrics` again with additional metrics; that later definition is
    the one in effect when the Trainer is created.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return accuracy.compute(predictions=predicted, references=references)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [None]:
# Load evaluation metrics from Hugging Face Hub
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")
roc_auc = evaluate.load("roc_auc")  # threshold-free ranking metric


def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall, F1 and ROC AUC for one evaluation.

    Args:
        eval_pred: pair ``(logits, labels)``. The logits are indexed as
            ``logits[:, k]``, so they must be a 2-D array with one column
            per class (column 0 = NOT_SARCASM, column 1 = SARCASM).

    Returns:
        dict mapping each metric name to its value.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)  # predicted class ids (0 or 1)

    # ROC AUC needs a score that ranks examples by how likely they are to be
    # positive. The softmax probability of class 1 equals
    # sigmoid(logit_1 - logit_0), so the logit margin orders examples exactly
    # like that probability. logit_1 on its own ignores logit_0 and can rank
    # examples differently.
    positive_scores = logits[:, 1] - logits[:, 0]

    return {
        "accuracy": accuracy.compute(predictions=predictions, references=labels)["accuracy"],
        "precision": precision.compute(predictions=predictions, references=labels, average="binary")["precision"],
        "recall": recall.compute(predictions=predictions, references=labels, average="binary")["recall"],
        "f1": f1.compute(predictions=predictions, references=labels, average="binary")["f1"],
        "roc_auc": roc_auc.compute(prediction_scores=positive_scores, references=labels)["roc_auc"],
    }

training_args = TrainingArguments(
    output_dir="./results",
    # Optimisation schedule
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    # Evaluate, checkpoint and log once per epoch; the best checkpoint is
    # reloaded when training finishes.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    load_best_model_at_end=True,
    # Keep everything local: no Hub upload, no external experiment tracking
    push_to_hub=False,
    hub_token=None,
    report_to="none",
)

# Bundle the model, the training configuration, both data splits, the
# tokenizer and the metric callback into a single Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # adds accuracy/precision/recall/F1/ROC AUC to each evaluation
)

  trainer = Trainer(


In [None]:
# Run fine-tuning with the settings in `training_args` (3 epochs, with
# evaluation, checkpointing and logging once per epoch).
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.6666,0.67241,0.608696,0.671233,0.620253,0.644737,0.632697
2,0.6609,0.680519,0.57971,0.744186,0.405063,0.52459,0.64321
3,0.6594,0.68641,0.528986,0.694444,0.316456,0.434783,0.64557


TrainOutput(global_step=207, training_loss=0.6622978247306197, metrics={'train_runtime': 890.6159, 'train_samples_per_second': 1.859, 'train_steps_per_second': 0.232, 'total_flos': 108927976919040.0, 'train_loss': 0.6622978247306197, 'epoch': 3.0})

In [None]:
# Score the current model on eval_dataset. load_best_model_at_end=True is set,
# and the recorded values match epoch 1 (lowest validation loss) of the
# training log.
trainer.evaluate()

{'eval_loss': 0.6724098324775696,
 'eval_accuracy': 0.6086956521739131,
 'eval_precision': 0.6712328767123288,
 'eval_recall': 0.620253164556962,
 'eval_f1': 0.6447368421052632,
 'eval_roc_auc': 0.6326968461703497,
 'eval_runtime': 60.123,
 'eval_samples_per_second': 2.295,
 'eval_steps_per_second': 0.299,
 'epoch': 3.0}

In [None]:
def predict_sarcasm(text):
    """Classify a piece of text as "SARCASM" or "NOT_SARCASM".

    Uses the notebook-level `tokenizer`, `model` and `id2label`.

    Args:
        text: the sentence to classify. If a list of sentences is passed,
            only the label of the first one is returned.

    Returns:
        The label string from `id2label` for the highest-scoring class.
    """
    import torch  # local import: torch is not imported elsewhere in this notebook

    # Inference must run in eval mode so that dropout is disabled.
    model.eval()

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    # The model may sit on a GPU after training; inputs must be on the same device.
    inputs = inputs.to(model.device)

    # No gradients are needed for prediction.
    with torch.no_grad():
        logits = model(**inputs).logits

    # .item() yields a plain Python int and works for CPU and CUDA tensors alike.
    predicted_id = logits.argmax(dim=-1)[0].item()
    return id2label[predicted_id]

# Example: classify a single sentence. This relies on `tokenizer`, `model`
# and `id2label` from the earlier cells being defined in the running kernel;
# run those cells first, otherwise the call fails with a NameError.
print(predict_sarcasm("i am not well today"))

NameError: name 'tokenizer' is not defined