In [1]:
from sentence_transformers import SentenceTransformer, util,losses
import math
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score,precision_score,recall_score
from sentence_transformers.readers import InputExample
from torch.utils.data import DataLoader

## Headlines

### Data Loading and Pre-processing

In [2]:
# Real headline data, read as JSON-lines (one record per line) into a train
# frame and a held-out test frame.
sarcasm_headlines_train = pd.read_json(
    "deep_learning_project/deep_learning_project/Sarcasm Headlines/train_Sarcasm_Headlines_Dataset.json",
    lines=True,
)
sarcasm_headlines_test = pd.read_json(
    "deep_learning_project/deep_learning_project/Sarcasm Headlines/test_Sarcasm_Headlines_Dataset.json",
    lines=True,
)

In [3]:
# Synthetic headlines from a CSV file; the text column is renamed to 'headline'
# in the sampling cell that follows.
synthetic_headlines = pd.read_csv(
    "deep_learning_project/deep_learning_project/Sarcasm Headlines/sarcasm_headlines_synthetic__FULL__Llama_3_topp95_temp_7.csv"
)

In [4]:
# Synthetic headlines: keep the first 8000 rows of each class (fewer if a class
# is smaller), shuffle them with a fixed seed, and expose the generated text
# under the same 'headline' column name that the real data uses.
synthetic_sample = (
    synthetic_headlines
    .groupby('is_sarcastic')
    .head(8000)
    .sample(frac=1, random_state=0)
    .reset_index(drop=True)
    .rename(columns={"generated_sentence": "headline"})
)
synthetic_sample.head(3)

Unnamed: 0.1,Unnamed: 0,headline,is_sarcastic
0,841,"""Nation Breathes Collective Sigh of Relief as ...",1
1,8650,"""Greta Thunberg Arrested Protesting Outside UN...",0
2,3701,"""Breaking: Scientists Discover Cure for Monday...",1


In [5]:
# Real training headlines: keep the first 8000 rows of each class (fewer if a
# class is smaller), then shuffle once with a fixed seed and renumber the rows
# 0..n-1 so later positional loops line up with the index.
sarcasm_headlines_sample = (
    sarcasm_headlines_train
    .groupby('is_sarcastic')
    .head(8000)
    .sample(frac=1, random_state=0)
    .reset_index(drop=True)
)
sarcasm_headlines_sample.head(3)

Unnamed: 0,is_sarcastic,headline,article_link
0,1,alcohol goes right back to abuser every time,https://local.theonion.com/alcohol-goes-right-...
1,0,man accused of shooting and burning 2 people a...,https://www.huffingtonpost.com/entry/south-car...
2,1,james harden pretty sure he felt something pop...,https://sports.theonion.com/james-harden-prett...


In [6]:
# Stack the real sample on top of the synthetic sample (real rows first) and
# renumber the rows; the class counts are displayed as a balance check.
sarcasm_headlines_sample_with_synthetic = pd.concat(
    [sarcasm_headlines_sample, synthetic_sample],
    ignore_index=True,
)
sarcasm_headlines_sample_with_synthetic['is_sarcastic'].value_counts()

is_sarcastic
1    16000
0    16000
Name: count, dtype: int64

### Training and Evaluation

In [7]:
def evaluate(train_dataframe, test_dataframe, model=None):
    """Score a 1-nearest-neighbour sarcasm classifier built on sentence embeddings.

    Each test text receives the label of the training text whose embedding has
    the highest cosine similarity to it; the predictions are then compared with
    the test labels.

    Parameters
    ----------
    train_dataframe : pandas.DataFrame
        Labelled reference set. Text is read from ``'headline'`` when that
        column exists, otherwise from ``'tweet'``. Labels are read from
        ``'is_sarcastic'``; when that column is missing, ``'sarcastic'`` is
        renamed to ``'is_sarcastic'`` on both frames first.
    test_dataframe : pandas.DataFrame
        Evaluation set. Text is read from ``'headline'`` when the *training*
        frame has a ``'headline'`` column, otherwise from ``'text'``.
    model : optional
        Object exposing ``encode(sentences, batch_size=..., show_progress_bar=...)``.
        When ``None`` the pretrained ``sentence-transformers/all-mpnet-base-v2``
        model is loaded at call time (not at function-definition time).

    Returns
    -------
    tuple
        ``(accuracy, f1)`` where accuracy is the mean of exact label matches
        and f1 is ``sklearn.metrics.f1_score`` called with its default arguments.

    Raises
    ------
    ValueError
        If either frame has no rows.
    """
    if len(train_dataframe) == 0 or len(test_dataframe) == 0:
        raise ValueError("evaluate() needs at least one train row and one test row")

    # Load the default model lazily so that defining this function is cheap and
    # no model instance is shared between calls through a default argument.
    if model is None:
        model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

    print("Starting Evaluation:")

    # Pick the text columns; the tweet data uses different names for train and test.
    if 'headline' in train_dataframe.columns:
        train_text_col, test_text_col = 'headline', 'headline'
    else:
        train_text_col, test_text_col = 'tweet', 'text'

    # Plain lists keep encode() independent of the frames' index labels.
    embeddings_train = model.encode(train_dataframe[train_text_col].tolist(), batch_size=32, show_progress_bar=True)
    embeddings_test = model.encode(test_dataframe[test_text_col].tolist(), batch_size=32, show_progress_bar=True)

    # Cosine similarity matrix: one row per test text, one column per train text.
    cosine_scores = np.array(util.pytorch_cos_sim(embeddings_test, embeddings_train))

    # Position (not index label) of the most similar training text per test text.
    nearest_train_pos = np.argmax(cosine_scores, axis=1)

    if 'is_sarcastic' not in train_dataframe.columns:
        train_dataframe = train_dataframe.rename(columns={'sarcastic': 'is_sarcastic'})
        test_dataframe = test_dataframe.rename(columns={'sarcastic': 'is_sarcastic'})

    # Work on raw arrays so the lookup below is positional; indexing the Series
    # with `[positions]` would be a label lookup and break on a non-0..n-1 index.
    train_labels = train_dataframe['is_sarcastic'].to_numpy()
    test_labels = test_dataframe['is_sarcastic'].to_numpy()

    predicted_class = train_labels[nearest_train_pos]

    accuracy = np.mean(predicted_class == test_labels)
    f1 = f1_score(test_labels, predicted_class)

    return accuracy, f1



In [8]:
def train_mpnet_headlines(synthetic=False, num_epochs=10, batch_size=32):
    """Fine-tune all-mpnet-base-v2 on the headline data with a batch-hard triplet loss.

    Parameters
    ----------
    synthetic : bool, default False
        When False, train on the notebook-level ``sarcasm_headlines_sample``;
        when True, train on ``sarcasm_headlines_sample_with_synthetic``.
    num_epochs : int, default 10
        Number of passes over the training data.
    batch_size : int, default 32
        Number of examples per training batch.

    Returns
    -------
    SentenceTransformer
        The fine-tuned model.
    """
    if synthetic:
        source_frame = sarcasm_headlines_sample_with_synthetic
    else:
        source_frame = sarcasm_headlines_sample

    # Walk the two columns in row order. `frame[col][i]` would be a label lookup
    # that only matches row order while the index is exactly 0..n-1.
    train_samples = [
        InputExample(texts=[headline], label=label)
        for headline, label in zip(source_frame['headline'], source_frame['is_sarcastic'])
    ]

    print("Number of train samples:", len(train_samples))

    model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

    # NOTE(review): shuffle=False keeps the batch composition identical in every
    # epoch. The source frames were shuffled once when they were built, and in
    # the synthetic case the synthetic rows follow the real rows (concat order).
    train_dataloader = DataLoader(train_samples, shuffle=False, batch_size=batch_size)

    # Batch-hard triplet loss with soft margin, computed from the labels in each batch.
    train_loss = losses.BatchHardSoftMarginTripletLoss(model)

    # Warm up over the first 10% of all optimisation steps (batches per epoch * epochs).
    warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)

    print("Starting Training:")

    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=num_epochs,
        warmup_steps=warmup_steps,
        show_progress_bar=True,
    )

    return model

In [9]:
# Baseline: no model is passed, so evaluate() falls back to its default
# pretrained model (no fine-tuning).
accuracy, f1 = evaluate(
    train_dataframe=sarcasm_headlines_sample,
    test_dataframe=sarcasm_headlines_test,
)

print(accuracy, f1)

Starting Evaluation:


Batches:   0%|          | 0/500 [00:00<?, ?it/s]

Batches:   0%|          | 0/135 [00:00<?, ?it/s]

0.7289240801117839 0.7073906485671191


In [10]:
# Fine-tune on the real headline sample only, then score the test set using
# that same sample as the nearest-neighbour reference set.
headlines_model = train_mpnet_headlines()

accuracy, f1 = evaluate(
    train_dataframe=sarcasm_headlines_sample,
    test_dataframe=sarcasm_headlines_test,
    model=headlines_model,
)

print(accuracy, f1)

Number of train samples: 16000




Starting Training:


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Iteration:   0%|          | 0/500 [00:00<?, ?it/s]

Starting Evaluation:


Batches:   0%|          | 0/500 [00:00<?, ?it/s]

Batches:   0%|          | 0/135 [00:00<?, ?it/s]

0.9436422915696321 0.9413191076624636


In [11]:
# Fine-tune on real + synthetic headlines, then score the test set using the
# combined frame as the nearest-neighbour reference set.
# NOTE(review): this rebinds `headlines_model`, so the model trained without
# synthetic data is no longer reachable and any later cell that reads
# `headlines_model` gets the synthetic-augmented one.
headlines_model = train_mpnet_headlines(synthetic=True)

accuracy, f1 = evaluate(
    train_dataframe=sarcasm_headlines_sample_with_synthetic,
    test_dataframe=sarcasm_headlines_test,
    model=headlines_model,
)

print(accuracy, f1)

Number of train samples: 32000




Starting Training:


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1000 [00:00<?, ?it/s]

Starting Evaluation:


Batches:   0%|          | 0/1000 [00:00<?, ?it/s]

Batches:   0%|          | 0/135 [00:00<?, ?it/s]

0.9310666045645086 0.9262948207171315


## iSarcasm

### Data Loading and Pre-processing

In [12]:
# iSarcasm tweets: train and test splits, one CSV file each.
isarcasm_train = pd.read_csv(
    "deep_learning_project/deep_learning_project/iSarcasm/isarcasm_train.csv"
)
isarcasm_test = pd.read_csv(
    "deep_learning_project/deep_learning_project/iSarcasm/isarcasm_test.csv"
)

In [13]:
# Real training tweets: keep the first 800 rows of each class (fewer if a class
# is smaller), shuffle once with a fixed seed and renumber the rows 0..n-1.
isarcasm_sample = (
    isarcasm_train
    .groupby('sarcastic')
    .head(800)
    .sample(frac=1, random_state=0)
    .reset_index(drop=True)
)
isarcasm_sample['sarcastic'].value_counts()

sarcastic
0    800
1    800
Name: count, dtype: int64

In [14]:
# Synthetic tweets: load the CSV and expose the generated text under the same
# 'tweet' column name that the real training data uses.
synthetic_isarcasm = pd.read_csv(
    "deep_learning_project/deep_learning_project/iSarcasm/isarcasm_synthetic__FULL__llama3_topp_95_temp_7.csv"
).rename(columns={"generated_sentence": "tweet"})

# Keep the first 800 rows of each class, then shuffle with a fixed seed.
# NOTE(review): `synthetic_sample` was already bound to the synthetic *headline*
# sample earlier in the notebook; from here on it refers to tweets instead.
synthetic_sample = (
    synthetic_isarcasm
    .groupby('sarcastic')
    .head(800)
    .sample(frac=1, random_state=0)
    .reset_index(drop=True)
)
synthetic_sample['sarcastic'].value_counts()

sarcastic
0    800
1    800
Name: count, dtype: int64

In [15]:
# Stack the real tweet sample on top of the synthetic sample (real rows first)
# and renumber the rows; the class counts are displayed as a balance check.
isarcasm_sample_with_synthetic = pd.concat(
    [isarcasm_sample, synthetic_sample],
    ignore_index=True,
)
isarcasm_sample_with_synthetic['sarcastic'].value_counts()

sarcastic
0    1600
1    1600
Name: count, dtype: int64

### Training and Evaluation

In [16]:
def train_mpnet_isarcasm(synthetic=False, num_epochs=10, batch_size=32):
    """Fine-tune all-mpnet-base-v2 on the iSarcasm tweets with a batch-hard triplet loss.

    Parameters
    ----------
    synthetic : bool, default False
        When False, train on the notebook-level ``isarcasm_sample``; when True,
        train on ``isarcasm_sample_with_synthetic``.
    num_epochs : int, default 10
        Number of passes over the training data.
    batch_size : int, default 32
        Number of examples per training batch.

    Returns
    -------
    SentenceTransformer
        The fine-tuned model.
    """
    if synthetic:
        source_frame = isarcasm_sample_with_synthetic
    else:
        source_frame = isarcasm_sample

    # Walk the two columns in row order. `frame[col][i]` would be a label lookup
    # that only matches row order while the index is exactly 0..n-1.
    train_samples = [
        InputExample(texts=[tweet], label=label)
        for tweet, label in zip(source_frame['tweet'], source_frame['sarcastic'])
    ]

    print("Number of train samples:", len(train_samples))

    model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

    # NOTE(review): shuffle=False keeps the batch composition identical in every
    # epoch. The source frames were shuffled once when they were built, and in
    # the synthetic case the synthetic rows follow the real rows (concat order).
    train_dataloader = DataLoader(train_samples, shuffle=False, batch_size=batch_size)

    # Batch-hard triplet loss with soft margin, computed from the labels in each batch.
    train_loss = losses.BatchHardSoftMarginTripletLoss(model)

    # Warm up over the first 10% of all optimisation steps (batches per epoch * epochs).
    warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)

    print("Starting Training:")

    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=num_epochs,
        warmup_steps=warmup_steps,
        show_progress_bar=True,
    )

    return model

In [17]:
# Baseline: no model is passed, so evaluate() falls back to its default
# pretrained model (no fine-tuning).
# NOTE(review): the reference set here is the full `isarcasm_train`, whereas the
# headline baseline uses the class-balanced sample — confirm this is intended.
accuracy, f1 = evaluate(
    train_dataframe=isarcasm_train,
    test_dataframe=isarcasm_test,
)

print(accuracy, f1)

Starting Evaluation:


Batches:   0%|          | 0/109 [00:00<?, ?it/s]

Batches:   0%|          | 0/44 [00:00<?, ?it/s]

0.6985714285714286 0.2271062271062271


In [18]:
# Fine-tune on the real tweet sample only, then score the test set.
# NOTE(review): training reads `isarcasm_sample`, but the nearest-neighbour
# reference set passed below is the full `isarcasm_train` — confirm intended.
isarcasm_model = train_mpnet_isarcasm()

accuracy, f1 = evaluate(
    train_dataframe=isarcasm_train,
    test_dataframe=isarcasm_test,
    model=isarcasm_model,
)

print(accuracy, f1)

Number of train samples: 1600




Starting Training:


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/50 [00:00<?, ?it/s]

Starting Evaluation:


Batches:   0%|          | 0/109 [00:00<?, ?it/s]

Batches:   0%|          | 0/44 [00:00<?, ?it/s]

0.6671428571428571 0.20748299319727892


In [22]:
# Fine-tune on real + synthetic tweets, then score the test set using the
# combined frame as the nearest-neighbour reference set.
# NOTE(review): this cell carries execution count 22 but sits above the cells
# numbered 20 and 21, and it rebinds `isarcasm_model`. On a fresh top-to-bottom
# run, later cells reading `isarcasm_model` get this synthetic-augmented model,
# which may not be the one that produced their recorded outputs — verify.
isarcasm_model = train_mpnet_isarcasm(synthetic=True)

accuracy, f1 = evaluate(
    train_dataframe=isarcasm_sample_with_synthetic,
    test_dataframe=isarcasm_test,
    model=isarcasm_model,
)

print(accuracy, f1)

Number of train samples: 3200




Starting Training:


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Iteration:   0%|          | 0/100 [00:00<?, ?it/s]

Starting Evaluation:


Batches:   0%|          | 0/100 [00:00<?, ?it/s]

Batches:   0%|          | 0/44 [00:00<?, ?it/s]

0.6678571428571428 0.30284857571214396


## Cross Domain Evaluation

In [20]:
# Cross-domain: embed the iSarcasm tweets with the headline-trained model.
# NOTE(review): `headlines_model` is whichever model was bound last; in notebook
# order that is the one trained with synthetic headlines.
accuracy, f1 = evaluate(
    train_dataframe=isarcasm_train,
    test_dataframe=isarcasm_test,
    model=headlines_model,
)

print(accuracy, f1)

Starting Evaluation:


Batches:   0%|          | 0/109 [00:00<?, ?it/s]

Batches:   0%|          | 0/44 [00:00<?, ?it/s]

0.6635714285714286 0.1751313485113835


In [21]:
# Cross-domain: embed the headlines with the tweet-trained model.
# NOTE(review): `isarcasm_model` is whichever model was bound last. This cell's
# execution count (21) is lower than the synthetic training cell's (22), so the
# recorded output presumably came from an earlier binding — verify before
# comparing numbers.
accuracy, f1 = evaluate(
    train_dataframe=sarcasm_headlines_sample,
    test_dataframe=sarcasm_headlines_test,
    model=isarcasm_model,
)

print(accuracy, f1)

Starting Evaluation:


Batches:   0%|          | 0/500 [00:00<?, ?it/s]

Batches:   0%|          | 0/135 [00:00<?, ?it/s]

0.7550069864927806 0.7368684342171087
