In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Overview of english training data(train.En.csv)

In [2]:
data_en = pd.read_csv('train/train.En.csv')

In [3]:
data_en

Unnamed: 0.1,Unnamed: 0,tweet,sarcastic,rephrase,sarcasm,irony,satire,understatement,overstatement,rhetorical_question
0,0,The only thing I got from college is a caffein...,1,"College is really difficult, expensive, tiring...",0.0,1.0,0.0,0.0,0.0,0.0
1,1,I love it when professors draw a big question ...,1,I do not like when professors don’t write out ...,1.0,0.0,0.0,0.0,0.0,0.0
2,2,Remember the hundred emails from companies whe...,1,"I, at the bare minimum, wish companies actuall...",0.0,1.0,0.0,0.0,0.0,0.0
3,3,Today my pop-pop told me I was not “forced” to...,1,"Today my pop-pop told me I was not ""forced"" to...",1.0,0.0,0.0,0.0,0.0,0.0
4,4,@VolphanCarol @littlewhitty @mysticalmanatee I...,1,I would say Ted Cruz is an asshole and doesn’t...,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
3463,3463,The population spike in Chicago in 9 months is...,0,,,,,,,
3464,3464,You'd think in the second to last English clas...,0,,,,,,,
3465,3465,I’m finally surfacing after a holiday to Scotl...,0,,,,,,,
3466,3466,Couldn't be prouder today. Well done to every ...,0,,,,,,,


In [4]:
data_en.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3468 entries, 0 to 3467
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Unnamed: 0           3468 non-null   int64  
 1   tweet                3467 non-null   object 
 2   sarcastic            3468 non-null   int64  
 3   rephrase             867 non-null    object 
 4   sarcasm              867 non-null    float64
 5   irony                867 non-null    float64
 6   satire               867 non-null    float64
 7   understatement       867 non-null    float64
 8   overstatement        867 non-null    float64
 9   rhetorical_question  867 non-null    float64
dtypes: float64(6), int64(2), object(2)
memory usage: 271.1+ KB


## Remove the data which is NaN，only 1 line

In [5]:
data_en= data_en.dropna(subset=['tweet'])

In [6]:
data_en

Unnamed: 0.1,Unnamed: 0,tweet,sarcastic,rephrase,sarcasm,irony,satire,understatement,overstatement,rhetorical_question
0,0,The only thing I got from college is a caffein...,1,"College is really difficult, expensive, tiring...",0.0,1.0,0.0,0.0,0.0,0.0
1,1,I love it when professors draw a big question ...,1,I do not like when professors don’t write out ...,1.0,0.0,0.0,0.0,0.0,0.0
2,2,Remember the hundred emails from companies whe...,1,"I, at the bare minimum, wish companies actuall...",0.0,1.0,0.0,0.0,0.0,0.0
3,3,Today my pop-pop told me I was not “forced” to...,1,"Today my pop-pop told me I was not ""forced"" to...",1.0,0.0,0.0,0.0,0.0,0.0
4,4,@VolphanCarol @littlewhitty @mysticalmanatee I...,1,I would say Ted Cruz is an asshole and doesn’t...,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
3463,3463,The population spike in Chicago in 9 months is...,0,,,,,,,
3464,3464,You'd think in the second to last English clas...,0,,,,,,,
3465,3465,I’m finally surfacing after a holiday to Scotl...,0,,,,,,,
3466,3466,Couldn't be prouder today. Well done to every ...,0,,,,,,,


## Word embding wit TF-Idf and use SVM for prediction

In [7]:
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(data_en['tweet'])
y = data_en['sarcastic']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
model = SVC(kernel='linear')
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.72      1.00      0.83       495
           1       0.80      0.02      0.04       199

    accuracy                           0.72       694
   macro avg       0.76      0.51      0.44       694
weighted avg       0.74      0.72      0.61       694



Result of the prediction of label1 is really bad, try other word embding methods.

# BERT word embding and the result of SVM

In [10]:
import torch
from transformers import BertTokenizer, BertModel

In [11]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

texts = data_en['tweet'].tolist()
labels = data_en['sarcastic'].tolist()
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)

In [12]:
texts = data_en['tweet'].tolist()
labels = data_en['sarcastic'].tolist()
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
with torch.no_grad():
    outputs = model(**inputs)
    features = outputs.last_hidden_state[:,0,:].numpy() 

In [13]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
predictions = svm_model.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.73      0.82      0.77       495
           1       0.36      0.25      0.29       199

    accuracy                           0.66       694
   macro avg       0.55      0.54      0.53       694
weighted avg       0.62      0.66      0.64       694



The result is much more balanced, the prediction for label1( is sarcasm ) is much better. But time for embding is a little long. We try Word2Vec.

# Word2Vec and SVM

In [14]:
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')
from gensim.models import Word2Vec

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\1\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [15]:
word2vec_model = Word2Vec(sentences=texts, vector_size=100, window=5, min_count=1, workers=4)
word2vec_model.train(texts, total_examples=len(texts), epochs=10)

(866258, 3615700)

In [16]:
def get_sentence_vector(words, model):
    vectors = [model.wv[word] for word in words if word in model.wv]
    if len(vectors) == 0:
        return np.zeros(model.vector_size)
    else:
        return np.mean(vectors, axis=0)
sentence_vectors = np.array([get_sentence_vector(text, word2vec_model) for text in texts])

In [17]:
X_train, X_test, y_train, y_test = train_test_split(sentence_vectors, labels, test_size=0.2, random_state=42)

In [18]:
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
predictions = svm_model.predict(X_test)
print(classification_report(y_test, predictions, zero_division=0))

              precision    recall  f1-score   support

           0       0.71      1.00      0.83       495
           1       0.00      0.00      0.00       199

    accuracy                           0.71       694
   macro avg       0.36      0.50      0.42       694
weighted avg       0.51      0.71      0.59       694



Result is even worse. We use BERT word embding for the other baseline models.

# BERT embding and CNN

In [19]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch.optim import Adam
from transformers import RobertaTokenizer, RobertaModel, RobertaForSequenceClassification
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from transformers import  get_linear_schedule_with_warmup
import torch.nn.functional as F

In [20]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [21]:
class BERTClassifier(nn.Module):
    def __init__(self, num_features, num_classes):
        super(BERTClassifier, self).__init__()
        self.fc1 = nn.Linear(num_features, 512)  # First fully connected layer
        self.relu = nn.ReLU()  # ReLU activation
        self.dropout = nn.Dropout(0.5)  # Dropout for regularization
        self.fc2 = nn.Linear(512, num_classes)  # Final layer for classification
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_features = 768  # For bert-base-uncased
num_classes = 2  # Assuming binary classification

model = BERTClassifier(num_features=100, num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer =  Adam(model.parameters(), lr=1e-5)

epochs = 30
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader)}')


Epoch 1/30, Loss: 0.6256244588981975
Epoch 2/30, Loss: 0.6161920482462103
Epoch 3/30, Loss: 0.6079820015213706
Epoch 4/30, Loss: 0.6001469167796049
Epoch 5/30, Loss: 0.5943822345950387
Epoch 6/30, Loss: 0.5890877003019507
Epoch 7/30, Loss: 0.5863127735528079
Epoch 8/30, Loss: 0.5821100581776012
Epoch 9/30, Loss: 0.5771567144177177
Epoch 10/30, Loss: 0.5738506804813038
Epoch 11/30, Loss: 0.5711738033728166
Epoch 12/30, Loss: 0.5678739859299227
Epoch 13/30, Loss: 0.5678175498138774
Epoch 14/30, Loss: 0.5647146295417439
Epoch 15/30, Loss: 0.5645117597146467
Epoch 16/30, Loss: 0.5633217380805449
Epoch 17/30, Loss: 0.5605911179022356
Epoch 18/30, Loss: 0.5604793619025837
Epoch 19/30, Loss: 0.5606907389380715
Epoch 20/30, Loss: 0.5584921552376314
Epoch 21/30, Loss: 0.5600046339360151
Epoch 22/30, Loss: 0.560171751813455
Epoch 23/30, Loss: 0.5577607276764783
Epoch 24/30, Loss: 0.5582259879870848
Epoch 25/30, Loss: 0.5574367303739894
Epoch 26/30, Loss: 0.5577164509079673
Epoch 27/30, Loss: 0.5

In [22]:
model.eval()  
predictions = []
true_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())
print(classification_report(true_labels, predictions, digits=2))

              precision    recall  f1-score   support

           0       0.71      1.00      0.83       495
           1       0.00      0.00      0.00       199

    accuracy                           0.71       694
   macro avg       0.36      0.50      0.42       694
weighted avg       0.51      0.71      0.59       694



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Performance is horrible. There is reason that people will not use CNN for NLP tasks.

# BERT embding and LSTM

In [23]:
class BertLSTM(nn.Module):
    def __init__(self, bert_model, hidden_dim, output_dim, lstm_layers=1, bidirectional=False, dropout=0.1):
        super(BertLSTM, self).__init__()
        self.bert = bert_model
        self.lstm = nn.LSTM(768, hidden_dim, num_layers=lstm_layers, bidirectional=bidirectional, batch_first=True, dropout=dropout if lstm_layers > 1 else 0)
        lstm_output_dim = hidden_dim * 2 if bidirectional else hidden_dim
        self.classifier = nn.Linear(lstm_output_dim, output_dim)
        
    def forward(self, input_ids, attention_mask):
        with torch.no_grad():
            
            outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
            sequence_output = outputs.last_hidden_state
       
        lstm_output, (hidden_state, cell_state) = self.lstm(sequence_output)
       
        if self.lstm.bidirectional:
            lstm_output = torch.cat((hidden_state[-2,:,:], hidden_state[-1,:,:]), dim=1)
        else:
            lstm_output = hidden_state[-1,:,:]
       
        logits = self.classifier(lstm_output)
        return logits


In [24]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')


texts = data_en['tweet'].tolist()
labels = data_en['sarcastic'].tolist()
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
with torch.no_grad():
    outputs = model(**inputs)
    features = outputs.last_hidden_state[:,0,:].numpy() 
    
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)


In [25]:
features = torch.tensor(features)
labels = torch.tensor(labels, dtype=torch.long)

dataset = TensorDataset(inputs['input_ids'], inputs['attention_mask'], labels)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BertLSTM(bert_model=model, hidden_dim=256, output_dim=2, lstm_layers=1, bidirectional=True).to(device)

optimizer = Adam(model.parameters(), lr=1e-5)
loss_fn = nn.CrossEntropyLoss()

epochs = 30
model.train()
for epoch in range(epochs):
    for batch in train_loader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")


Epoch 1, Loss: 0.6995932459831238
Epoch 2, Loss: 0.61945641040802
Epoch 3, Loss: 0.5785393714904785
Epoch 4, Loss: 0.576870322227478
Epoch 5, Loss: 0.566392719745636
Epoch 6, Loss: 0.6753127574920654
Epoch 7, Loss: 0.6057244539260864
Epoch 8, Loss: 0.5067143440246582
Epoch 9, Loss: 0.5833205580711365
Epoch 10, Loss: 0.4790968596935272
Epoch 11, Loss: 0.553758978843689
Epoch 12, Loss: 0.5561097264289856
Epoch 13, Loss: 0.5160054564476013
Epoch 14, Loss: 0.5935359001159668
Epoch 15, Loss: 0.4560028612613678
Epoch 16, Loss: 0.48512017726898193
Epoch 17, Loss: 0.558098554611206
Epoch 18, Loss: 0.5697084665298462
Epoch 19, Loss: 0.538184642791748
Epoch 20, Loss: 0.5268934369087219
Epoch 21, Loss: 0.5719056725502014
Epoch 22, Loss: 0.5601086616516113
Epoch 23, Loss: 0.5029844641685486
Epoch 24, Loss: 0.49454957246780396
Epoch 25, Loss: 0.5262494087219238
Epoch 26, Loss: 0.5559357404708862
Epoch 27, Loss: 0.4932132959365845
Epoch 28, Loss: 0.5470383167266846
Epoch 29, Loss: 0.5783883333206177

In [26]:
model.eval()


true_labels = []
pred_labels = []

with torch.no_grad():
    for batch in val_loader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]
        outputs = model(input_ids, attention_mask)
        predictions = torch.argmax(outputs, dim=1)
        true_labels.extend(labels.cpu().numpy())
        pred_labels.extend(predictions.cpu().numpy())


print(classification_report(true_labels, pred_labels, zero_division=0))


              precision    recall  f1-score   support

           0       0.74      1.00      0.85       511
           1       0.00      0.00      0.00       183

    accuracy                           0.74       694
   macro avg       0.37      0.50      0.42       694
weighted avg       0.54      0.74      0.62       694



LSTM is not a good idea neither.                                                                  


# BERT embding and RoBERTa(PTM)

In [27]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2) # Assuming binary classification (e.g., sarcastic vs. not sarcastic)


texts = data_en['tweet'].tolist()
labels = data_en['sarcastic'].tolist()


inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']


labels = torch.tensor(labels)


dataset = TensorDataset(input_ids, attention_mask, labels)


train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])


batch_size = 128
train_dataloader = DataLoader(train_dataset, sampler=RandomSampler(train_dataset), batch_size=batch_size)
validation_dataloader = DataLoader(val_dataset, sampler=SequentialSampler(val_dataset), batch_size=batch_size)



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
input_ids = inputs['input_ids']
attention_masks = inputs['attention_mask']


labels = torch.tensor(labels)

dataset = TensorDataset(input_ids, attention_masks, labels)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])


batch_size = 128
train_dataloader = DataLoader(train_dataset, sampler=RandomSampler(train_dataset), batch_size=batch_size)
validation_dataloader = DataLoader(val_dataset, sampler=SequentialSampler(val_dataset), batch_size=batch_size)


optimizer =  Adam(model.parameters(), lr=1e-5)


epochs = 30
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Device Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Helper Function for Accuracy Calculation
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

# Training Loop
for epoch_i in range(0, epochs):
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    model.train()
    total_loss = 0

    for step, batch in enumerate(train_dataloader):
        if step % 40 == 0:
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))
        
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        model.zero_grad()        
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_loss / len(train_dataloader)
    print("  Average training loss: {0:.2f}".format(avg_train_loss))

    # Validation Loop
    model.eval()
    eval_accuracy = 0
    nb_eval_steps = 0

    for batch in validation_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        
        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        
        logits = outputs.logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)
        eval_accuracy += tmp_eval_accuracy
        nb_eval_steps += 1

    print("  Validation Accuracy: {0:.2f}".format(eval_accuracy/nb_eval_steps))

print("Training complete!")


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  labels = torch.tensor(labels)


  Batch     0  of     22.
  Average training loss: 0.60
  Validation Accuracy: 0.76
  Batch     0  of     22.
  Average training loss: 0.57
  Validation Accuracy: 0.76
  Batch     0  of     22.
  Average training loss: 0.55
  Validation Accuracy: 0.76
  Batch     0  of     22.
  Average training loss: 0.54
  Validation Accuracy: 0.76
  Batch     0  of     22.
  Average training loss: 0.50
  Validation Accuracy: 0.76
  Batch     0  of     22.
  Average training loss: 0.45
  Validation Accuracy: 0.75
  Batch     0  of     22.
  Average training loss: 0.41
  Validation Accuracy: 0.73
  Batch     0  of     22.
  Average training loss: 0.39
  Validation Accuracy: 0.74
  Batch     0  of     22.
  Average training loss: 0.33
  Validation Accuracy: 0.74
  Batch     0  of     22.
  Average training loss: 0.28
  Validation Accuracy: 0.73
  Batch     0  of     22.
  Average training loss: 0.24
  Validation Accuracy: 0.74
  Batch     0  of     22.
  Average training loss: 0.20
  Validation Accurac

In [29]:
import torch
from sklearn.metrics import classification_report
from torch.nn.functional import softmax

model.eval()

true_labels = []
pred_labels = []

with torch.no_grad():
    for batch in validation_dataloader:
        input_ids = batch[0].to(device)
        attention_mask = batch[1].to(device)
        labels = batch[2].to(device)

        outputs = model(input_ids, attention_mask=attention_mask)
        
        # Convert model logits to class probabilities using softmax
        probs = softmax(outputs.logits, dim=1)
        
        # Get the predicted labels
        preds = torch.argmax(probs, dim=1)
        
        true_labels.extend(labels.cpu().numpy())
        pred_labels.extend(preds.cpu().numpy())

report = classification_report(true_labels, pred_labels, zero_division=0)
print(report)


              precision    recall  f1-score   support

           0       0.79      0.88      0.83       520
           1       0.47      0.32      0.38       174

    accuracy                           0.74       694
   macro avg       0.63      0.60      0.61       694
weighted avg       0.71      0.74      0.72       694



More fine-tuning may needed. Much better than LSTM in variance

# Conlusions

1.We have built 4 baseline models, which can give us an idea about the futhur work.                                  
2.We will try to reproduce the results of SemEval-2022 Task 6: iSarcasmEval, Intended Sarcasm Detection in
English and Arabic.                                                                              
3.We will use data augmentation and more word embding methods trying to have a better result.                                                       
4.XLM-RoBERTa, LIama2, and XGen will also be tried. We will read more papers to have an idea about fine-tuning.
