<a href="https://colab.research.google.com/github/Flukeshotz/sentiment-analysis-with-sarcasm-detection/blob/main/Sentiment_Sarcasm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Step 1: Install necessary libraries

In [None]:
!pip install transformers
!pip install torch
!pip install scikit-learn
!pip install gradio

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import warnings

warnings.filterwarnings('ignore')
nltk.download('vader_lexicon')

# Load VADER
sia = SentimentIntensityAnalyzer()

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Load Datasets
reddit = pd.read_csv("Reddit_Data.csv")[["clean_comment", "category"]]
twitter = pd.read_csv("Twitter_Data.csv")[["clean_text", "category"]]
sarc_v1 = pd.read_json("Cleaned_Sarcasm_Headlines_Dataset.json", lines=True)
sarc_v2 = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines=True)[["headline", "is_sarcastic"]]

# 2. Clean & Combine
sarc_v1_flat = pd.json_normalize(sarc_v1[0])[["headline", "is_sarcastic"]]
reddit.columns = ["headline", "is_sarcastic"]
twitter.columns = ["headline", "is_sarcastic"]
data = pd.concat([sarc_v1_flat, sarc_v2, reddit, twitter], ignore_index=True)
data.dropna(inplace=True)
data["is_sarcastic"] = data["is_sarcastic"].astype(int)

# 3. Split
train_texts, test_texts, train_labels, test_labels = train_test_split(
    data["headline"].tolist(), data["is_sarcastic"].tolist(), test_size=0.2, random_state=42)

# 4. Tokenization
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class SarcasmDataset(Dataset):
    def __init__(self, texts, labels):
        self.encodings = tokenizer(texts, truncation=True, padding=True, return_tensors="pt", max_length=128)
        self.labels = torch.tensor(labels)

    def __getitem__(self, idx):
        return {key: val[idx] for key, val in self.encodings.items()}, self.labels[idx]

    def __len__(self):
        return len(self.labels)

train_dataset = SarcasmDataset(train_texts, train_labels)
test_dataset = SarcasmDataset(test_texts, test_labels)

# 5. Model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.to(device)

# 6. Training
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
optim = torch.optim.AdamW(model.parameters(), lr=5e-5)
epochs = 2

model.train()
for epoch in range(epochs):
    total_loss = 0
    for batch in train_loader:
        inputs, labels = batch
        inputs = {k: v.to(device) for k, v in inputs.items()}
        labels = labels.to(device)
        outputs = model(**inputs, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optim.step()
        optim.zero_grad()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")

# 7. Evaluation
model.eval()
test_loader = DataLoader(test_dataset, batch_size=16)
preds, true = [], []

with torch.no_grad():
    for batch in test_loader:
        inputs, labels = batch
        inputs = {k: v.to(device) for k, v in inputs.items()}
        outputs = model(**inputs)
        logits = outputs.logits
        preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
        true.extend(labels.numpy())

print("\nAccuracy:", accuracy_score(true, preds))
print("\nClassification Report:\n", classification_report(true, preds))

# 8. Custom Prediction (Sarcasm + Sentiment)
def predict_sarcasm_and_sentiment(text):
    model.eval()
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=128)
    encoded = {k: v.to(device) for k, v in encoded.items()}
    with torch.no_grad():
        output = model(**encoded)
        prediction = torch.argmax(output.logits, dim=1).item()

    sarcasm = "Sarcastic" if prediction == 1 else "Not Sarcastic"

    sentiment_score = sia.polarity_scores(text)['compound']
    sentiment = "Positive" if sentiment_score >= 0.05 else "Negative" if sentiment_score <= -0.05 else "Neutral"

    return sarcasm, sentiment, sentiment_score

# 9. Test Loop
while True:
    text = input("\nEnter text (or type 'exit'): ")
    if text.lower() == 'exit':
        break
    sarcasm, sentiment, score = predict_sarcasm_and_sentiment(text)
    print(f"Prediction: {sarcasm} | Sentiment: {sentiment} | Score: {score}")

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-5a70e35771fe>", line 52, in <cell line: 0>
    train_dataset = SarcasmDataset(train_texts, train_labels)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-4-5a70e35771fe>", line 43, in __init__
    self.encodings = tokenizer(texts, truncation=True, padding=True, return_tensors="pt", max_length=128)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/tokenization_utils_base.py", line 2887, in __call__
    encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/tokenizatio

In [None]:
!pip install --upgrade transformers

Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m71.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.51.1
    Uninstalling transformers-4.51.1:
      Successfully uninstalled transformers-4.51.1
Successfully installed transformers-4.51.3
