<a href="https://colab.research.google.com/github/Kishan-prajapati-242/ATCTM/blob/main/notebooks/EC_demo_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch import nn
from torch.optim import AdamW
from tqdm import tqdm
from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
# Download the two NLTK resources this notebook requests
for _nltk_resource in ('punkt', 'vader_lexicon'):
    nltk.download(_nltk_resource)

from google.colab import drive

# Attach Google Drive at /content/drive so later cells can read files from it
drive.mount('/content/drive')



In [None]:
from sklearn.metrics import classification_report


# Load the dataset from Drive, drop rows that have no TEXT, then shuffle with
# a fixed seed so the positional split below is reproducible.
df = (
    pd.read_csv('/content/drive/MyDrive/ATCTM/EVENT_CLASSIFICATION/EC-demo.csv')
    .dropna(subset=['TEXT'])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# One encoder per categorical target; each is kept at module level so its
# classes_ can translate integer ids back to label names later.
le_event, le_emotion, le_tense = LabelEncoder(), LabelEncoder(), LabelEncoder()

# Add an integer-id column next to each categorical target column
for _encoder, _source_col, _target_col in (
    (le_event, 'EVENT_TYPE', 'EVENT_TYPE_ID'),
    (le_emotion, 'EMOTION', 'EMOTION_ID'),
    (le_tense, 'TENSE', 'TENSE_ID'),
):
    df[_target_col] = _encoder.fit_transform(df[_source_col])

# SARCASM is cast straight to an integer flag rather than label-encoded
df['SARCASM_ID'] = df['SARCASM'].astype(int)

# Positional split of the shuffled frame: the first 800 rows train the model,
# every row after that is held out for validation.
train_df, val_df = df.iloc[:800], df.iloc[800:]

# Dataset Class
class EventDataset(Dataset):
    """Map-style dataset yielding one tokenized text plus its four labels.

    Args:
        dataframe: Table with a ``TEXT`` column and the integer label columns
            ``EVENT_TYPE_ID``, ``EMOTION_ID``, ``SARCASM_ID`` and ``TENSE_ID``.
            Rows are addressed by position (``iloc``).
        tokenizer: Callable invoked as ``tokenizer(text, padding='max_length',
            truncation=True, max_length=max_len, return_tensors='pt')``. It
            must return a mapping whose ``input_ids`` and ``attention_mask``
            tensors have a leading dimension of size 1.
        max_len: Length passed to the tokenizer as ``max_length``.
    """

    # Output key -> dataframe column that stores the encoded label.
    _LABEL_COLUMNS = {
        'event_type': 'EVENT_TYPE_ID',
        'emotion': 'EMOTION_ID',
        'sarcasm': 'SARCASM_ID',
        'tense': 'TENSE_ID',
    }

    def __init__(self, dataframe, tokenizer, max_len=128):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the wrapped dataframe."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample at position ``idx``.

        Returns:
            A dict with ``input_ids`` and ``attention_mask`` (the tokenizer
            tensors with their leading dimension removed) and the scalar
            int64 label tensors ``event_type``, ``emotion``, ``sarcasm`` and
            ``tense``.
        """
        row = self.data.iloc[idx]
        encoded = self.tokenizer(
            row['TEXT'],
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
            return_tensors='pt',
        )
        item = {
            # Drop the size-1 leading dimension; the DataLoader adds the batch
            # dimension back when it collates samples.
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
        }
        for key, column in self._LABEL_COLUMNS.items():
            # Force int64: torch.tensor() would otherwise inherit the column's
            # numpy dtype (e.g. int32), and nn.CrossEntropyLoss only accepts
            # int64 class-index targets.
            item[key] = torch.tensor(int(row[column]), dtype=torch.long)
        return item

# Improved Model Definition with task-specific projections
class EventClassifier(nn.Module):
    """BERT encoder with one shared layer feeding four task-specific branches.

    The pooled BERT output goes through dropout and a shared linear layer.
    Each task (event type, emotion, sarcasm, tense) then has its own
    projection layer followed by its own output head.

    Args:
        num_event_types: Output width of the event-type head.
        num_emotions: Output width of the emotion head.
        num_tenses: Output width of the tense head.
    """

    def __init__(self, num_event_types, num_emotions, num_tenses):
        super().__init__()
        shared_dim, branch_dim = 512, 256

        # Encoder and shared trunk
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(0.3)
        self.shared = nn.Linear(768, shared_dim)

        # Per-task projections on top of the shared representation
        self.event_proj = nn.Linear(shared_dim, branch_dim)
        self.emotion_proj = nn.Linear(shared_dim, branch_dim)
        self.sarcasm_proj = nn.Linear(shared_dim, branch_dim)
        self.tense_proj = nn.Linear(shared_dim, branch_dim)

        # Per-task output heads; the sarcasm head emits a single value
        self.event_head = nn.Linear(branch_dim, num_event_types)
        self.emotion_head = nn.Linear(branch_dim, num_emotions)
        self.sarcasm_head = nn.Linear(branch_dim, 1)
        self.tense_head = nn.Linear(branch_dim, num_tenses)

    def forward(self, input_ids, attention_mask):
        """Encode the inputs and return the outputs of all four tasks.

        Returns:
            A dict with keys ``event_type``, ``emotion`` and ``tense`` (raw
            head outputs) and ``sarcasm`` (the head output passed through a
            sigmoid, with a trailing dimension of size 1).
        """
        encoder_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        features = self.shared(self.dropout(encoder_out.pooler_output))

        event_logits = self.event_head(self.event_proj(features))
        emotion_logits = self.emotion_head(self.emotion_proj(features))
        sarcasm_prob = torch.sigmoid(self.sarcasm_head(self.sarcasm_proj(features)))
        tense_logits = self.tense_head(self.tense_proj(features))

        return {
            'event_type': event_logits,
            'emotion': emotion_logits,
            'sarcasm': sarcasm_prob,
            'tense': tense_logits,
        }

# Tokenizer, datasets and batch loaders (only the training loader shuffles)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_data, val_data = EventDataset(train_df, tokenizer), EventDataset(val_df, tokenizer)
train_loader = DataLoader(dataset=train_data, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=8)

# 'balanced' per-class weights for the emotion labels, computed over the
# whole dataframe (training and validation rows together)
from sklearn.utils.class_weight import compute_class_weight
weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(df['EMOTION_ID']),
    y=df['EMOTION_ID'],
)
emotion_weights = torch.tensor(weights, dtype=torch.float)

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Model sized from the fitted label encoders, plus its optimizer
model = EventClassifier(
    num_event_types=len(le_event.classes_),
    num_emotions=len(le_emotion.classes_),
    num_tenses=len(le_tense.classes_),
)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=2e-5)

# Losses: plain cross-entropy (event type, tense), class-weighted
# cross-entropy (emotion), and binary cross-entropy on probabilities (sarcasm)
loss_fn_ce = nn.CrossEntropyLoss()
loss_fn_emo = nn.CrossEntropyLoss(weight=emotion_weights.to(device))
loss_fn_bce = nn.BCELoss()

# Training Loop: four passes over train_loader, minimizing the unweighted sum
# of the four task losses. The printed value is the loss summed over all
# batches of the epoch, not a per-batch average.
for epoch in range(4):
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(input_ids, attention_mask)

        loss_event = loss_fn_ce(outputs['event_type'], batch['event_type'].to(device))
        loss_emo = loss_fn_emo(outputs['emotion'], batch['emotion'].to(device))
        # squeeze(-1) removes only the trailing size-1 dimension. A bare
        # squeeze() would also collapse a one-sample batch to a 0-dim tensor,
        # which no longer matches the (1,)-shaped target and makes BCELoss
        # raise.
        loss_sar = loss_fn_bce(outputs['sarcasm'].squeeze(-1), batch['sarcasm'].float().to(device))
        loss_tense = loss_fn_ce(outputs['tense'], batch['tense'].to(device))

        loss = loss_event + loss_emo + loss_sar + loss_tense

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")


In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Evaluation Metrics: collect predictions and labels for all four tasks over
# the validation loader, then report on the event-type task.
model.eval()
all_preds = {'event_type': [], 'emotion': [], 'sarcasm': [], 'tense': []}
all_labels = {'event_type': [], 'emotion': [], 'sarcasm': [], 'tense': []}

with torch.no_grad():
    for batch in val_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(input_ids, attention_mask)

        # Multi-class tasks: the predicted class is the arg-max over dim 1
        for task in ('event_type', 'emotion', 'tense'):
            all_preds[task] += outputs[task].argmax(dim=1).cpu().tolist()
            all_labels[task] += batch[task].tolist()

        # Sarcasm is thresholded at 0.5. squeeze(-1) keeps the batch
        # dimension, so a one-sample batch still yields a list; a bare
        # squeeze() would yield a plain int there and break the `+=`.
        all_preds['sarcasm'] += (outputs['sarcasm'] > 0.5).int().cpu().squeeze(-1).tolist()
        all_labels['sarcasm'] += batch['sarcasm'].tolist()

# === EVENT TYPE REPORT ===
print("\nEvent Type Classification Report:")
print(classification_report(
    all_labels['event_type'],
    all_preds['event_type'],
    # Explicit label ids keep target_names aligned even when some class never
    # appears in the validation labels or predictions; without them
    # classification_report raises on the length mismatch.
    labels=list(range(len(le_event.classes_))),
    target_names=le_event.classes_
))

event_acc = accuracy_score(all_labels['event_type'], all_preds['event_type'])
print(f"\nEvent Type Accuracy: {event_acc * 100:.2f}% out of 100\n")

In [None]:
def predict_event(text, max_len=128):
    """Run the trained model on one text and return its decoded predictions.

    No earlier cell of this notebook defines ``predict_event``, so it is
    defined here to keep the loop at the bottom of this cell from raising
    ``NameError``. It relies on the module-level ``model``, ``tokenizer``,
    ``device`` and label encoders created in the training cell.

    Args:
        text: The sentence to classify.
        max_len: Length the text is padded or truncated to; the default
            matches the default used by ``EventDataset``.

    Returns:
        A dict with the input ``text``, the predicted ``event_type``,
        ``emotion`` and ``tense`` label names, and a boolean ``sarcasm`` flag
        (sigmoid output above 0.5).
    """
    model.eval()
    encoded = tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=max_len,
        return_tensors='pt',
    )
    with torch.no_grad():
        outputs = model(
            encoded['input_ids'].to(device),
            encoded['attention_mask'].to(device),
        )

    # Map each arg-max class id back to its label through the fitted encoder
    event_id = outputs['event_type'].argmax(dim=1).item()
    emotion_id = outputs['emotion'].argmax(dim=1).item()
    tense_id = outputs['tense'].argmax(dim=1).item()

    return {
        'text': text,
        'event_type': str(le_event.classes_[event_id]),
        'emotion': str(le_emotion.classes_[emotion_id]),
        'sarcasm': outputs['sarcasm'].item() > 0.5,
        'tense': str(le_tense.classes_[tense_id]),
    }


# Two hand-written example sentences per event type, for a quick manual check
sample_texts = [
    # got_hired
    "Just signed the offer letter! I'm finally hired at a real tech company!",
    "Out of nowhere, they called back and said I got the job. Unreal!",

    # got_fired
    "Got fired this morning. Still trying to process what just happened.",
    "My manager let me go after a 5-minute meeting. No warnings, nothing.",

    # got_laid_off
    "Our entire department was laid off due to budget cuts.",
    "I was laid off today. Honestly, I saw it coming but it still hurts.",

    # got_promoted
    "After months of hard work, I finally got promoted to team lead!",
    "Surprise email today — I’ve been promoted to senior analyst!",

    # got_demoted
    "Apparently I’m not 'strategic enough'. Just got demoted.",
    "Back to junior level... they really demoted me without proper explanation.",

    # changed_jobs
    "I just switched jobs! New city, new team, new energy!",
    "Left my old company last week — started at a startup today!",

    # started_new_career
    "I quit teaching and officially started my new career in UX design!",
    "Day 1 as a data analyst after leaving hospitality — a fresh chapter begins!",

    # retired
    "Last day at work today — after 40 years, I'm finally retired!",
    "Retired as of this morning. Bittersweet but grateful for the journey.",

    # got_raise
    "Just got a raise! My efforts are finally being recognized!",
    "They bumped my salary up by 10%! Didn’t expect that at all!",

    # got_pay_cut
    "Company reduced my pay again... this is getting ridiculous.",
    "Was told my salary is being slashed due to company restructuring."
]

for txt in sample_texts:
    print(predict_event(txt))