<a href="https://colab.research.google.com/github/Kishan-prajapati-242/ATCTM/blob/main/notebooks/EC_demo_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch import nn
from torch.optim import AdamW
from tqdm import tqdm
from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
# Fetch NLTK resources at runtime. 'vader_lexicon' is the lexicon used by the
# SentimentIntensityAnalyzer imported above.
# NOTE(review): 'punkt' is not referenced by any code visible in this
# notebook section -- confirm it is still required.
nltk.download('punkt')
nltk.download('vader_lexicon')
from google.colab import drive

# Mount Google Drive
# The training CSV is later read from a path under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedShuffleSplit

# Load the labelled examples. Rows with a missing TEXT value are dropped;
# missing values in the label columns are NOT filtered here.
df = pd.read_csv('/content/drive/MyDrive/ATCTM/EVENT_CLASSIFICATION/EC-demo.csv')
df.dropna(subset=['TEXT'], inplace=True)

# One LabelEncoder per categorical target; the fitted encoders are kept at
# module level so predictions can be mapped back to their string labels.
le_event, le_emotion, le_tense = LabelEncoder(), LabelEncoder(), LabelEncoder()

# (source column, encoded column, encoder)
_categorical_targets = (
    ('EVENT_TYPE', 'EVENT_TYPE_ID', le_event),
    ('EMOTION', 'EMOTION_ID', le_emotion),
    ('TENSE', 'TENSE_ID', le_tense),
)
for _source_col, _encoded_col, _encoder in _categorical_targets:
    df[_encoded_col] = _encoder.fit_transform(df[_source_col])

# Sarcasm is a binary target and is only cast, not label-encoded.
# NOTE(review): assumes SARCASM holds values castable to int (bool / 0-1) -- confirm.
df['SARCASM_ID'] = df['SARCASM'].astype(int)

# Single 80/20 split, stratified on the event type only (seeded for repeatability).
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(splitter.split(df, df['EVENT_TYPE_ID']))
train_df = df.iloc[train_idx].reset_index(drop=True)
val_df = df.iloc[val_idx].reset_index(drop=True)


# Dataset Class
class EventDataset(Dataset):
    """Map-style dataset yielding tokenized text plus the four encoded targets.

    Args:
        dataframe: Table providing the columns ``TEXT``, ``EVENT_TYPE_ID``,
            ``EMOTION_ID``, ``SARCASM_ID`` and ``TENSE_ID``. Rows are accessed
            by position (``iloc``).
        tokenizer: Callable invoked as ``tokenizer(text, padding=...,
            truncation=..., max_length=..., return_tensors='pt')``; its result
            must expose ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_len: Value forwarded to the tokenizer as ``max_length``.
    """

    # (key in the returned sample, dataframe column holding the encoded label)
    _LABEL_COLUMNS = (
        ('event_type', 'EVENT_TYPE_ID'),
        ('emotion', 'EMOTION_ID'),
        ('sarcasm', 'SARCASM_ID'),
        ('tense', 'TENSE_ID'),
    )

    def __init__(self, dataframe, tokenizer, max_len=128):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the backing dataframe."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return one sample as a dict of tensors for positional index *idx*."""
        record = self.data.iloc[idx]
        tokens = self.tokenizer(
            record['TEXT'],
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
            return_tensors='pt',
        )
        # squeeze(0) removes the size-1 leading dimension added by
        # return_tensors='pt' for a single text.
        sample = {
            field: tokens[field].squeeze(0)
            for field in ('input_ids', 'attention_mask')
        }
        for key, column in self._LABEL_COLUMNS:
            sample[key] = torch.tensor(record[column])
        return sample

# Model Definition
class EventClassifier(nn.Module):
    """BERT encoder with a shared layer feeding four task-specific heads.

    The pooled ``bert-base-uncased`` output goes through dropout and a shared
    768 -> 512 linear layer. Each task (event type, emotion, sarcasm, tense)
    then applies its own 512 -> 256 projection followed by an output layer.

    NOTE(review): no activation is applied between the shared layer, the
    projections and the output layers, so each head is a composition of
    purely linear maps of the (dropped-out) pooled output.

    Args:
        num_event_types: Output width of the event-type head.
        num_emotions: Output width of the emotion head.
        num_tenses: Output width of the tense head.
    """

    def __init__(self, num_event_types, num_emotions, num_tenses):
        super().__init__()
        encoder_dim, shared_dim, task_dim = 768, 512, 256

        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(0.3)
        self.shared = nn.Linear(encoder_dim, shared_dim)

        # Per-task projections on top of the shared representation.
        self.event_proj = nn.Linear(shared_dim, task_dim)
        self.emotion_proj = nn.Linear(shared_dim, task_dim)
        self.sarcasm_proj = nn.Linear(shared_dim, task_dim)
        self.tense_proj = nn.Linear(shared_dim, task_dim)

        # Output layers; sarcasm is binary and therefore has a single unit.
        self.event_head = nn.Linear(task_dim, num_event_types)
        self.emotion_head = nn.Linear(task_dim, num_emotions)
        self.sarcasm_head = nn.Linear(task_dim, 1)
        self.tense_head = nn.Linear(task_dim, num_tenses)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and all four heads.

        Returns:
            Dict with keys ``'event_type'``, ``'emotion'`` and ``'tense'``
            (raw head outputs) and ``'sarcasm'`` (head output passed through
            a sigmoid, last dimension of size 1).
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        features = self.shared(self.dropout(encoded.pooler_output))

        event_logits = self.event_head(self.event_proj(features))
        emotion_logits = self.emotion_head(self.emotion_proj(features))
        sarcasm_prob = torch.sigmoid(self.sarcasm_head(self.sarcasm_proj(features)))
        tense_logits = self.tense_head(self.tense_proj(features))

        return {
            'event_type': event_logits,
            'emotion': emotion_logits,
            'sarcasm': sarcasm_prob,
            'tense': tense_logits,
        }

# Tokenizer and DataLoader
# Both splits share one tokenizer; only the training loader shuffles.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_data, val_data = (EventDataset(split, tokenizer) for split in (train_df, val_df))
train_loader = DataLoader(train_data, shuffle=True, batch_size=8)
val_loader = DataLoader(val_data, batch_size=8)

# Emotion class weights
# 'balanced' weights are computed over the full dataframe (train + validation),
# so every encoded emotion id receives a weight.
from sklearn.utils.class_weight import compute_class_weight
emotion_ids = df['EMOTION_ID']
weights = compute_class_weight('balanced', classes=np.unique(emotion_ids), y=emotion_ids)
emotion_weights = torch.tensor(weights, dtype=torch.float)

# Setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = EventClassifier(
    num_event_types=len(le_event.classes_),
    num_emotions=len(le_emotion.classes_),
    num_tenses=len(le_tense.classes_),
)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=2e-5)

# Unweighted cross-entropy is shared by the event-type and tense heads; the
# emotion head uses the class-weighted variant; sarcasm (already passed through
# a sigmoid inside the model) uses plain binary cross-entropy.
loss_fn_ce = nn.CrossEntropyLoss()
loss_fn_emo = nn.CrossEntropyLoss(weight=emotion_weights.to(device))
loss_fn_bce = nn.BCELoss()

# Training Loop
# Each step sums the four per-task losses with equal weight and performs a
# single backward pass / optimizer step on the combined loss.
for epoch in range(4):
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(input_ids, attention_mask)

        loss_event = loss_fn_ce(outputs['event_type'], batch['event_type'].to(device))
        loss_emo = loss_fn_emo(outputs['emotion'], batch['emotion'].to(device))
        # squeeze(-1) removes only the trailing size-1 dimension of the sarcasm
        # output. A bare squeeze() would also remove the batch dimension when
        # the final batch holds a single sample, producing a 0-d tensor whose
        # shape no longer matches the (1,) target, which BCELoss rejects.
        loss_sar = loss_fn_bce(outputs['sarcasm'].squeeze(-1), batch['sarcasm'].float().to(device))
        loss_tense = loss_fn_ce(outputs['tense'], batch['tense'].to(device))

        loss = loss_event + loss_emo + loss_sar + loss_tense

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Reported value is the sum of per-batch losses over the epoch, not a mean.
    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Evaluation (Event Type only)
# Collects argmax predictions of the event-type head over the validation
# loader and reports per-class metrics plus overall accuracy.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in val_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        outputs = model(input_ids, attention_mask)

        all_preds += outputs['event_type'].argmax(dim=1).cpu().tolist()
        all_labels += batch['event_type'].tolist()

# Pin the label set to every encoded event-type id. Without `labels`,
# classification_report infers the classes from the labels/predictions it
# sees and raises when that count differs from len(target_names), which
# happens whenever a class is absent from the validation split and is never
# predicted.
event_label_ids = list(range(len(le_event.classes_)))

print("\nEvent Type Classification Report:")
print(classification_report(all_labels, all_preds, labels=event_label_ids, target_names=le_event.classes_))

accuracy = accuracy_score(all_labels, all_preds)
print(f"\nEvent Type Accuracy: {accuracy * 100:.2f}% out of 100\n")

# Utility Functions
sid = SentimentIntensityAnalyzer()


def analyze_sentiment(text):
    """Return a sentiment valence for *text*, rounded to one decimal place.

    The analyzer's ``compound`` score is shifted and scaled with
    ``(score + 1) / 2`` and the result is clamped to the range 0.0-1.0.
    NOTE(review): the rescaling presumes ``compound`` lies in [-1, 1], as
    VADER documents -- the clamp guards against anything outside that.
    """
    compound = sid.polarity_scores(text)['compound']
    valence = round((compound + 1) / 2, 1)
    if valence > 1.0:
        return 1.0
    if valence < 0.0:
        return 0.0
    return valence

import re

# Keywords are anchored with a leading word boundary only: inflected forms
# such as "guaranteed" or "certainly" still match, while hits inside other
# words ("pressure", "measure", "uncertain") do not.
_CERTAINTY_RE = re.compile(r"\b(?:sure|definitely|certain|guarantee|confident|no\s+doubt)")
_FUZZY_RE = re.compile(r"\b(?:maybe|possibly|might|doubt)")
# "not sure" contains the certainty keyword "sure", so it is detected and
# removed before the certainty check runs.
_NOT_SURE_RE = re.compile(r"\bnot\s+sure")


def analyze_certainty(text):
    """Score how certain *text* sounds using keyword heuristics.

    Args:
        text: The sentence to inspect; matching is case-insensitive.

    Returns:
        ``1.0`` if a certainty keyword is present, ``0.3`` if only a hedging
        keyword (including "not sure") is present, and ``0.6`` otherwise.
        As before, certainty keywords take precedence over hedging ones.
    """
    lowered = text.lower()
    hedged = _NOT_SURE_RE.search(lowered) is not None
    # Blank out "not sure" so its "sure" cannot count as a certainty keyword.
    remainder = _NOT_SURE_RE.sub(" ", lowered)

    if _CERTAINTY_RE.search(remainder):
        return 1.0
    if hedged or _FUZZY_RE.search(remainder):
        return 0.3
    return 0.6

# Inference
def predict_event(text):
    """Classify a single sentence with the module-level model.

    The text is tokenized (padded/truncated to 128 tokens), run through the
    model without gradient tracking, and each head's output is decoded with
    the matching label encoder. Sentiment valence and certainty come from the
    rule-based helpers rather than the model.

    Args:
        text: The sentence to classify.

    Returns:
        Dict with keys ``TEXT``, ``EVENT_TYPE``, ``EVENT_GROUP`` (always the
        constant ``"employment"``), ``SENTIMENT_VALENCE``, ``EMOTION``,
        ``SARCASM`` (bool), ``TENSE`` and ``CERTAINTY``.
    """
    model.eval()
    tokens = tokenizer(text, return_tensors='pt', truncation=True, padding='max_length', max_length=128)
    ids = tokens['input_ids'].to(device)
    mask = tokens['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(ids, mask)

    def _decode(encoder, head):
        # Most likely class index for this head, mapped back to its label.
        class_id = torch.argmax(outputs[head], dim=1).item()
        return encoder.inverse_transform([class_id])[0]

    event_label = _decode(le_event, 'event_type')
    emotion_label = _decode(le_emotion, 'emotion')
    # The sarcasm output is already a sigmoid probability; threshold at 0.5.
    is_sarcastic = outputs['sarcasm'].item() > 0.5
    tense_label = _decode(le_tense, 'tense')

    result = {"TEXT": text}
    result["EVENT_TYPE"] = event_label
    result["EVENT_GROUP"] = "employment"
    result["SENTIMENT_VALENCE"] = analyze_sentiment(text)
    result["EMOTION"] = emotion_label
    result["SARCASM"] = is_sarcastic
    result["TENSE"] = tense_label
    result["CERTAINTY"] = analyze_certainty(text)
    return result

# Example Usage
# Print the prediction dict for two sample sentences.
for sample_text in (
    "They finally laid me off after months of warnings.",
    "Just got hired at a startup in Berlin. I'm thrilled!",
):
    print(predict_event(sample_text))