In [None]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch import nn
from torch.optim import AdamW
from tqdm import tqdm
from textblob import TextBlob
import nltk
nltk.download('punkt')
from google.colab import drive

# Mount Google Drive so the CSV stored under MyDrive can be read
drive.mount('/content/drive')

# Read the labelled dataset and discard rows that have no TEXT value
df = pd.read_csv('/content/drive/MyDrive/ATCTM/EVENT_CLASSIFICATION/EC-demo.csv')
df.dropna(subset=['TEXT'], inplace=True)

# One LabelEncoder per categorical target; they stay at module level because
# the fitted encoders are needed later to turn predicted ids back into labels
le_event, le_emotion, le_tense = LabelEncoder(), LabelEncoder(), LabelEncoder()

# Add an integer id column next to each categorical target column
for label_col, id_col, encoder in (
    ('EVENT_TYPE', 'EVENT_TYPE_ID', le_event),
    ('EMOTION', 'EMOTION_ID', le_emotion),
    ('TENSE', 'TENSE_ID', le_tense),
):
    df[id_col] = encoder.fit_transform(df[label_col])

# SARCASM is cast directly to int instead of being label-encoded
df['SARCASM_ID'] = df['SARCASM'].astype(int)

# Dataset Class
class EventDataset(Dataset):
    """Map-style dataset yielding tokenized text plus the four target labels.

    Args:
        dataframe: DataFrame providing the columns ``TEXT``,
            ``EVENT_TYPE_ID``, ``EMOTION_ID``, ``SARCASM_ID`` and ``TENSE_ID``.
        tokenizer: Callable used to encode ``TEXT``; it is invoked with
            ``padding='max_length'``, ``truncation=True``, ``max_length`` and
            ``return_tensors='pt'`` and must return a mapping with
            ``input_ids`` and ``attention_mask`` tensors.
        max_len: Length every sequence is padded or truncated to.
    """

    def __init__(self, dataframe, tokenizer, max_len=128):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the encoded sample at positional index ``idx`` as a dict."""
        # Positional lookup: the frame may carry a non-contiguous index
        # (e.g. after dropna or a train/validation split).
        row = self.data.iloc[idx]
        # Use the tokenizer handed to this instance, not a module-level global,
        # so the dataset works with whichever tokenizer the caller supplied.
        encoded = self.tokenizer(
            row['TEXT'],
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
            return_tensors='pt',
        )
        return {
            # The tokenizer output has a leading dim of size 1 for a single
            # text; drop it so the DataLoader can stack samples into a batch.
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
            # Explicit integer dtype so labels are always int64 class indices.
            'event_type': torch.tensor(row['EVENT_TYPE_ID'], dtype=torch.long),
            'emotion': torch.tensor(row['EMOTION_ID'], dtype=torch.long),
            'sarcasm': torch.tensor(row['SARCASM_ID'], dtype=torch.long),
            'tense': torch.tensor(row['TENSE_ID'], dtype=torch.long),
        }

# Model Definition
class EventClassifier(nn.Module):
    """Multi-task classifier: a BERT encoder feeding four linear heads.

    The encoder's ``pooler_output`` goes through dropout and one shared
    768 -> 512 linear layer; each task then has its own linear head on top of
    that shared representation.

    Args:
        num_event_types: Output size of the event-type head.
        num_emotions: Output size of the emotion head.
        num_tenses: Output size of the tense head.
    """

    def __init__(self, num_event_types, num_emotions, num_tenses):
        super().__init__()
        # Encoder and shared trunk
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(0.3)
        self.shared = nn.Linear(768, 512)

        # Task heads; sarcasm is binary and therefore has a single output unit
        self.event_head = nn.Linear(512, num_event_types)
        self.emotion_head = nn.Linear(512, num_emotions)
        self.sarcasm_head = nn.Linear(512, 1)
        self.tense_head = nn.Linear(512, num_tenses)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and all heads, returning one entry per task.

        ``event_type``, ``emotion`` and ``tense`` are the raw head outputs;
        ``sarcasm`` has already been passed through a sigmoid.
        """
        pooled = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
        ).pooler_output
        features = self.shared(self.dropout(pooled))

        predictions = {}
        predictions['event_type'] = self.event_head(features)
        predictions['emotion'] = self.emotion_head(features)
        predictions['sarcasm'] = torch.sigmoid(self.sarcasm_head(features))
        predictions['tense'] = self.tense_head(features)
        return predictions

# Tokenizer, 90/10 train/validation split, and the corresponding loaders
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_df, val_df = train_test_split(df, test_size=0.1, random_state=42)
train_data, val_data = (EventDataset(frame, tokenizer) for frame in (train_df, val_df))
train_loader = DataLoader(train_data, batch_size=8, shuffle=True)
val_loader = DataLoader(val_data, batch_size=8)

# Device, model, optimizer and loss functions
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = EventClassifier(
    num_event_types=len(le_event.classes_),
    num_emotions=len(le_emotion.classes_),
    num_tenses=len(le_tense.classes_),
).to(device)
optimizer = AdamW(model.parameters(), lr=2e-5)
# Cross-entropy for the multi-class heads; BCELoss for the sarcasm head,
# whose output has already been passed through a sigmoid by the model
loss_fn_ce, loss_fn_bce = nn.CrossEntropyLoss(), nn.BCELoss()

# Training Loop
# Four passes over train_loader, optimising the unweighted sum of the four
# task losses (event type, emotion, sarcasm, tense).
for epoch in range(4):
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(input_ids, attention_mask)

        loss_event = loss_fn_ce(outputs['event_type'], batch['event_type'].to(device))
        loss_emo = loss_fn_ce(outputs['emotion'], batch['emotion'].to(device))
        # squeeze(-1) drops only the trailing size-1 dimension. A bare
        # squeeze() would also drop the batch dimension when the final batch
        # holds a single sample, leaving a 0-d tensor whose shape no longer
        # matches the 1-element target, which BCELoss rejects.
        loss_sar = loss_fn_bce(outputs['sarcasm'].squeeze(-1), batch['sarcasm'].float().to(device))
        loss_tense = loss_fn_ce(outputs['tense'], batch['tense'].to(device))

        loss = loss_event + loss_emo + loss_sar + loss_tense

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # total_loss is the sum of the per-batch losses for this epoch, not a mean
    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")

# Utility Functions

def analyze_sentiment(text):
    """Return a sentiment valence for ``text``, rounded to three decimals.

    The value is TextBlob's polarity score shifted by one and halved,
    i.e. ``(polarity + 1) / 2``.
    """
    polarity = TextBlob(text).sentiment.polarity
    rescaled = (polarity + 1) / 2
    return round(rescaled, 3)

def analyze_certainty(text):
    """Return a heuristic certainty score for ``text``.

    Returns 1.0 when the text contains a word that starts with one of the
    certainty cues ("sure", "definitely", "certain", "guarantee"), and 0.6
    otherwise. Matching is case-insensitive.

    Cues are anchored at the start of a word so that derived forms such as
    "surely", "certainly" or "guaranteed" still count, while words that merely
    contain a cue ("unsure", "uncertain", "pressure") do not.
    """
    import re  # local import keeps this helper self-contained

    certainty_words = ["sure", "definitely", "certain", "guarantee"]
    cue_pattern = r"\b(?:" + "|".join(map(re.escape, certainty_words)) + r")"
    return 1.0 if re.search(cue_pattern, text.lower()) else 0.6

# Inference
model.eval()
def predict_event(text, max_len=128):
    """Classify a single text and return the predictions as a dict.

    Args:
        text: The sentence to classify.
        max_len: Length the tokenized input is padded or truncated to.
            Defaults to 128, matching the default of ``EventDataset``.

    Returns:
        A dict with the keys ``TEXT``, ``EVENT_TYPE``, ``EVENT_GROUP``,
        ``SENTIMENT_VALENCE``, ``EMOTION``, ``SARCASM``, ``TENSE`` and
        ``CERTAINTY``. ``EVENT_TYPE``, ``EMOTION`` and ``TENSE`` are decoded
        back to their original labels; ``SARCASM`` is a bool.
    """
    # Force inference mode on every call: if training (model.train()) ran
    # since the last model.eval(), dropout would still be active and the
    # predictions would vary from call to call.
    model.eval()

    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding='max_length', max_length=max_len)
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids, attention_mask)

    # Highest-scoring class per head, mapped back through the fitted encoders
    pred_event = le_event.inverse_transform([torch.argmax(outputs['event_type'], dim=1).item()])[0]
    pred_emotion = le_emotion.inverse_transform([torch.argmax(outputs['emotion'], dim=1).item()])[0]
    # The model already applies a sigmoid to the sarcasm head, so 0.5 is the
    # decision threshold on a probability
    pred_sarcasm = outputs['sarcasm'].item() > 0.5
    pred_tense = le_tense.inverse_transform([torch.argmax(outputs['tense'], dim=1).item()])[0]

    return {
        "TEXT": text,
        "EVENT_TYPE": pred_event,
        # Constant value: the event group is not predicted by the model
        "EVENT_GROUP": "employment",
        "SENTIMENT_VALENCE": analyze_sentiment(text),
        "EMOTION": pred_emotion,
        "SARCASM": pred_sarcasm,
        "TENSE": pred_tense,
        "CERTAINTY": analyze_certainty(text)
    }

# Example Usage: one lay-off sentence and one hiring sentence
for example_text in (
    "They finally laid me off after months of warnings.",
    "Just got hired at a startup in Berlin. I'm thrilled!",
):
    print(predict_event(example_text))

In [None]:
# Hand-written employment-related sentences used to eyeball the predictions
sample_texts = [
    "I just lost my job today and I feel completely defeated.",
    "They hired me for the dream role I always wanted!",
    "Laid off again, and it stings just like the first time.",
    "Finally got the offer letter! Starting next Monday!",
    "They fired me over a small mistake, feels unfair.",
    "Can't believe I actually made it through the interview process!",
    "Got laid off along with half the team, it's chaos.",
    "I'm joining a new firm next week, pumped for the switch.",
    "My boss just fired me over a petty argument.",
    "Excited to start fresh with this new opportunity.",
    "They gave me a pink slip without warning.",
    "So thrilled to be hired by a company I admire!",
    "I was let go due to cost-cutting, nothing personal they said.",
    "I nailed the interview and just received the job confirmation.",
    "After years of service, they just fired me like that.",
    "Job hunt finally ends — I’m hired!",
    "Fired again. I think I’m the problem.",
    "Secured a position at a firm I’ve always respected.",
    "They dismissed me right before the holidays. Brutal.",
    "Starting my new role next Monday — excited and nervous!"
]

# map() is lazy, so each text is still classified and printed one at a time
for prediction in map(predict_event, sample_texts):
    print(prediction)