In [None]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch import nn
from torch.optim import AdamW
from tqdm import tqdm
from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
nltk.download('punkt')
nltk.download('vader_lexicon')
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit

In [None]:
# Load the labelled event corpus from the mounted Drive folder.
EVENT_CSV_PATH = '/content/drive/MyDrive/ATCTM/EVENT_CLASSIFICATION/EC-demo.csv'
df = pd.read_csv(EVENT_CSV_PATH)

# Rows with no TEXT cannot be tokenized, so drop them (in place, keeping `df`
# a plain frame rather than a derived copy).
df.dropna(inplace=True, subset=['TEXT'])

# Generalize Event Types
def generalize_event_type(label):
    """Collapse a fine-grained event label into its coarse category.

    Args:
        label: Fine-grained event label (e.g. ``'got_hired'``).

    Returns:
        The name of the first category whose member list contains ``label``,
        or ``'other'`` when no category lists it.
    """
    # Ordered (category, members) pairs; the first match wins.
    category_members = (
        ('job_entry', ('got_hired', 'started_new_career')),
        ('job_exit', ('got_fired', 'got_laid_off', 'retired', 'failed_interview')),
        ('job_transition', ('changed_jobs', 'became_freelancer', 'started_business')),
        ('job_loss', ('got_demoted', 'got_pay_cut', 'business_failed')),
        ('job_gain', ('got_promoted', 'got_raise', 'got_tenure', 'got_work_award')),
        ('job_break', ('got_sabbatical', 'got_late_to_work', 'missed_deadline')),
        ('workplace_issue', ('workplace_harassment',)),
    )
    for category, members in category_members:
        if label in members:
            return category
    return 'other'

# Derive the coarse category for every row from its fine-grained label.
df['EVENT_GENERAL'] = df['EVENT_TYPE'].apply(generalize_event_type)

# Integer-encode both label granularities (one encoder per column).
le_event, le_general = LabelEncoder(), LabelEncoder()
df['EVENT_TYPE_ID'] = le_event.fit_transform(df['EVENT_TYPE'])
df['EVENT_GENERAL_ID'] = le_general.fit_transform(df['EVENT_GENERAL'])

# Single 80/20 shuffle split, stratified on the fine-grained label id.
# n_splits=1 means the splitter yields exactly one (train, val) index pair.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(splitter.split(df, df['EVENT_TYPE_ID']))
train_df = df.iloc[train_idx].reset_index(drop=True)
val_df = df.iloc[val_idx].reset_index(drop=True)

# Dataset Class
class EventDataset(Dataset):
    """Dataset that tokenizes one dataframe row per item.

    Each item is a dict with ``input_ids`` and ``attention_mask`` (the
    tokenizer outputs with their leading batch dimension squeezed away) and
    ``event_type`` (the row's ``EVENT_GENERAL_ID`` as a tensor).
    """

    def __init__(self, dataframe, tokenizer, max_len=128):
        """Store the frame, the tokenizer callable and the padded length."""
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the wrapped dataframe."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize the ``TEXT`` of row ``idx`` and pair it with its label."""
        record = self.data.iloc[idx]
        tokens = self.tokenizer(
            record['TEXT'],
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
            return_tensors='pt',
        )
        # The tokenizer is asked for 'pt' tensors with a leading batch axis;
        # squeeze(0) removes it so the DataLoader can add its own.
        sample = {
            key: tokens[key].squeeze(0)
            for key in ('input_ids', 'attention_mask')
        }
        sample['event_type'] = torch.tensor(record['EVENT_GENERAL_ID'])
        return sample

# Model Definition
class EventClassifier(nn.Module):
    """BERT encoder with a small MLP head for event-type classification.

    The head is ``dropout -> Linear(768, 512) -> ReLU -> Linear(512, 256)
    -> ReLU -> Linear(256, num_event_types)``. The ReLUs are what make the
    head an MLP: without them the three stacked linear layers are
    mathematically equivalent to a single affine map, so the extra
    parameters would add no capacity. Parameter names and shapes are
    unchanged by the activations.
    """

    def __init__(self, num_event_types):
        """Build the encoder and head.

        Args:
            num_event_types: Number of output classes (size of the logits).
        """
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(0.3)
        self.shared = nn.Linear(768, 512)
        self.event_proj = nn.Linear(512, 256)
        self.event_head = nn.Linear(256, num_event_types)

    def forward(self, input_ids, attention_mask):
        """Return unnormalized class logits for a batch.

        Args:
            input_ids: Token ids passed through to the BERT encoder.
            attention_mask: Attention mask passed through to the encoder.

        Returns:
            The output of ``event_head`` (one logit per event type); no
            softmax is applied here.
        """
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        x = self.dropout(output.pooler_output)
        # Non-linearities between the projections keep the head from
        # collapsing into one linear transformation.
        x = nn.functional.relu(self.shared(x))
        x = nn.functional.relu(self.event_proj(x))
        return self.event_head(x)

# Tokenizer and DataLoader
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Wrap each split in a dataset; only the training loader is shuffled.
train_data = EventDataset(train_df, tokenizer)
val_data = EventDataset(val_df, tokenizer)
train_loader = DataLoader(dataset=train_data, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=8)

# Setup
# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# One output logit per generalized event category.
model = EventClassifier(len(le_general.classes_)).to(device)
optimizer = AdamW(model.parameters(), lr=2e-5)
loss_fn_ce = nn.CrossEntropyLoss()

# Training Loop
for epoch in range(4):
    # Re-enter training mode at the start of every epoch.
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader):
        # Move the batch to the model's device.
        input_ids, attention_mask, labels = (
            batch['input_ids'].to(device),
            batch['attention_mask'].to(device),
            batch['event_type'].to(device),
        )

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = loss_fn_ce(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # NOTE: this is the sum of per-batch losses over the epoch, not a mean.
    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")

In [None]:
# Evaluate the fine-tuned model on the validation split.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in val_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['event_type'].to(device)

        outputs = model(input_ids, attention_mask)
        # argmax over the class dimension gives the predicted class id.
        all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

# Pass the full list of class ids explicitly. Without `labels`, the report
# infers classes from the values present in y_true/y_pred and raises a
# ValueError when that count differs from len(target_names) -- which happens
# whenever a category is absent from both the validation labels and the
# predictions.
report_label_ids = list(range(len(le_general.classes_)))

print("\nGeneralized Event Type Classification Report:")
print(classification_report(
    all_labels,
    all_preds,
    labels=report_label_ids,
    target_names=le_general.classes_,
    zero_division=0,  # classes with no samples/predictions score 0 without warnings
))

accuracy = accuracy_score(all_labels, all_preds)
print(f"\nGeneralized Event Type Accuracy: {accuracy * 100:.2f}% out of 100\n")

# Utility Functions
# Module-level VADER analyzer, created once and reused by analyze_sentiment.
# NOTE(review): presumably relies on the 'vader_lexicon' resource fetched via
# nltk.download at the top of the notebook -- confirm before reordering cells.
sid = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Return a sentiment valence for ``text`` on a 0.0-1.0 scale.

    The analyzer's ``compound`` score is shifted and halved
    (``(score + 1) / 2``), rounded to one decimal place, and finally clamped
    to ``[0.0, 1.0]``.
    NOTE(review): the rescaling assumes ``compound`` lies in [-1, 1] --
    confirm against the VADER documentation.
    """
    compound = sid.polarity_scores(text)['compound']
    scaled = round((compound + 1) / 2, 1)
    # Clamp defensively in case the score falls outside the assumed range.
    if scaled > 1.0:
        return 1.0
    if scaled < 0.0:
        return 0.0
    return scaled

def analyze_certainty(text):
    """Score how certain ``text`` sounds using keyword heuristics.

    Matching is case-insensitive and on whole words, so that words which
    merely contain a keyword ("pressure", "mighty") are ignored, and negated
    or opposite forms ("not sure", "unsure", "uncertain") count as hedging
    rather than as certainty. "no doubt" counts as certainty while a bare
    "doubt" counts as hedging.

    Args:
        text: The sentence to inspect.

    Returns:
        ``1.0`` if a certainty cue is present, otherwise ``0.3`` if a
        hedging cue is present, otherwise the neutral default ``0.6``.
    """
    import re  # local import: `re` is not among this file's top-level imports

    text = text.lower()

    # Certainty cues. The lookbehind rejects a cue directly preceded by
    # "not " so "not sure" / "not certain" are not read as confident.
    certain_pattern = (
        r"\bno doubt\b"
        r"|(?<!not )\b(?:sure(?:ly)?|definitely|certain(?:ly)?"
        r"|guarantee[sd]?|confident(?:ly)?)\b"
    )
    # Hedging cues. "doubt" is skipped when it is part of "no doubt".
    hedge_pattern = (
        r"\b(?:maybe|possibly|might|unsure|uncertain)\b"
        r"|\bnot (?:sure|certain|confident)\b"
        r"|(?<!no )\bdoubt(?:s|ed|ful)?\b"
    )

    if re.search(certain_pattern, text):
        return 1.0
    if re.search(hedge_pattern, text):
        return 0.3
    return 0.6

# Inference
def predict_event(text):
    """Classify one text and bundle the result with heuristic scores.

    Args:
        text: The raw sentence to classify.

    Returns:
        A dict with the input ``TEXT``, the predicted generalized category
        (``EVENT_GENERAL``), a constant ``EVENT_GROUP`` of ``"employment"``,
        and the ``SENTIMENT_VALENCE`` / ``CERTAINTY`` values produced by
        ``analyze_sentiment`` and ``analyze_certainty``.
    """
    model.eval()

    # Tokenize with the same padding/truncation length the datasets default to.
    batch = tokenizer(text, return_tensors='pt', truncation=True, padding='max_length', max_length=128)
    ids_on_device = batch['input_ids'].to(device)
    mask_on_device = batch['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(ids_on_device, mask_on_device)

    # Highest-scoring class id -> original category name.
    class_id = torch.argmax(logits, dim=1).item()
    pred_general = le_general.inverse_transform([class_id])[0]

    result = {
        "TEXT": text,
        "EVENT_GENERAL": pred_general,
        "EVENT_GROUP": "employment",
        "SENTIMENT_VALENCE": analyze_sentiment(text),
        "CERTAINTY": analyze_certainty(text)
    }
    return result

# Example Usage
# Two quick smoke-test sentences, printed one prediction per line.
for example_text in (
    "They finally laid me off after months of warnings.",
    "Just got hired at a startup in Berlin. I'm thrilled!",
):
    print(predict_event(example_text))


In [None]:
# Hand-written demo inputs: two sentences per fine-grained event label.
# The `# label` comment above each pair names the label the pair is meant to
# illustrate; predict_event itself only reports the generalized category.
sample_texts = [
    # got_hired
    "Landed a job at Amazon! Starting next month — dream come true.",
    "Finally got hired after 6 months of applying everywhere!",

    # got_fired
    "I got fired for missing a single deadline. Brutal.",
    "Fired today. No severance, no explanation. Just like that.",

    # got_laid_off
    "Mass layoffs hit us today. I'm officially unemployed.",
    "Got laid off from my role — feels surreal and scary.",

    # got_promoted
    "Promotion confirmed! I'm the new marketing lead!",
    "Manager just announced my promotion during the meeting. Still shocked!",

    # got_demoted
    "Was demoted today. They said it's 'temporary'. Yeah, right.",
    "Got moved back to associate level. Basically a demotion.",

    # changed_jobs
    "Switched from banking to edtech — already loving the new culture.",
    "Changed jobs last week. It’s a bit chaotic but exciting!",

    # started_new_career
    "Started a new chapter as a freelance writer!",
    "Quit my job to pursue graphic design — feels right finally.",

    # retired
    "Clocked out for the last time — officially retired!",
    "I’m retired now. Looking forward to gardening and naps.",

    # got_raise
    "My boss surprised me with a raise today — totally unexpected!",
    "Annual review went great — got a decent raise!",

    # got_pay_cut
    "Pay cut hits again. Not sure how I’ll manage rent this month.",
    "They reduced my salary by 15% due to performance. Pretty demotivating.",

    # started_business
    "Launched my online store today — finally a business owner!",
    "Started my own consulting firm — day one is here!",

    # business_failed
    "My business couldn’t survive the recession. Shutting down now.",
    "We had to close shop. Startup failed after two years of grind.",

    # got_late_to_work
    "Was late to work again. Boss wasn’t thrilled.",
    "Woke up late and missed the morning meeting. Oops!",

    # missed_deadline
    "Missed the project deadline. Client isn’t happy.",
    "Another deadline missed... really need to manage my time better.",

    # got_work_award
    "Won best performer of the year! Feeling proud!",
    "Received the excellence award at work — huge moment for me!",

    # workplace_harassment
    "Facing harassment at work but HR isn’t taking action.",
    "My coworker keeps making inappropriate comments. I’ve reported it.",

    # got_sabbatical
    "Approved for a 3-month sabbatical — can’t wait to travel.",
    "Finally taking a sabbatical after 8 years of non-stop work.",

    # became_freelancer
    "Left my 9-to-5 to become a full-time freelancer!",
    "Now freelancing for startups worldwide — loving the freedom!",

    # got_tenure
    "Received tenure today! Officially permanent faculty.",
    "Tenure granted. All the late nights finally paid off.",

    # failed_interview
    "Didn't make it past the second round. Another failed interview.",
    "Failed yet another interview. Starting to lose hope."
]

# Run every demo sentence through the classifier and print each result dict.
for txt in sample_texts:
    print(predict_event(txt))