In [18]:
!pip install transformers datasets torch sklearn tqdm indic-transliteration
!pip install indic-transliteration
!pip install --upgrade indic-transliteration

Collecting sklearn
  Using cached sklearn-0.0.post12.tar.gz (2.6 kB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.


In [19]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from collections import defaultdict
from tqdm.auto import tqdm
import os
from typing import Dict, Tuple, List
from sklearn.model_selection import train_test_split

# Disable wandb logging
os.environ['WANDB_MODE'] = 'disabled'


def convert_hinglish_to_hindi(text: str) -> str:
    
    def is_mostly_roman(text: str) -> bool:
        
        if not text:
            return False
        devanagari_chars = sum(1 for ch in text if '\u0900' <= ch <= '\u097F')
        return (devanagari_chars / len(text)) < 0.3
        
    def transliterate_to_hindi(text: str) -> str:
        """Convert Roman text to Devanagari."""
        from indic_transliteration import sanscript
        return sanscript.transliterate(text, sanscript.ITRANS, sanscript.DEVANAGARI)

    if not text:
        return text
    return transliterate_to_hindi(text) if is_mostly_roman(text) else text

def aggregate_labels(df: pd.DataFrame, lang: str) -> pd.DataFrame:
    
    def get_annotator_cols(lang: str) -> List[str]:
        
        return {
            'en': [f"en_a{i}" for i in range(1, 7)],
            'hi': [f"hi_a{i}" for i in range(1, 6)], 
            'ta': [f"ta_a{i}" for i in range(1, 7)],
        }[lang]
    
    def calculate_majority_label(row: pd.Series) -> int:
        
        return int(row.mean() >= 0.5)

    annotator_cols = get_annotator_cols(lang)
    df[annotator_cols] = df[annotator_cols].apply(pd.to_numeric, errors='coerce')
    df['label'] = df[annotator_cols].apply(calculate_majority_label, axis=1)
    return df


class Vocabulary:
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0
        
        # Add special tokens
        self.add_word('<pad>')  # Padding token
        self.add_word('<unk>')  # Unknown word token
        
    def add_word(self, word):
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1
            
    def __len__(self):
        return len(self.word2idx)

def build_vocab(texts, min_freq=2):
    def count_words(texts):
        
        word_counts = defaultdict(int)
        for text in texts:
            for word in text.split():
                word_counts[word.lower()] += 1
        return word_counts
    
    def add_frequent_words(vocab, word_counts, min_freq):
        
        for word, count in word_counts.items():
            if count >= min_freq:
                vocab.add_word(word)
        return vocab
    
    vocab = Vocabulary()
    word_counts = count_words(texts)
    vocab = add_frequent_words(vocab, word_counts, min_freq)
    return vocab


In [20]:
class TextDataset(Dataset):
    def __init__(self, texts, vocab, labels=None, label2=None):
        self.texts = []
        self.vocab = vocab
        self.labels = []
        self.label2 = []
        
        for i, text in enumerate(texts):
            if isinstance(text, str) and text.strip():
                self.texts.append(text)
                if labels is not None and i < len(labels):
                    self.labels.append(labels[i])
                if label2 is not None and i < len(label2):
                    self.label2.append(label2[i])
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        def get_text_indices(text):
            
            return [self.vocab.word2idx.get(word.lower(), self.vocab.word2idx['<unk>']) 
                    for word in text.split()]
        
        def build_item_dict(indices):
             
            item = {
                'text': torch.tensor(indices, dtype=torch.long),
            }
            if len(self.labels) > 0:
                item['label'] = torch.tensor(self.labels[idx], dtype=torch.long)
            if len(self.label2) > 0:
                item['label2'] = torch.tensor(self.label2[idx], dtype=torch.long)
            return item
        
        text = self.texts[idx]
        indices = get_text_indices(text)
        return build_item_dict(indices)

def collate_fn(batch):
    
    def build_base_dict(texts):
        
        texts_padded = pad_sequence(texts, batch_first=True, padding_value=0)
        return {
            'text': texts_padded,
            'lengths': torch.tensor([len(text) for text in texts])
        }
        
    def add_labels(batch_dict, batch): 
        """Add label tensors if present in batch"""
        if 'label' in batch[0]:
            batch_dict['label'] = torch.tensor([item['label'] for item in batch])
            
        if 'label2' in batch[0]:
            batch_dict['label2'] = torch.tensor([item['label2'] for item in batch])
        return batch_dict
        
    texts = [item['text'] for item in batch]
    batch_dict = build_base_dict(texts)
    return add_labels(batch_dict, batch)

def load_data(lang: str, label_type: str, is_train: bool = True) -> pd.DataFrame:
    

    def read_csv_with_fallback(filename: str) -> pd.DataFrame:
        
        try:
            # First try standard read with error_bad_lines=False
            df = pd.read_csv(filename, on_bad_lines='warn', engine='python')
        except Exception as e:
            print(f"Standard read failed for {filename}: {str(e)}")
            try:
                # Try with Python engine
                df = pd.read_csv(filename, engine='python', on_bad_lines='skip')
            except Exception as e:
                print(f"Python engine read failed for {filename}: {str(e)}")
                try:
                    # Try reading raw file with error handling
                    with open(filename, 'r', encoding='utf-8') as f:
                        lines = f.readlines()
                    # Simple CSV parsing if standard methods fail
                    data = [line.strip().split(',') for line in lines]
                    df = pd.DataFrame(data[1:], columns=data[0])
                except Exception as e:
                    print(f"All read methods failed for {filename}: {str(e)}")
                    return pd.DataFrame()
        return df

    def process_dataframe(df: pd.DataFrame, lang: str) -> pd.DataFrame:
        
        if len(df) == 0:
            return df
        
        if lang == 'hi':
            df['text'] = df['text'].apply(convert_hinglish_to_hindi)
        return df

    def ensure_required_columns(df: pd.DataFrame, lang: str) -> pd.DataFrame:
        
        required_cols = ['text'] + [f"{lang}_a{i}" for i in range(1, 7 if lang in ['en', 'ta'] else 6)]
        for col in required_cols:
            if col not in df.columns:
                df[col] = 0  # Default value if column is missing
        return df

    # Main execution flow
    filename = f"/kaggle/input/gender-abuse-{'train' if is_train else 'test'}/{'train' if is_train else 'test'}_{lang}_{label_type}.csv"
    df = read_csv_with_fallback(filename)
    df = process_dataframe(df, lang)
    df = ensure_required_columns(df, lang)
    return aggregate_labels(df, lang)


In [21]:
class BiLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, 
                 n_layers, dropout, pretrained_embeddings=None):
        super().__init__()
        
        def init_embedding():
            
            if pretrained_embeddings is not None:
                return nn.Embedding.from_pretrained(pretrained_embeddings, freeze=False)
            return nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        
        def init_layers():
            
            lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers,
                          bidirectional=True, dropout=dropout, batch_first=True)
            fc = nn.Linear(hidden_dim * 2, output_dim)  
            dropout_layer = nn.Dropout(dropout)
            return lstm, fc, dropout_layer
        
        
        self.embedding = init_embedding()
        self.lstm, self.fc, self.dropout = init_layers()
        
    def forward(self, text, text_lengths):
        def get_lstm_output(embedded):
            
            packed_embedded = nn.utils.rnn.pack_padded_sequence(
                embedded, text_lengths.cpu(), batch_first=True, enforce_sorted=False)
            return self.lstm(packed_embedded)
        
        def process_hidden_state(hidden):
            
            combined = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)
            return self.dropout(combined)
        
        # Main forward pass
        embedded = self.dropout(self.embedding(text))
        packed_output, (hidden, cell) = get_lstm_output(embedded)
        hidden_processed = process_hidden_state(hidden)
        
        return self.fc(hidden_processed)


In [22]:
class MultiTaskBiLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, 
                     n_layers, dropout, pretrained_embeddings=None):
            super().__init__()
            
            def init_embedding():
                
                if pretrained_embeddings is not None:
                    return nn.Embedding.from_pretrained(pretrained_embeddings, freeze=False)
                return nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
            
            def init_layers():
                
                lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers,
                              bidirectional=True, dropout=dropout, batch_first=True)
                fc1 = nn.Linear(hidden_dim * 2, output_dim)  # For task 1
                fc2 = nn.Linear(hidden_dim * 2, output_dim)  # For task 2
                dropout_layer = nn.Dropout(dropout)
                return lstm, fc1, fc2, dropout_layer
            
            # Initialize components using nested functions
            self.embedding = init_embedding()
            self.lstm, self.fc1, self.fc2, self.dropout = init_layers()
        
    def forward(self, text, text_lengths):
        def process_shared_layers():
            
            embedded = self.dropout(self.embedding(text))
            packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, text_lengths.cpu(), batch_first=True, enforce_sorted=False)
            packed_output, (hidden, cell) = self.lstm(packed_embedded)
            return self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
            
        def get_task_outputs(hidden):
            
            output1 = self.fc1(hidden)
            output2 = self.fc2(hidden)
            return output1, output2

        # Main forward pass        
        hidden = process_shared_layers()
        return get_task_outputs(hidden)

# ======== 3. Training Functions ============
def train_single_task(model, train_loader, val_loader, epochs=5, learning_rate=1e-3):
    def setup_training():
        
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.CrossEntropyLoss()
        best_f1 = 0
        history = {'train_loss': [], 'val_loss': [], 'val_f1': []}
        return device, optimizer, criterion, best_f1, history

    def train_epoch(optimizer, criterion, device):
        
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
            optimizer.zero_grad()
            texts = batch['text'].to(device)
            lengths = batch['lengths'].to(device)
            labels = batch['label'].to(device)
            predictions = model(texts, lengths)
            loss = criterion(predictions, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(train_loader)

    def validate_model(device):
        
        val_f1, val_loss = evaluate_single_task(model, val_loader, device)
        return val_f1, val_loss

    def update_history(train_loss, val_loss, val_f1, history, best_f1):
        
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['val_f1'].append(val_f1)
        print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f}")
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), 'best_model.pt')
        return best_f1

    # Main training loop
    device, optimizer, criterion, best_f1, history = setup_training()
    for epoch in range(epochs):
        train_loss = train_epoch(optimizer, criterion, device)
        val_f1, val_loss = validate_model(device)
        best_f1 = update_history(train_loss, val_loss, val_f1, history, best_f1)
    
    model.load_state_dict(torch.load('best_model.pt'))
    return model, history

In [23]:
def evaluate_single_task(model, data_loader, device):
    def setup_evaluation():
        
        model.eval()
        return [], [], 0, nn.CrossEntropyLoss()

    def process_batch(batch, preds, true_labels, total_loss):
        
        texts = batch['text'].to(device)
        lengths = batch['lengths'].to(device)
        labels = batch['label'].to(device)
        
        predictions = model(texts, lengths)
        loss = criterion(predictions, labels)
        batch_preds = torch.argmax(predictions, dim=-1)
        
        preds.extend(batch_preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())
        return loss.item()
        
    def calculate_metrics(total_loss, preds, true_labels):
        """Calculate and return final metrics"""
        avg_loss = total_loss / len(data_loader)
        f1 = f1_score(true_labels, preds, average='macro')
        return f1, avg_loss

    # Main evaluation flow
    preds, true_labels, total_loss, criterion = setup_evaluation()
    with torch.no_grad():
        for batch in data_loader:
            total_loss += process_batch(batch, preds, true_labels, total_loss)
    
    return calculate_metrics(total_loss, preds, true_labels)

def train_multi_task(model, train_loader, val_loader, epochs=5, learning_rate=1e-3):
    def setup_training():
        
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.CrossEntropyLoss()
        best_avg_f1 = 0
        history = {'train_loss': [], 'val_loss': [], 'val_f1_l1': [], 'val_f1_l3': []}
        return device, optimizer, criterion, best_avg_f1, history

    def train_epoch(optimizer, criterion, device):
        
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
            optimizer.zero_grad()
            texts, lengths = batch['text'].to(device), batch['lengths'].to(device)
            labels1, labels2 = batch['label'].to(device), batch['label2'].to(device)
            
            predictions1, predictions2 = model(texts, lengths)
            loss = criterion(predictions1, labels1) + criterion(predictions2, labels2)
            
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(train_loader)

    def validate_model(device):
        
        (f1_l1, val_loss1), (f1_l3, val_loss2) = evaluate_multi_task(model, val_loader, device)
        avg_val_loss = (val_loss1 + val_loss2) / 2
        avg_f1 = (f1_l1 + f1_l3) / 2
        return f1_l1, f1_l3, avg_val_loss, avg_f1

    def update_history(train_loss, val_metrics, history, best_avg_f1):
        
        f1_l1, f1_l3, avg_val_loss, avg_f1 = val_metrics
        history['train_loss'].append(train_loss)
        history['val_loss'].append(avg_val_loss)
        history['val_f1_l1'].append(f1_l1)
        history['val_f1_l3'].append(f1_l3)
        
        print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
        print(f"L1: F1={f1_l1:.4f} | L3: F1={f1_l3:.4f}")
        
        if avg_f1 > best_avg_f1:
            best_avg_f1 = avg_f1
            torch.save(model.state_dict(), 'best_multi_model.pt')
        return best_avg_f1

    # Main training loop
    device, optimizer, criterion, best_avg_f1, history = setup_training()
    for epoch in range(epochs):
        train_loss = train_epoch(optimizer, criterion, device)
        val_metrics = validate_model(device)
        best_avg_f1 = update_history(train_loss, val_metrics, history, best_avg_f1)
    
    model.load_state_dict(torch.load('best_multi_model.pt'))
    return model, history


In [24]:
def evaluate_multi_task(model, data_loader, device):
    def process_batch(batch, criterion):
        
        texts = batch['text'].to(device)
        lengths = batch['lengths'].to(device)
        labels1 = batch['label'].to(device)
        labels2 = batch['label2'].to(device)
        
        predictions1, predictions2 = model(texts, lengths)
        loss1 = criterion(predictions1, labels1)
        loss2 = criterion(predictions2, labels2)
        
        batch_preds1 = torch.argmax(predictions1, dim=-1)
        batch_preds2 = torch.argmax(predictions2, dim=-1)
        
        return (
            batch_preds1, batch_preds2, labels1, labels2,
            loss1.item(), loss2.item()
        )
    
    def calculate_metrics(preds_l1, preds_l3, true_l1, true_l3, total_loss1, total_loss2):
        
        avg_loss1 = total_loss1 / len(data_loader)
        avg_loss2 = total_loss2 / len(data_loader)
        f1_l1 = f1_score(true_l1, preds_l3, average='macro')
        f1_l3 = f1_score(true_l3, preds_l3, average='macro')
        return (f1_l1, avg_loss1), (f1_l3, avg_loss2)
    
    model.eval()
    preds_l1, preds_l3, true_l1, true_l3 = [], [], [], []
    total_loss1, total_loss2 = 0, 0
    criterion = nn.CrossEntropyLoss()
    
    with torch.no_grad():
        for batch in data_loader:
            batch_preds1, batch_preds2, labels1, labels2, loss1, loss2 = process_batch(batch, criterion)
            preds_l1.extend(batch_preds1.cpu().numpy())
            preds_l3.extend(batch_preds2.cpu().numpy())
            true_l1.extend(labels1.cpu().numpy())
            true_l3.extend(labels2.cpu().numpy())
            total_loss1 += loss1
            total_loss2 += loss2
    
    return calculate_metrics(preds_l1, preds_l3, true_l1, true_l3, total_loss1, total_loss2)

In [25]:
def run_task1():
    
    print("=== Running Task 1 (BiLSTM) ===")
    
    def load_and_combine_data():
        
        train_dfs = []
        for lang in ['en', 'hi', 'ta']:
            df = load_data(lang, "l1", is_train=True)
            train_dfs.append(df[['text', 'label']])
        train_df = pd.concat(train_dfs, ignore_index=True)
        return train_df['text'].tolist(), train_df['label'].tolist()
    
    def create_dataloaders(texts, labels):
        
        vocab = build_vocab(texts)
        train_texts, val_texts, train_labels, val_labels = train_test_split(
            texts, labels, test_size=0.2, random_state=42
        )
        
        train_dataset = TextDataset(train_texts, vocab, train_labels)
        val_dataset = TextDataset(val_texts, vocab, val_labels)
        
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
        return train_loader, val_loader, vocab
    
    def train_model(train_loader, val_loader, vocab):
        
        model = BiLSTMModel(
            vocab_size=len(vocab),
            embedding_dim=300,
            hidden_dim=256,
            output_dim=2,
            n_layers=2,
            dropout=0.5
        )
        return train_single_task(model, train_loader, val_loader, epochs=5)

    # Main execution flow
    texts, labels = load_and_combine_data()
    train_loader, val_loader, vocab = create_dataloaders(texts, labels)
    model, history = train_model(train_loader, val_loader, vocab)
    return model, vocab


In [26]:
def run_task2():
    
    print("\n=== Running Task 2 (BiLSTM Transfer Learning) ===")
    
    def load_hate_speech_data():
        
        def load_twitter_data():
            
            try:
                from datasets import load_dataset
                twitter_hate = load_dataset("tweets_hate_speech_detection")
                df = pd.DataFrame({
                    'text': twitter_hate['train']['tweet'],
                    'label': twitter_hate['train']['label']
                })
                print(f"Loaded {len(df)} English hate speech samples from Twitter")
                return df
            except Exception as e:
                print(f"Couldn't load Twitter hate speech data: {e}")
                return pd.DataFrame({
                    'text': ["you are stupid", "women are inferior", "this is normal"],
                    'label': [1, 1, 0]
                })

        def load_indic_data():
            
            hindi_df = pd.DataFrame({'text': [], 'label': []})
            tamil_df = pd.DataFrame({'text': [], 'label': []})
            
            try:
                hindi_df = pd.read_csv("/kaggle/input/macd-data/hindi_train.csv")[['text', 'label']]
                hindi_df['text'] = hindi_df['text'].apply(convert_hinglish_to_hindi)
                print(f"Loaded {len(hindi_df)} Hindi hate speech samples")
            except Exception as e:
                print(f"Couldn't load Hindi data: {e}")
                hindi_df = pd.DataFrame({
                    'text': ["तुम मूर्ख हो", "स्त्रियाँ अयोग्य हैं", "यह सामान्य है"],
                    'label': [1, 1, 0]
                })

            try:
                tamil_df = pd.read_csv("/kaggle/input/macd-data/tamil_train.csv")[['text', 'label']]
                print(f"Loaded {len(tamil_df)} Tamil hate speech samples")
            except Exception as e:
                print(f"Couldn't load Tamil data: {e}")
                tamil_df = pd.DataFrame({
                    'text': ["நீ முட்டாள்", "பெண்கள் தகுதியற்றவர்கள்", "இது சாதாரணமானது"],
                    'label': [1, 1, 0]
                })
            
            return pd.concat([hindi_df, tamil_df], ignore_index=True)

        english_hate = load_twitter_data()
        indic_hate = load_indic_data()
        return pd.concat([english_hate, indic_hate], ignore_index=True)

    def pretrain_model(external_data):
        
        vocab_ext = build_vocab(external_data['text'])
        train_dataset = TextDataset(external_data['text'].tolist(), vocab_ext, external_data['label'].tolist())
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
        
        pretrain_model = BiLSTMModel(
            vocab_size=len(vocab_ext),
            embedding_dim=300,
            hidden_dim=256,
            output_dim=2,
            n_layers=2,
            dropout=0.5
        )
        
        print("\nPretraining on hate speech data...")
        return train_single_task(pretrain_model, train_loader, train_loader, epochs=3)

    def prepare_task1_data():
        
        task1_texts, task1_labels = [], []
        for lang in ['en', 'hi', 'ta']:
            df = load_data(lang, "l1", is_train=True)
            task1_texts.extend(df['text'].tolist())
            task1_labels.extend(df['label'].tolist())
        
        vocab_task1 = build_vocab(task1_texts)
        train_texts, val_texts, train_labels, val_labels = train_test_split(
            task1_texts, task1_labels, test_size=0.2, random_state=42
        )
        
        return vocab_task1, train_texts, val_texts, train_labels, val_labels

    def create_fine_tune_model(pretrained_model, vocab_task1, task1_data):
        
        train_texts, val_texts, train_labels, val_labels = task1_data
        
        train_dataset = TextDataset(train_texts, vocab_task1, train_labels)
        val_dataset = TextDataset(val_texts, vocab_task1, val_labels)
        
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
        
        task2_model = BiLSTMModel(
            vocab_size=len(vocab_task1),
            embedding_dim=300,
            hidden_dim=256,
            output_dim=2,
            n_layers=2,
            dropout=0.5
        )
        
        # Copy weights from pretrained model
        task2_model.lstm.load_state_dict(pretrained_model.lstm.state_dict())
        task2_model.fc.load_state_dict(pretrained_model.fc.state_dict())
        
        return task2_model, train_loader, val_loader

    # Main execution flow
    external_data = load_hate_speech_data()
    pretrained_model, _ = pretrain_model(external_data)
    
    vocab_task1, *task1_data = prepare_task1_data()
    task2_model, train_loader, val_loader = create_fine_tune_model(pretrained_model, vocab_task1, task1_data)
    
    print("\nFine-tuning on Task 1 data...")
    final_model, history = train_single_task(task2_model, train_loader, val_loader, epochs=3)
    
    return final_model, vocab_task1

In [27]:
def run_task3():
    
    print("\n=== Running Task 3 (BiLSTM) ===")
    
    def load_and_merge_data():
        
        train_dfs = []
        for lang in ['en', 'hi', 'ta']:
            df_l1 = load_data(lang, "l1", is_train=True)
            df_l3 = load_data(lang, "l3", is_train=True)
            merged = pd.merge(
                df_l1[['text', 'label']].rename(columns={'label': 'label1'}),
                df_l3[['text', 'label']].rename(columns={'label': 'label3'}),
                on='text',
                how='inner'
            )
            train_dfs.append(merged)
        return pd.concat(train_dfs, ignore_index=True)

    def prepare_data(train_df):
        
        texts = train_df['text'].tolist()
        labels1 = train_df['label1'].tolist()
        labels3 = train_df['label3'].tolist()
        vocab = build_vocab(texts)
        return texts, labels1, labels3, vocab

    def split_data(texts, labels1, labels3):
        
        return train_test_split(
            texts, labels1, labels3,
            test_size=0.2,
            random_state=42
        )

    def create_dataloaders(splits, vocab):
        
        train_texts, val_texts, train_labels1, val_labels1, train_labels3, val_labels3 = splits
        train_dataset = TextDataset(train_texts, vocab, train_labels1, train_labels3)
        val_dataset = TextDataset(val_texts, vocab, val_labels1, val_labels3)
        
        return (
            DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn),
            DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
        )

    def train_model(train_loader, val_loader, vocab):
        
        model = MultiTaskBiLSTM(
            vocab_size=len(vocab),
            embedding_dim=300,
            hidden_dim=256,
            output_dim=2,
            n_layers=2,
            dropout=0.5
        )
        return train_multi_task(model, train_loader, val_loader, epochs=5)

    # Main execution flow
    train_df = load_and_merge_data()
    texts, labels1, labels3, vocab = prepare_data(train_df)
    splits = split_data(texts, labels1, labels3)
    train_loader, val_loader = create_dataloaders(splits, vocab)
    model, history = train_model(train_loader, val_loader, vocab)
    
    return model, vocab

In [28]:
def evaluate(model, vocab, lang: str, label_type: str, is_multi_task: bool = False, head_idx: int = 0) -> float:
    
    
    def prepare_data():
        
        df = load_data(lang, label_type, is_train=False)
        if len(df) == 0 or 'text' not in df.columns or 'label' not in df.columns:
            print(f"No valid test data for {lang} {label_type}")
            return None
            
        df['text'] = df['text'].fillna('').astype(str)
        df = df[df['text'].str.strip() != '']
        return df if len(df) > 0 else None
    
    def create_loader(df):
        
        try:
            dataset = TextDataset(df['text'].tolist(), vocab, df['label'].tolist())
            return DataLoader(dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
        except Exception as e:
            print(f"Error creating dataset for {lang} {label_type}: {e}")
            return None
            
    def evaluate_multi_task(loader, device):
        
        model.eval()
        preds, true_labels = [], []
        
        with torch.no_grad():
            for batch in loader:
                texts = batch['text'].to(device)
                lengths = batch['lengths'].to(device)
                labels = batch['label'].to(device)
                
                predictions1, predictions2 = model(texts, lengths)
                predictions = predictions1 if head_idx == 0 else predictions2
                batch_preds = torch.argmax(predictions, dim=-1)
                
                preds.extend(batch_preds.cpu().numpy())
                true_labels.extend(labels.cpu().numpy())
        
        return f1_score(true_labels, preds, average='macro') if len(true_labels) > 0 else 0.0
    
    def evaluate_single_taskkk(loader, device):
        
        try:
            f1, _ = evaluate_single_task(model, loader, device)
            return f1
        except Exception as e:
            print(f"Error during evaluation for {lang} {label_type}: {e}")
            return 0.0
    
    # Main execution flow
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    df = prepare_data()
    if df is None:
        return 0.0
        
    loader = create_loader(df)
    if loader is None:
        return 0.0
        
    return evaluate_multi_task(loader, device) if is_multi_task else evaluate_single_taskkk(loader, device)


In [29]:




def infer_task1(text: str, model, vocab) -> Dict[str, float]:
    
    def prepare_input():
        
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)
        indices = [vocab.word2idx.get(word.lower(), vocab.word2idx['<unk>']) for word in text.split()]
        tensor = torch.LongTensor(indices).unsqueeze(0).to(device)
        length = torch.LongTensor([len(indices)]).to(device)
        return tensor, length, device

    def get_prediction(tensor, length):
        
        with torch.no_grad():
            output = model(tensor, length)
            probs = torch.softmax(output, dim=-1).cpu().numpy()[0]
            pred = np.argmax(probs)
            return pred, float(probs[pred])

    # Main execution flow
    tensor, length, _ = prepare_input()
    pred, confidence = get_prediction(tensor, length)
    
    return {
        "label": "Gendered Abuse" if pred == 1 else "Not Gendered Abuse",
        "confidence": confidence,
        "task": "Task 1 (Original)"
    }

def infer_task3(text: str, model, vocab) -> Dict[str, Dict[str, float]]:
    
    def prepare_input():
        
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)
        indices = [vocab.word2idx.get(word.lower(), vocab.word2idx['<unk>']) for word in text.split()]
        tensor = torch.LongTensor(indices).unsqueeze(0).to(device) 
        length = torch.LongTensor([len(indices)]).to(device)
        return tensor, length

    def get_predictions(tensor, length):
        
        with torch.no_grad():
            output1, output2 = model(tensor, length)
            probs1 = torch.softmax(output1, dim=-1).cpu().numpy()[0]
            probs2 = torch.softmax(output2, dim=-1).cpu().numpy()[0]
            
        return {
            "Label l1 (Gendered Abuse)": {
                "prediction": "Yes" if np.argmax(probs1) == 1 else "No",
                "confidence": float(probs1[np.argmax(probs1)])
            },
            "Label l3 (Explicit)": {
                "prediction": "Yes" if np.argmax(probs2) == 1 else "No", 
                "confidence": float(probs2[np.argmax(probs2)])
            }
        }

    # Main execution flow
    tensor, length = prepare_input()
    return get_predictions(tensor, length)

# ======== Main Execution ============
if __name__ == "__main__":
    def train_all_models():
        
        print("=== Training Task 1 ===")
        model1, vocab1 = run_task1()
        
        print("\n=== Training Task 2 ===")
        model2, vocab2 = run_task2()
        
        print("\n=== Training Task 3 ===")
        model3, vocab3 = run_task3()
        
        return (model1, vocab1), (model2, vocab2), (model3, vocab3)
    
    def evaluate_and_test_models(models_and_vocabs):
        
        (model1, vocab1), (model2, vocab2), (model3, vocab3) = models_and_vocabs
        
        print("\n=== Evaluation ===")
        for lang in ['en', 'hi', 'ta']:
            # Task 1/2 (Label l1)
            task1_f1 = evaluate(model1, vocab1, lang, "l1")
            task2_f1 = evaluate(model2, vocab2, lang, "l1")
            # Task 3 (Labels l1 and l3)
            task3_l1_f1 = evaluate(model3, vocab3, lang, "l1", is_multi_task=True, head_idx=0)
            task3_l3_f1 = evaluate(model3, vocab3, lang, "l3", is_multi_task=True, head_idx=1)
            
            print(f"\n{lang.upper()} Results:")
            print(f"Task 1 (l1): {task1_f1:.4f} | Task 2 (l1): {task2_f1:.4f}")
            print(f"Task 3 (l1): {task3_l1_f1:.4f} | Task 3 (l3): {task3_l3_f1:.4f}")
        
        # Interactive inference
        test_text = "Women belong in the kitchen"
        print("\n=== Predictions ===")
        print("Task 1:", infer_task1(test_text, model1, vocab1))
        print("Task 3:", infer_task3(test_text, model3, vocab3))

    # Execute the nested functions
    models_and_vocabs = train_all_models()
    evaluate_and_test_models(models_and_vocabs)

=== Training Task 1 ===
=== Running Task 1 (BiLSTM) ===


Epoch 1/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 1 | Train Loss: 0.5912 | Val Loss: 0.5475 | Val F1: 0.6421


Epoch 2/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 2 | Train Loss: 0.5249 | Val Loss: 0.5245 | Val F1: 0.6919


Epoch 3/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 3 | Train Loss: 0.4716 | Val Loss: 0.5334 | Val F1: 0.7062


Epoch 4/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 4 | Train Loss: 0.4204 | Val Loss: 0.5604 | Val F1: 0.7119


Epoch 5/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 5 | Train Loss: 0.3686 | Val Loss: 0.6016 | Val F1: 0.7152

=== Training Task 2 ===

=== Running Task 2 (BiLSTM Transfer Learning) ===


  model.load_state_dict(torch.load('best_model.pt'))


Loaded 31962 English hate speech samples from Twitter
Loaded 20183 Hindi hate speech samples
Loaded 18000 Tamil hate speech samples

Pretraining on hate speech data...


Epoch 1/3:   0%|          | 0/2193 [00:00<?, ?it/s]

Epoch 1 | Train Loss: 0.4108 | Val Loss: 0.2976 | Val F1: 0.8525


Epoch 2/3:   0%|          | 0/2193 [00:00<?, ?it/s]

Epoch 2 | Train Loss: 0.3070 | Val Loss: 0.2124 | Val F1: 0.8936


Epoch 3/3:   0%|          | 0/2193 [00:00<?, ?it/s]

Epoch 3 | Train Loss: 0.2601 | Val Loss: 0.1707 | Val F1: 0.9179


  model.load_state_dict(torch.load('best_model.pt'))



Fine-tuning on Task 1 data...


Epoch 1/3:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 1 | Train Loss: 0.6137 | Val Loss: 0.5749 | Val F1: 0.6006


Epoch 2/3:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 2 | Train Loss: 0.5571 | Val Loss: 0.5583 | Val F1: 0.6805


Epoch 3/3:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 3 | Train Loss: 0.5051 | Val Loss: 0.5429 | Val F1: 0.6962

=== Training Task 3 ===

=== Running Task 3 (BiLSTM) ===


  model.load_state_dict(torch.load('best_model.pt'))


Epoch 1/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 1 | Train Loss: 1.2145 | Val Loss: 0.5619
L1: F1=0.6810 | L3: F1=0.6775


Epoch 2/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 2 | Train Loss: 1.0888 | Val Loss: 0.5416
L1: F1=0.6937 | L3: F1=0.7043


Epoch 3/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 3 | Train Loss: 0.9975 | Val Loss: 0.5512
L1: F1=0.6848 | L3: F1=0.7111


Epoch 4/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 4 | Train Loss: 0.9136 | Val Loss: 0.5648
L1: F1=0.6941 | L3: F1=0.7245


Epoch 5/5:   0%|          | 0/488 [00:00<?, ?it/s]

Epoch 5 | Train Loss: 0.8357 | Val Loss: 0.5703
L1: F1=0.6844 | L3: F1=0.7268

=== Evaluation ===


  model.load_state_dict(torch.load('best_multi_model.pt'))



EN Results:
Task 1 (l1): 0.6191 | Task 2 (l1): 0.5909
Task 3 (l1): 0.5836 | Task 3 (l3): 0.5591

HI Results:
Task 1 (l1): 0.6365 | Task 2 (l1): 0.5925
Task 3 (l1): 0.6275 | Task 3 (l3): 0.7025

TA Results:
Task 1 (l1): 0.7637 | Task 2 (l1): 0.7627
Task 3 (l1): 0.7753 | Task 3 (l3): 0.8538

=== Predictions ===
Task 1: {'label': 'Gendered Abuse', 'confidence': 0.757101833820343, 'task': 'Task 1 (Original)'}
Task 3: {'Label l1 (Gendered Abuse)': {'prediction': 'Yes', 'confidence': 0.773901641368866}, 'Label l3 (Explicit)': {'prediction': 'Yes', 'confidence': 0.8987719416618347}}
