# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, f1_score
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from torch.nn.utils import clip_grad_norm_
from transformers import get_linear_schedule_with_warmup

# Assumes `df` (training split) and `val_df` (validation split) DataFrames already exist, each with 'utterance' and 'emotion' columns

# Map emotion strings to integer ids. The encoder is fitted on the training
# frame only and then reused, unchanged, for the validation frame.
label_encoder = LabelEncoder()
df['emotion_encoded'] = label_encoder.fit_transform(df['emotion'])
val_df['emotion_encoded'] = label_encoder.transform(val_df['emotion'])

# Both classifiers get one output unit per emotion seen in the training data.
NUM_EMOTIONS = len(label_encoder.classes_)
BERT_CHECKPOINT = "l3cube-pune/hing-bert"
ROBERTA_CHECKPOINT = "l3cube-pune/hing-roberta"

# Hinglish BERT: tokenizer + sequence-classification model
tokenizer_bert = AutoTokenizer.from_pretrained(BERT_CHECKPOINT)
model_bert = AutoModelForSequenceClassification.from_pretrained(
    BERT_CHECKPOINT,
    num_labels=NUM_EMOTIONS,
)

# Hinglish RoBERTa: tokenizer + sequence-classification model
tokenizer_roberta = AutoTokenizer.from_pretrained(ROBERTA_CHECKPOINT)
model_roberta = AutoModelForSequenceClassification.from_pretrained(
    ROBERTA_CHECKPOINT,
    num_labels=NUM_EMOTIONS,
)

# Tokenize and prepare data
def tokenize_data(data, tokenizer, max_length=128):
    """Tokenize the 'utterance' column of ``data`` with ``tokenizer``.

    Args:
        data: Object whose ``data['utterance']`` supports ``.tolist()``
            (a pandas DataFrame at the call sites in this file).
        tokenizer: Callable that accepts a list of texts plus the keyword
            arguments ``truncation``, ``padding``, ``max_length`` and
            ``return_tensors``.
        max_length: Length passed to the tokenizer for both truncation and
            ``'max_length'`` padding.

    Returns:
        Whatever ``tokenizer`` returns for the utterances; callers in this
        file read its ``'input_ids'`` and ``'attention_mask'`` entries.
    """
    utterances = data['utterance'].tolist()
    return tokenizer(
        utterances,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors='pt',
    )

BATCH_SIZE = 64


def _to_dataset(encodings, labels):
    """Bundle input ids, attention masks and labels into one TensorDataset."""
    return TensorDataset(encodings['input_ids'], encodings['attention_mask'], labels)


# Integer emotion ids as tensors; the same label tensors serve both models.
y_train = torch.tensor(df['emotion_encoded'].values)
y_val = torch.tensor(val_df['emotion_encoded'].values)

# --- Hinglish BERT: tokenize, wrap, batch ---
train_tokenized_bert = tokenize_data(df, tokenizer_bert)
val_tokenized_bert = tokenize_data(val_df, tokenizer_bert)
train_dataset_bert = _to_dataset(train_tokenized_bert, y_train)
val_dataset_bert = _to_dataset(val_tokenized_bert, y_val)
# Only the training loader shuffles; validation keeps the DataFrame order.
train_dataloader_bert = DataLoader(train_dataset_bert, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader_bert = DataLoader(val_dataset_bert, batch_size=BATCH_SIZE, shuffle=False)

# --- Hinglish RoBERTa: tokenize, wrap, batch ---
train_tokenized_roberta = tokenize_data(df, tokenizer_roberta)
val_tokenized_roberta = tokenize_data(val_df, tokenizer_roberta)
train_dataset_roberta = _to_dataset(train_tokenized_roberta, y_train)
val_dataset_roberta = _to_dataset(val_tokenized_roberta, y_val)
train_dataloader_roberta = DataLoader(train_dataset_roberta, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader_roberta = DataLoader(val_dataset_roberta, batch_size=BATCH_SIZE, shuffle=False)

# Number of passes over the training data. It must be assigned before the
# schedulers are built because their total step count is derived from it
# (previously it was assigned after its first use, raising NameError).
num_epochs = 5

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Optimisation setup for Hinglish BERT
model_bert.to(device)

optimizer_bert = AdamW(model_bert.parameters(), lr=2e-5, weight_decay=0.01)
criterion_bert = CrossEntropyLoss()

# The scheduler is stepped once per training batch, so the schedule spans
# (batches per epoch) * (number of epochs) steps, with no warm-up phase.
total_steps_bert = len(train_dataloader_bert) * num_epochs
scheduler_bert = get_linear_schedule_with_warmup(optimizer_bert, num_warmup_steps=0, num_training_steps=total_steps_bert)

# Optimisation setup for Hinglish RoBERTa (same hyper-parameters as BERT)
model_roberta.to(device)

optimizer_roberta = AdamW(model_roberta.parameters(), lr=2e-5, weight_decay=0.01)
criterion_roberta = CrossEntropyLoss()

total_steps_roberta = len(train_dataloader_roberta) * num_epochs
scheduler_roberta = get_linear_schedule_with_warmup(optimizer_roberta, num_warmup_steps=0, num_training_steps=total_steps_roberta)

def _train_one_epoch(model, dataloader, optimizer, scheduler, criterion):
    """Run one optimisation pass of ``model`` over every batch in ``dataloader``."""
    model.train()
    for input_ids, attention_mask, labels in dataloader:
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(input_ids, attention_mask=attention_mask).logits
        loss = criterion(logits, labels)
        loss.backward()

        # Clip the gradient norm before the update, then advance the
        # learning-rate schedule once per batch.
        clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()


def _predict_probabilities(model, dataloader):
    """Return ``(class_probabilities, labels)`` for all batches of ``dataloader``.

    Both results are CPU tensors concatenated in the order the loader yields
    its batches.
    """
    model.eval()
    batch_probs = []
    batch_labels = []
    with torch.no_grad():
        for input_ids, attention_mask, labels in dataloader:
            logits = model(
                input_ids.to(device),
                attention_mask=attention_mask.to(device),
            ).logits
            batch_probs.append(torch.softmax(logits, dim=1).cpu())
            batch_labels.append(labels.cpu())
    return torch.cat(batch_probs), torch.cat(batch_labels)


for epoch in range(num_epochs):
    # Train each model for one epoch: BERT first, then RoBERTa.
    _train_one_epoch(model_bert, train_dataloader_bert, optimizer_bert, scheduler_bert, criterion_bert)
    _train_one_epoch(model_roberta, train_dataloader_roberta, optimizer_roberta, scheduler_roberta, criterion_roberta)

    # Validation. Both validation loaders are unshuffled views of the same
    # rows, so row i of each probability matrix refers to the same example.
    probs_bert, labels_bert = _predict_probabilities(model_bert, val_dataloader_bert)
    probs_roberta, _ = _predict_probabilities(model_roberta, val_dataloader_roberta)

    val_true_labels = labels_bert.tolist()
    val_predictions_bert = probs_bert.argmax(dim=1).tolist()
    val_predictions_roberta = probs_roberta.argmax(dim=1).tolist()

    # Soft voting: average the two models' class probabilities and take the
    # most likely class. (Taking max() of the two predicted class *indices*
    # would just favour whichever label has the larger encoded id.)
    ensemble_probs = (probs_bert + probs_roberta) / 2
    val_predictions_ensemble = ensemble_probs.argmax(dim=1).tolist()

    val_accuracy = accuracy_score(val_true_labels, val_predictions_ensemble)
    val_report = classification_report(val_true_labels, val_predictions_ensemble)
    val_weighted_f1 = f1_score(val_true_labels, val_predictions_ensemble, average='weighted')

    print(f"Epoch {epoch + 1}/{num_epochs}")
    print("Validation Accuracy:", val_accuracy)
    print("Validation Classification Report:\n", val_report)
    print("Validation Weighted F1 Score:", val_weighted_f1)
    print("-" * 50)
