In [None]:
!pip install transformers accelerate peft datasets wandb bitsandbytes -q

In [None]:
import os
import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    AutoModelForCausalLM, TrainingArguments, Trainer
)
from peft import LoraConfig, get_peft_model
import wandb
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

In [None]:
# Authenticate with Weights & Biases before any Trainer starts reporting.
wandb.login()

# W&B settings supplied through the environment: the project that runs are
# grouped under, and the model-logging mode.
os.environ.update(
    {
        "WANDB_PROJECT": "Banking77_MoE",
        "WANDB_LOG_MODEL": "end",
    }
)

In [None]:
train_path = "/content/drive/MyDrive/Banking77_Project/data/train.csv"
test_path = "/content/drive/MyDrive/Banking77_Project/data/test.csv"

# NOTE(review): passing `names=` without `header=0` makes pandas treat the first
# CSV row as data. If these files carry a header row, that row becomes a bogus
# sample with its own label (inflating num_labels by one) — confirm the files
# are header-less.
train_df = pd.read_csv(train_path, names=["text", "label"])
test_df = pd.read_csv(test_path, names=["text", "label"])

# Build the label vocabulary from the training split only; sorting keeps the
# string -> id assignment stable between runs.
label2id = {label: i for i, label in enumerate(sorted(train_df["label"].unique()))}
id2label = {i: label for label, i in label2id.items()}

# A test label that never occurs in training would map to NaN and make the
# `.astype(int)` below fail with an unhelpful message, so report it explicitly.
unknown_mask = ~test_df["label"].isin(list(label2id))
if unknown_mask.any():
    unknown_labels = test_df.loc[unknown_mask, "label"].unique().tolist()
    raise ValueError(
        f"Test set contains labels that are missing from the training set: {unknown_labels}"
    )

# Replace the string labels with their integer ids.
train_df["label"] = train_df["label"].map(label2id).astype(int)
test_df["label"] = test_df["label"].map(label2id).astype(int)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

dataset = DatasetDict({"train": train_dataset, "test": test_dataset})
num_labels = len(label2id)

print(dataset)
print("Num labels:", num_labels)


In [None]:
# Checkpoint names of the two experts.
bert_model_name, gpt2_model_name = "bert-base-uncased", "gpt2"

# Each expert gets the tokenizer that matches its own checkpoint.
tokenizer_bert = AutoTokenizer.from_pretrained(bert_model_name)
tokenizer_gpt2 = AutoTokenizer.from_pretrained(gpt2_model_name)

# If the GPT-2 tokenizer has no pad token, reuse EOS so that
# padding="max_length" can be applied during tokenization.
if tokenizer_gpt2.pad_token is None:
    tokenizer_gpt2.pad_token = tokenizer_gpt2.eos_token

def tokenize_bert(examples):
    """Tokenize the ``"text"`` field of a batch with the BERT tokenizer.

    Every sequence is truncated and padded to exactly 64 tokens.
    """
    texts = examples["text"]
    return tokenizer_bert(
        texts,
        max_length=64,
        padding="max_length",
        truncation=True,
    )

def tokenize_gpt2(examples):
    """Tokenize the ``"text"`` field of a batch with the GPT-2 tokenizer.

    Every sequence is truncated and padded to exactly 64 tokens.
    """
    texts = examples["text"]
    return tokenizer_gpt2(
        texts,
        max_length=64,
        padding="max_length",
        truncation=True,
    )

def _copy_label(example):
    """Expose the integer ``label`` under the additional key ``labels``."""
    return {"labels": example["label"]}


# One tokenized copy of the dataset per expert, each with a `labels` column
# added next to the original `label` column.
tokenized_dataset_bert = dataset.map(tokenize_bert, batched=True).map(_copy_label)
tokenized_dataset_gpt2 = dataset.map(tokenize_gpt2, batched=True).map(_copy_label)


In [None]:
def compute_metrics(eval_pred):
    """Compute evaluation metrics from a ``(logits, labels)`` pair.

    The predicted class for each row is the arg-max over the last axis of the
    logits.

    Returns:
        dict with ``"accuracy"`` and ``"f1"`` (F1 with ``average="weighted"``).
    """
    scores, targets = eval_pred
    predicted = np.argmax(scores, axis=-1)

    metrics = {}
    metrics["accuracy"] = accuracy_score(targets, predicted)
    metrics["f1"] = f1_score(targets, predicted, average="weighted")
    return metrics


In [None]:
# Settings for the BERT expert run, grouped by concern.
bert_training_kwargs = dict(
    # where checkpoints and logs are written
    output_dir="/content/drive/MyDrive/Banking77_Project/outputs",
    logging_dir="./logs",
    # evaluate and checkpoint once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    # optimisation
    learning_rate=2e-4,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # experiment tracking
    report_to="wandb",
    run_name="HybridMoE_Training",
)
training_args = TrainingArguments(**bert_training_kwargs)


In [None]:
# BERT backbone with a sequence-classification head sized to the label set.
bert_model = AutoModelForSequenceClassification.from_pretrained(
    bert_model_name, num_labels=num_labels
)

# `use_dora=True` is what makes this adapter DoRA (weight-decomposed low-rank
# adaptation). Without the flag, LoraConfig describes a plain LoRA adapter,
# which contradicts the variable name and the message printed below.
dora_config = LoraConfig(
    r=8, lora_alpha=32, lora_dropout=0.1,
    bias="none", task_type="SEQ_CLS",
    use_dora=True,
)
bert_peft = get_peft_model(bert_model, dora_config)
print("BERT+DoRA loaded")


In [None]:
# Fine-tune and evaluate the BERT expert on its own tokenized copy of the data.
trainer_bert = Trainer(
    model=bert_peft,
    args=training_args,
    train_dataset=tokenized_dataset_bert["train"],
    eval_dataset=tokenized_dataset_bert["test"],
    tokenizer=tokenizer_bert,
    compute_metrics=compute_metrics
)

trainer_bert.train()
bert_eval_metrics = trainer_bert.evaluate()

# Close this W&B run explicitly. While a run is still active, the next Trainer
# logs into it instead of starting a new one, so its own `run_name` is ignored
# and the metrics of both experts end up mixed in a single run.
wandb.finish()

# Keep the evaluation metrics as the cell's displayed output.
bert_eval_metrics

In [None]:

from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model

gpt2_model_name = "gpt2"

# GPT-2 backbone with a sequence-classification head: one output per label, and
# the pad token id taken from the GPT-2 tokenizer.
gpt2_model = AutoModelForSequenceClassification.from_pretrained(
    gpt2_model_name,
    pad_token_id=tokenizer_gpt2.pad_token_id,
    num_labels=num_labels,
)

# Adapter settings for the GPT-2 expert.
# NOTE(review): no quantization config is passed to `from_pretrained` above, so
# despite the "QLoRA" naming this wraps an unquantized base model with a LoRA
# adapter.
qlora_config = LoraConfig(
    task_type="SEQ_CLS",  # classification head, not language modelling
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Attach the adapter to the classification model.
gpt2_peft = get_peft_model(gpt2_model, qlora_config)
print("GPT2+QLoRA classification model ready")

from transformers import Trainer, TrainingArguments

# Settings for the GPT-2 expert run, grouped by concern.
gpt2_training_kwargs = dict(
    # where checkpoints and logs are written
    output_dir="/content/drive/MyDrive/Banking77_Project/outputs_gpt2",
    logging_dir="./logs_gpt2",
    # evaluate and checkpoint once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    # optimisation
    learning_rate=2e-4,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # experiment tracking
    report_to="wandb",
    run_name="GPT2_QLoRA_Classification",
)
training_args_gpt2 = TrainingArguments(**gpt2_training_kwargs)

# A W&B run left open by an earlier Trainer would be reused here, and the
# `run_name` configured for this training would be ignored. Close any active
# run first so the GPT-2 metrics are recorded in a run of their own.
if wandb.run is not None:
    wandb.finish()

# Fine-tune and evaluate the GPT-2 expert on its own tokenized copy of the data.
trainer_gpt2 = Trainer(
    model=gpt2_peft,
    args=training_args_gpt2,
    train_dataset=tokenized_dataset_gpt2["train"],
    eval_dataset=tokenized_dataset_gpt2["test"],
    tokenizer=tokenizer_gpt2,
    compute_metrics=compute_metrics
)

trainer_gpt2.train()
gpt2_eval_metrics = trainer_gpt2.evaluate()

# Close the run so that nothing executed later logs into it by accident.
wandb.finish()

# Keep the evaluation metrics as the cell's displayed output.
gpt2_eval_metrics


In [None]:
import torch.nn as nn

class HybridMoE(nn.Module):
    """Soft mixture of two sequence-classification experts (BERT and GPT-2).

    Both experts score every example. A small gating network then produces two
    softmax weights per example, and the final logits are the weighted sum of
    the two experts' logits.

    The gate reads the concatenated expert logits (``2 * num_labels`` features)
    rather than raw token ids: token ids are categorical indices, so casting
    them to float gives the gate no usable signal and ties it to one fixed
    sequence length.

    Args:
        bert_model: Expert called as ``model(input_ids=..., attention_mask=...)``
            whose output exposes ``.logits`` of shape ``(batch, num_labels)``.
        gpt2_model: Second expert with the same calling convention.
        num_labels: Number of classes each expert predicts.
    """

    def __init__(self, bert_model, gpt2_model, num_labels):
        super().__init__()
        self.bert = bert_model
        self.gpt2 = gpt2_model
        self.num_labels = num_labels
        # Gating network: both experts' logits -> two mixture weights.
        self.router = nn.Sequential(
            nn.Linear(2 * num_labels, 128),
            nn.ReLU(),
            nn.Linear(128, 2),
            nn.Softmax(dim=-1)
        )

    def forward(self, input_ids=None, attention_mask=None, labels=None,
                gpt2_input_ids=None, gpt2_attention_mask=None):
        """Run both experts and mix their logits.

        Args:
            input_ids: Token ids produced by the BERT tokenizer.
            attention_mask: Attention mask matching ``input_ids``.
            labels: Optional class ids; when given, a cross-entropy loss on the
                mixed logits is returned.
            gpt2_input_ids: Token ids produced by the GPT-2 tokenizer. The two
                tokenizers use different vocabularies, so the GPT-2 expert
                should receive its own ids. When omitted, ``input_ids`` and
                ``attention_mask`` are reused for backward compatibility; that
                is only meaningful if both experts share a vocabulary.
            gpt2_attention_mask: Attention mask matching ``gpt2_input_ids``.

        Returns:
            dict with ``"loss"`` (``None`` when ``labels`` is ``None``) and
            ``"logits"`` of shape ``(batch, num_labels)``.
        """
        if gpt2_input_ids is None:
            # Legacy call style: one set of ids shared by both experts.
            gpt2_input_ids = input_ids
            if gpt2_attention_mask is None:
                gpt2_attention_mask = attention_mask

        # Expert logits
        bert_logits = self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
        gpt2_logits = self.gpt2(
            input_ids=gpt2_input_ids, attention_mask=gpt2_attention_mask
        ).logits

        # Gate on what the experts predict; cast to the router's dtype in case
        # the experts run in reduced precision.
        router_input = torch.cat([bert_logits, gpt2_logits], dim=-1)
        router_input = router_input.to(self.router[0].weight.dtype)
        router_weights = self.router(router_input)

        # Per-example convex combination of the two experts' logits.
        bert_weight = router_weights[:, 0].unsqueeze(-1)
        gpt2_weight = router_weights[:, 1].unsqueeze(-1)
        logits = bert_weight * bert_logits + gpt2_weight * gpt2_logits

        loss = None
        if labels is not None:
            loss = nn.functional.cross_entropy(logits, labels)

        return {"loss": loss, "logits": logits}

# Assemble the mixture from the two PEFT-wrapped experts.
moe_model = HybridMoE(
    bert_model=bert_peft,
    gpt2_model=gpt2_peft,
    num_labels=num_labels,
)
print("Hybrid MoE placeholder ready")

In [None]:
# Each expert's adapter and its tokenizer are stored together in one folder.
bert_dir = "/content/drive/MyDrive/Banking77_Project/bert_dora"
gpt2_dir = "/content/drive/MyDrive/Banking77_Project/gpt2_qlora"

# Adapters first ...
for peft_model, out_dir in ((bert_peft, bert_dir), (gpt2_peft, gpt2_dir)):
    peft_model.save_pretrained(out_dir)

# ... then the matching tokenizers.
tokenizer_bert.save_pretrained(bert_dir)
tokenizer_gpt2.save_pretrained(gpt2_dir)
