<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/Model_Ensembling_for_Increased_Robustness.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the three GPT-2 checkpoints that will act as ensemble members.
# The generator is consumed left to right, so the checkpoints are fetched in
# the same order as their names appear in the tuple.
model1, model2, model3 = (
    AutoModelForCausalLM.from_pretrained(checkpoint_name)
    for checkpoint_name in ("gpt2", "gpt2-medium", "gpt2-large")
)

# Define the ensemble model class
class EnsembleFoundationModel(nn.Module):
    """Ensemble that averages the logits of several models.

    Every member is called with the same ``input_ids`` / ``attention_mask``
    and must return an object exposing a ``.logits`` tensor. The tensors are
    stacked along a new leading dimension and averaged, so all members must
    produce logits of the same shape.
    """

    def __init__(self, models):
        """Register the member models.

        Args:
            models: Non-empty iterable of ``nn.Module`` instances.

        Raises:
            ValueError: If ``models`` is ``None`` or contains no models.
        """
        super().__init__()
        members = [] if models is None else list(models)
        if not members:
            # Fail early with a clear message instead of letting torch.stack
            # fail on an empty list during the first forward pass.
            raise ValueError("EnsembleFoundationModel requires at least one model")
        # nn.ModuleList registers the members as sub-modules so that
        # .to(), .eval(), .parameters() etc. reach them.
        self.models = nn.ModuleList(members)

    def forward(self, input_ids, attention_mask=None):
        """Return the element-wise mean of the members' logits.

        Args:
            input_ids: Token-id tensor passed positionally to every member.
            attention_mask: Optional mask forwarded to every member as the
                ``attention_mask`` keyword; ``None`` is forwarded unchanged.

        Returns:
            A tensor with the same shape as a single member's ``.logits``.
        """
        member_logits = [
            model(input_ids, attention_mask=attention_mask).logits
            for model in self.models
        ]
        # Stack on a new dim 0 (one slice per member) and average it away.
        return torch.stack(member_logits).mean(dim=0)

# Instantiate the ensemble model with the pre-trained models
model_ensemble = EnsembleFoundationModel([model1, model2, model3])
# Make inference mode explicit for the wrapper and all of its members.
model_ensemble.eval()

# Initialize tokenizer (the same "gpt2" tokenizer is used for every member)
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Example input text
input_text = "Once upon a time"

# Tokenize input text
inputs = tokenizer(input_text, return_tensors="pt")
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

# Upper bound on the number of tokens appended to the prompt.
MAX_NEW_TOKENS = 20

# Greedy decoding with the ensemble.
# A causal LM's logits at position i score the token that FOLLOWS position i,
# so only the last position's logits say what comes next. Taking the argmax
# at every position (as a single forward pass would) yields next-token
# guesses for each prompt prefix, not a continuation of the prompt.
generated_ids = input_ids
generated_mask = attention_mask
with torch.no_grad():  # inference only: skip building autograd graphs
    for _ in range(MAX_NEW_TOKENS):
        # The ensemble wrapper exposes no key/value cache, so every step
        # re-runs the full sequence. Acceptable for a short demo.
        outputs = model_ensemble(input_ids=generated_ids, attention_mask=generated_mask)
        next_token = outputs[:, -1, :].argmax(dim=-1, keepdim=True)
        # Stop once the model predicts end-of-text.
        if tokenizer.eos_token_id is not None and bool((next_token == tokenizer.eos_token_id).all()):
            break
        generated_ids = torch.cat([generated_ids, next_token], dim=-1)
        generated_mask = torch.cat([generated_mask, torch.ones_like(next_token)], dim=-1)

# Decode prompt + continuation (batch size is 1, so drop the batch dimension)
generated_text = tokenizer.decode(generated_ids.squeeze(0), skip_special_tokens=True)

print("Generated text:", generated_text)