In [None]:
!pip install -q pandas scikit-learn matplotlib datasets transformers wandb seaborn captum ipywidgets tqdm

In [None]:
from datasets import load_dataset

# ARC-Easy: one pandas DataFrame per split, loaded in train / test / validation order.
train, test, dev = (
    load_dataset("ai2_arc", 'ARC-Easy', split=split_name).to_pandas()
    for split_name in ('train', 'test', 'validation')
)

# MMLU: test split of three subjects. The 'answer' column is renamed to
# 'answerKey' (presumably to match the column name the ARC frames use -- confirm).
high_school_test, college_test, philosophy_test = (
    load_dataset("tasksource/mmlu", subject, split='test')
    .to_pandas()
    .rename(columns={'answer': 'answerKey'})
    for subject in ('high_school_biology', 'college_biology', 'philosophy')
)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForMultipleChoice
import random
import os
import numpy as np
import logging
# Only let ERROR-level (and above) records through from the
# "transformers.modeling_utils" logger.
_modeling_utils_logger = logging.getLogger("transformers.modeling_utils")
_modeling_utils_logger.setLevel(logging.ERROR)

SEED = 2023


def seed_everything(seed=2023):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and torch (CPU and
    all CUDA devices), and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE: hash randomisation of the running interpreter is fixed at startup;
    # this only influences processes launched after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # torch.cuda.manual_seed() seeds only the current GPU; seed all of them so
    # multi-GPU runs are covered too. Silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)


seed_everything(SEED)

# Run on the GPU when torch can see one; otherwise stay on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print(f'Model is in device: {device}')

# Checkpoint identifiers: suffix 1 = BERT, suffix 2 = BioBERT.
model_name1, model_name2 = (
    'bert-base-uncased',
    'dmis-lab/biobert-base-cased-v1.1-squad',
)

# Multiple-choice models, each moved to the selected device.
model_bert, model_biobert = (
    AutoModelForMultipleChoice.from_pretrained(checkpoint).to(device)
    for checkpoint in (model_name1, model_name2)
)

# Matching tokenizers, in the same order as the models.
tokenizer1, tokenizer2 = (
    AutoTokenizer.from_pretrained(checkpoint)
    for checkpoint in (model_name1, model_name2)
)

# Locations of the saved weight files for the two models.
model1_weights_path, model2_weights_path = (
    'model_state_dicts/bert_pretrained.pth',
    'model_state_dicts/biobert_pretrained.pth',
)

**54%** bert51.pth, biobert52.pth

In [None]:
from multiple_choice_processor import MultipleChoiceProcessor

BATCH_SIZE = 16

# ARC-Easy: one processor per tokenizer over the same train/dev/test frames.
processor1, processor2 = (
    MultipleChoiceProcessor(tok, train, dev, test, set='easy')
    for tok in (tokenizer1, tokenizer2)
)
# The first item returned by create_datasets is discarded
# (presumably the training loader -- confirm against MultipleChoiceProcessor).
_, val_loader1, test_loader1 = processor1.create_datasets(
    batch_size=BATCH_SIZE,
    train_batch_size=None,
)
_, val_loader2, test_loader2 = processor2.create_datasets(
    batch_size=BATCH_SIZE,
    train_batch_size=None,
)

# MMLU: the three subject test frames are passed in the positional slots that
# the ARC train/dev/test frames occupy above, so all three loaders are kept.
processor3, processor4 = (
    MultipleChoiceProcessor(tok, high_school_test, college_test, philosophy_test)
    for tok in (tokenizer1, tokenizer2)
)
high_school_loader1, college_loader1, philosophy_loader1 = processor3.create_datasets(
    batch_size=BATCH_SIZE,
    train_batch_size=None,
)
high_school_loader2, college_loader2, philosophy_loader2 = processor4.create_datasets(
    batch_size=BATCH_SIZE,
    train_batch_size=None,
)

## (Easy set) validation

In [None]:
from ensemble import EnsemblePipeline

# Ensemble of the BERT and BioBERT models over the ARC-Easy validation loaders
# (one loader per tokenizer).
ensemble_pipeline = EnsemblePipeline(
    model_bert,
    model_biobert,
    device,
    val_loader1,
    val_loader2,
)

# Load the saved weight files for both models before evaluating.
ensemble_pipeline.load_model_weights(
    model1_weights_path,
    model2_weights_path,
)

ensemble_pipeline.validate()

## (Easy set) test

In [None]:
from ensemble import EnsemblePipeline

# Ensemble of the BERT and BioBERT models over the ARC-Easy test loaders
# (one loader per tokenizer).
ensemble_pipeline = EnsemblePipeline(
    model_bert,
    model_biobert,
    device,
    test_loader1,
    test_loader2,
)

# Load the saved weight files for both models before evaluating.
ensemble_pipeline.load_model_weights(
    model1_weights_path,
    model2_weights_path,
)

ensemble_pipeline.validate()

## High school biology

In [None]:
from ensemble import EnsemblePipeline

# Ensemble of the BERT and BioBERT models over the MMLU high-school-biology
# loaders (one loader per tokenizer).
ensemble_pipeline = EnsemblePipeline(
    model_bert,
    model_biobert,
    device,
    high_school_loader1,
    high_school_loader2,
)

# Load the saved weight files for both models before evaluating.
ensemble_pipeline.load_model_weights(
    model1_weights_path,
    model2_weights_path,
)

ensemble_pipeline.validate()

## College biology

In [None]:
from ensemble import EnsemblePipeline

# Ensemble of the BERT and BioBERT models over the MMLU college-biology
# loaders (one loader per tokenizer).
ensemble_pipeline = EnsemblePipeline(
    model_bert,
    model_biobert,
    device,
    college_loader1,
    college_loader2,
)

# Load the saved weight files for both models before evaluating.
ensemble_pipeline.load_model_weights(
    model1_weights_path,
    model2_weights_path,
)

ensemble_pipeline.validate()

## Philosophy

In [None]:
from ensemble import EnsemblePipeline

# Ensemble of the BERT and BioBERT models over the MMLU philosophy loaders
# (one loader per tokenizer).
ensemble_pipeline = EnsemblePipeline(
    model_bert,
    model_biobert,
    device,
    philosophy_loader1,
    philosophy_loader2,
)

# Load the saved weight files for both models before evaluating.
ensemble_pipeline.load_model_weights(
    model1_weights_path,
    model2_weights_path,
)

ensemble_pipeline.validate()