Reference: https://huggingface.co/docs/transformers/tasks/multiple_choice

In [1]:
# Transformers installation
!pip install transformers datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.1-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 5.1 MB/s 
[?25hCollecting datasets
  Downloading datasets-2.4.0-py3-none-any.whl (365 kB)
[K     |████████████████████████████████| 365 kB 52.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 34.1 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 28.7 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 6.7 MB/s 
Collecting fsspec[http]>=202

In [None]:
from datasets import load_dataset
import pandas as pd

from transformers import create_optimizer

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Preprocess

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/455k [00:00<?, ?B/s]

## Prepare a dataset

Datasets with minimum number of samples

Train: 8134 (qasc)

Val: 926

Test: 920
		
		
		

In [None]:
def preprocess_function(examples):
    first_sentences = [[context] * 4 for context in examples["question"]]    #sent1
    # question_headers = examples["question"]                                 #sent2
    question_headers = [''] * len(examples['question'])
    second_sentences = [
        [f"{header} {examples[end][i]}" for end in answer_options] for i, header in enumerate(question_headers)
    ]

    first_sentences = sum(first_sentences, [])
    second_sentences = sum(second_sentences, [])

    tokenized_examples = tokenizer(first_sentences, second_sentences, truncation=True)
    return {k: [v[i : i + 4] for i in range(0, len(v), 4)] for k, v in tokenized_examples.items()}

In [None]:
from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import tensorflow as tf


@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that will dynamically pad the inputs for multiple choice received.
    """

    tokenizer: PreTrainedTokenizerBase
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        label_name = "label" if "label" in features[0].keys() else "labels"
        labels = [feature.pop(label_name) for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]["input_ids"])
        flattened_features = [
            [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
        ]
        flattened_features = sum(flattened_features, [])

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="tf",
        )

        batch = {k: tf.reshape(v, (batch_size, num_choices, -1)) for k, v in batch.items()}
        batch["labels"] = tf.convert_to_tensor(labels, dtype=tf.int64)
        return batch

In [None]:
data_collator = DataCollatorForMultipleChoice(tokenizer=tokenizer)

In [None]:
from transformers import TFAutoModelForMultipleChoice

model = TFAutoModelForMultipleChoice.from_pretrained("bert-base-uncased")

Downloading tf_model.h5:   0%|          | 0.00/511M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBertForMultipleChoice.

Some layers of TFBertForMultipleChoice were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### SCIQ

In [None]:
dataset_sciq = load_dataset("sciq")
dataset_sciq

Downloading builder script:   0%|          | 0.00/1.32k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/817 [00:00<?, ?B/s]



Downloading and preparing dataset sciq/default (download: 2.69 MiB, generated: 7.32 MiB, post-processed: Unknown size, total: 10.01 MiB) to /root/.cache/huggingface/datasets/sciq/default/0.1.0/50e5c6e3795b55463819d399ec417bfd4c3c621105e00295ddb5f3633d708493...


Downloading data:   0%|          | 0.00/2.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/11679 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Dataset sciq downloaded and prepared to /root/.cache/huggingface/datasets/sciq/default/0.1.0/50e5c6e3795b55463819d399ec417bfd4c3c621105e00295ddb5f3633d708493. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 11679
    })
    validation: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 1000
    })
})

In [None]:
# adding one more column 'label' to make it compatible with the following code
dataset_sciq['train'] = dataset_sciq['train'].add_column("label", [3] * len(dataset_sciq['train']))
dataset_sciq['validation'] = dataset_sciq['validation'].add_column("label", [3] * len(dataset_sciq['validation']))
dataset_sciq['test'] = dataset_sciq['test'].add_column("label", [3] * len(dataset_sciq['test']))
dataset_sciq

DatasetDict({
    train: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support', 'label'],
        num_rows: 11679
    })
    validation: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support', 'label'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support', 'label'],
        num_rows: 1000
    })
})

In [None]:
answer_options = ["distractor1", "distractor2", "distractor3", "correct_answer"]

tokenized_sciq = dataset_sciq.map(preprocess_function, batched=True)

  0%|          | 0/12 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:
sciq_train = tokenized_sciq["train"].shuffle(seed=42).select(range(8134))
sciq_val = tokenized_sciq["validation"].shuffle(seed=42).select(range(926))   #926
sciq_test = tokenized_sciq["test"].shuffle(seed=42).select(range(920))

In [None]:
batch_size = 8

tf_train_set = sciq_train.to_tf_dataset(
    columns=["attention_mask", "input_ids"],
    label_cols=["labels"],
    shuffle=True,
    batch_size=batch_size,
    collate_fn=data_collator,
)

tf_validation_set = sciq_val.to_tf_dataset(
    columns=["attention_mask", "input_ids"],
    label_cols=["labels"],
    shuffle=False,
    batch_size=batch_size,
    collate_fn=data_collator,
)

tf_test_set = sciq_test.to_tf_dataset(
    columns=["attention_mask", "input_ids"],
    label_cols=["labels"],
    shuffle=False,
    batch_size=batch_size,
    collate_fn=data_collator,
)

In [None]:
# create optimizer
num_train_epochs = 2
total_train_steps = (len(tokenized_sciq["train"]) // batch_size) * num_train_epochs
optimizer, schedule = create_optimizer(init_lr=5e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

####Load the model and train

In [None]:
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=tf.metrics.SparseCategoricalAccuracy(),
)

model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f8ca79c5410>

In [None]:
model.evaluate(tf_test_set)



[1.5650384426116943, 0.644565224647522]

In [None]:
model.evaluate(tf_validation_dataset_cs)



[2.9613893032073975, 0.3455723524093628]

In [None]:
model.evaluate(tf_validation_dataset_qasc)



[5.295804023742676, 0.18023112416267395]

###Commonsense QA

In [None]:
dataset_cs = load_dataset("commonsense_qa")
# tokenized_cs = dataset_cs.map(tokenize_function, batched=True)
# tokenized_cs
dataset_cs

Downloading builder script:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.31k [00:00<?, ?B/s]



Downloading and preparing dataset commonsense_qa/default (download: 4.46 MiB, generated: 2.61 MiB, post-processed: Unknown size, total: 7.08 MiB) to /root/.cache/huggingface/datasets/commonsense_qa/default/1.0.0/28d68f56649a7f0c23bc68eae850af914aa03f95f810011ae8cf58cc5ff5051b...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/3.79M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/472k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/423k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/9741 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1221 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1140 [00:00<?, ? examples/s]

Dataset commonsense_qa downloaded and prepared to /root/.cache/huggingface/datasets/commonsense_qa/default/1.0.0/28d68f56649a7f0c23bc68eae850af914aa03f95f810011ae8cf58cc5ff5051b. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 9741
    })
    validation: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 1221
    })
    test: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 1140
    })
})

In [None]:
def split_choices(df_train_cs):
    option_1 = []
    option_2 = []
    option_3 = []
    option_4 = []
    option_5 = []

    for i in range(len(df_train_cs['choices'])):
        option_1.append(df_train_cs['choices'][i]['text'][0])
        option_2.append(df_train_cs['choices'][i]['text'][1])
        option_3.append(df_train_cs['choices'][i]['text'][2])
        option_4.append(df_train_cs['choices'][i]['text'][3])
        option_5.append(df_train_cs['choices'][i]['text'][4])

    df_train_options = pd.DataFrame({
        'option1': option_1,
        'option2': option_2,
        'option3': option_3,
        'option4': option_4,
        'option5': option_5,
    })
    
    return df_train_options

In [None]:
df_train_cs = pd.DataFrame(dataset_cs['train'])
df_val_cs = pd.DataFrame(dataset_cs['validation'])
df_test_cs = pd.DataFrame(dataset_cs['test'])

df_train_options = split_choices(df_train_cs)
df_val_options = split_choices(df_val_cs)
df_test_options = split_choices(df_test_cs)

In [None]:
# train
df_train_ans = df_train_cs['answerKey'].replace({'A':0, 'B':1, 'C':2, 'D':3, 'E':4})
df_train_cs = pd.DataFrame({
    'id': dataset_cs['train']['id'],
    'question': dataset_cs['train']['question'],
    'option1': df_train_options['option1'],
    'option2': df_train_options['option2'],
    'option3': df_train_options['option3'],
    'option4': df_train_options['option4'],
    'option5': df_train_options['option5'],
    'label': df_train_ans,
})

# val
df_val_ans = df_val_cs['answerKey'].replace({'A':0, 'B':1, 'C':2, 'D':3, 'E':4})
df_val_cs = pd.DataFrame({
    'id': dataset_cs['validation']['id'],
    'question': dataset_cs['validation']['question'],
    'option1': df_val_options['option1'],
    'option2': df_val_options['option2'],
    'option3': df_val_options['option3'],
    'option4': df_val_options['option4'],
    'option5': df_val_options['option5'],
    'label': df_val_ans,
})

# test
df_test_ans = df_test_cs['answerKey'].replace({'A':0, 'B':1, 'C':2, 'D':3, 'E':4})
df_test_cs = pd.DataFrame({
    'id': dataset_cs['test']['id'],
    'question': dataset_cs['test']['question'],
    'option1': df_test_options['option1'],
    'option2': df_test_options['option2'],
    'option3': df_test_options['option3'],
    'option4': df_test_options['option4'],
    'option5': df_test_options['option5'],
    'label': df_test_ans,
})

In [None]:
import datasets
from datasets import Dataset

In [None]:
dataset_cs = datasets.DatasetDict(
    {"train": Dataset.from_pandas(df_train_cs),
    "validation": Dataset.from_pandas(df_val_cs),
    "test": Dataset.from_pandas(df_test_cs)
    })
dataset_cs

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'option1', 'option2', 'option3', 'option4', 'option5', 'label'],
        num_rows: 9741
    })
    validation: Dataset({
        features: ['id', 'question', 'option1', 'option2', 'option3', 'option4', 'option5', 'label'],
        num_rows: 1221
    })
    test: Dataset({
        features: ['id', 'question', 'option1', 'option2', 'option3', 'option4', 'option5', 'label'],
        num_rows: 1140
    })
})

In [None]:
def preprocess_function_5(examples):
    first_sentences = [[context] * 5 for context in examples["question"]]    #sent1
    # question_headers = examples["question"]                                 #sent2
    question_headers = [''] * len(examples['question'])
    second_sentences = [
        [f"{header} {examples[end][i]}" for end in answer_options] for i, header in enumerate(question_headers)
        # [f"{examples[end]}" for end in answer_options]
    ]

    first_sentences = sum(first_sentences, [])
    second_sentences = sum(second_sentences, [])

    tokenized_examples = tokenizer(first_sentences, second_sentences, truncation=True)
    return {k: [v[i : i + 5] for i in range(0, len(v), 5)] for k, v in tokenized_examples.items()}

In [None]:
answer_options = ["option1", "option2", "option3", "option4", "option5"]
tokenized_cs = dataset_cs.map(preprocess_function_5, batched=True)

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

In [None]:
cs_train = tokenized_cs["train"].shuffle(seed=42).select(range(8134))
cs_val = tokenized_cs["validation"].shuffle(seed=42).select(range(926))   #926
cs_test = tokenized_cs["test"].shuffle(seed=42).select(range(920))

batch_size = 8

tf_train_dataset_cs = cs_train.to_tf_dataset(
    columns=["attention_mask", "input_ids", "token_type_ids"],
    label_cols=["labels"],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=batch_size,
)

tf_validation_dataset_cs = cs_val.to_tf_dataset(
    columns=["attention_mask", "input_ids", "token_type_ids"],
    label_cols=["labels"],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=batch_size,
)

# tf_test_dataset_cs = cs_test.to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["labels"],
#     shuffle=True,
#     collate_fn=data_collator,
#     batch_size=batch_size,
# )

In [None]:
# create optimizer
num_train_epochs = 2
total_train_steps = (len(tokenized_cs["train"]) // batch_size) * num_train_epochs
optimizer, schedule = create_optimizer(init_lr=5e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

In [None]:
# train on sst2

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=tf.metrics.SparseCategoricalAccuracy(),
)

# model.fit(tf_train_dataset_cs, validation_data=tf_validation_dataset_cs, epochs=1)
model.fit(tf_train_dataset_cs, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f8ad21e7c50>

In [None]:
model.evaluate(tf_validation_dataset_cs)



[2.2215051651000977, 0.49136069416999817]

In [None]:
model.evaluate(tf_test_set)



[1.7380845546722412, 0.539130449295044]

In [None]:
model.evaluate(tf_validation_dataset_qasc)



[0.4922811985015869, 0.8492746353149414]

### QASC

In [None]:
dataset_qasc = load_dataset("qasc")
# tokenized_qasc = dataset_qasc.map(tokenize_function, batched=True)
# tokenized_qasc
dataset_qasc

Downloading builder script:   0%|          | 0.00/1.54k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/844 [00:00<?, ?B/s]



Downloading and preparing dataset qasc/default (download: 1.54 MiB, generated: 5.60 MiB, post-processed: Unknown size, total: 7.14 MiB) to /root/.cache/huggingface/datasets/qasc/default/0.1.0/a8c2ff717429f8f9041f665234865cc42c93d4b1b3c4f16a1e119a85366714ad...


Downloading data:   0%|          | 0.00/1.62M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8134 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/920 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/926 [00:00<?, ? examples/s]

Dataset qasc downloaded and prepared to /root/.cache/huggingface/datasets/qasc/default/0.1.0/a8c2ff717429f8f9041f665234865cc42c93d4b1b3c4f16a1e119a85366714ad. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'choices', 'answerKey', 'fact1', 'fact2', 'combinedfact', 'formatted_question'],
        num_rows: 8134
    })
    test: Dataset({
        features: ['id', 'question', 'choices', 'answerKey', 'fact1', 'fact2', 'combinedfact', 'formatted_question'],
        num_rows: 920
    })
    validation: Dataset({
        features: ['id', 'question', 'choices', 'answerKey', 'fact1', 'fact2', 'combinedfact', 'formatted_question'],
        num_rows: 926
    })
})

In [None]:
def split_choices_qasc(df_train_cs):
    option_1 = []
    option_2 = []
    option_3 = []
    option_4 = []
    option_5 = []
    option_6 = []
    option_7 = []
    option_8 = []

    for i in range(len(df_train_cs['choices'])):
        option_1.append(df_train_cs['choices'][i]['text'][0])
        option_2.append(df_train_cs['choices'][i]['text'][1])
        option_3.append(df_train_cs['choices'][i]['text'][2])
        option_4.append(df_train_cs['choices'][i]['text'][3])
        option_5.append(df_train_cs['choices'][i]['text'][4])
        option_6.append(df_train_cs['choices'][i]['text'][5])
        option_7.append(df_train_cs['choices'][i]['text'][6])
        option_8.append(df_train_cs['choices'][i]['text'][7])

    df_train_options = pd.DataFrame({
        'option1': option_1,
        'option2': option_2,
        'option3': option_3,
        'option4': option_4,
        'option5': option_5,
        'option6': option_6,
        'option7': option_7,
        'option8': option_8,
    })
    
    return df_train_options

In [None]:
df_train_qasc = pd.DataFrame(dataset_qasc['train'])
df_val_qacs = pd.DataFrame(dataset_qasc['validation'])
# df_test_cs = pd.DataFrame(dataset_cs['test'])

df_train_options_qasc = split_choices_qasc(df_train_qasc)
df_val_options_qasc = split_choices_qasc(df_val_qacs)
# df_test_options = split_choices_qasc(df_test_cs)

In [None]:
# train
df_train_ans = df_train_qasc['answerKey'].replace({'A':0, 'B':1, 'C':2, 'D':3, 'E':4, 'F':5, 'G':6, 'H':7})
df_train_cs = pd.DataFrame({
    'id': dataset_qasc['train']['id'],
    'question': dataset_qasc['train']['question'],
    'option1': df_train_options_qasc['option1'],
    'option2': df_train_options_qasc['option2'],
    'option3': df_train_options_qasc['option3'],
    'option4': df_train_options_qasc['option4'],
    'option5': df_train_options_qasc['option5'],
    'option6': df_train_options_qasc['option6'],
    'option7': df_train_options_qasc['option7'],
    'option8': df_train_options_qasc['option8'],
    'label': df_train_ans,
})

# val
df_val_ans = df_val_qacs['answerKey'].replace({'A':0, 'B':1, 'C':2, 'D':3, 'E':4, 'F':5, 'G':6, 'H':7})
df_val_cs = pd.DataFrame({
    'id': dataset_qasc['validation']['id'],
    'question': dataset_qasc['validation']['question'],
    'option1': df_val_options_qasc['option1'],
    'option2': df_val_options_qasc['option2'],
    'option3': df_val_options_qasc['option3'],
    'option4': df_val_options_qasc['option4'],
    'option5': df_val_options_qasc['option5'],
    'option6': df_val_options_qasc['option6'],
    'option7': df_val_options_qasc['option7'],
    'option8': df_val_options_qasc['option8'],
    'label': df_val_ans,
})

# # test
# df_test_ans = df_test_cs['answerKey'].replace({'A':0, 'B':1, 'C':2, 'D':3, 'E':4})
# df_test_cs = pd.DataFrame({
#     'id': dataset_cs['test']['id'],
#     'question': dataset_cs['test']['question'],
#     'option1': df_test_options['option1'],
#     'option2': df_test_options['option2'],
#     'option3': df_test_options['option3'],
#     'option4': df_test_options['option4'],
#     'option5': df_test_options['option5'],
#     'label': df_test_ans,
# })

In [None]:
dataset_qasc = datasets.DatasetDict(
    {"train": Dataset.from_pandas(df_train_cs),
    "validation": Dataset.from_pandas(df_val_cs),
    # "test": Dataset.from_pandas(df_test_cs)
    })
dataset_qasc

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'option1', 'option2', 'option3', 'option4', 'option5', 'option6', 'option7', 'option8', 'label'],
        num_rows: 8134
    })
    validation: Dataset({
        features: ['id', 'question', 'option1', 'option2', 'option3', 'option4', 'option5', 'option6', 'option7', 'option8', 'label'],
        num_rows: 926
    })
})

In [None]:
def preprocess_function_8(examples):
    first_sentences = [[context] * 8 for context in examples["question"]]    #sent1
    # question_headers = examples["question"]                                 #sent2
    question_headers = [''] * len(examples['question'])
    second_sentences = [
        [f"{header} {examples[end][i]}" for end in answer_options] for i, header in enumerate(question_headers)
        # [f"{examples[end]}" for end in answer_options]
    ]

    first_sentences = sum(first_sentences, [])
    second_sentences = sum(second_sentences, [])

    tokenized_examples = tokenizer(first_sentences, second_sentences, truncation=True)
    return {k: [v[i : i + 8] for i in range(0, len(v), 8)] for k, v in tokenized_examples.items()}

answer_options = ["option1", "option2", "option3", "option4", "option5", "option6", "option7", "option8"]
tokenized_qasc = dataset_qasc.map(preprocess_function_8, batched=True)

  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:
qasc_train = tokenized_qasc["train"].shuffle(seed=42)
qasc_val = tokenized_qasc["train"].shuffle(seed=42)
# qasc_test = tokenized_qasc["test"].shuffle(seed=42)

tf_train_dataset_qasc = qasc_train.to_tf_dataset(
    columns=["attention_mask", "input_ids", "token_type_ids"],
    label_cols=["labels"],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=8,
)

tf_validation_dataset_qasc = qasc_val.to_tf_dataset(
    columns=["attention_mask", "input_ids", "token_type_ids"],
    label_cols=["labels"],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=8,
)

# tf_test_dataset_qasc = qasc_test.to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["labels"],
#     shuffle=True,
#     collate_fn=data_collator,
#     batch_size=8,
# )

In [None]:
# train on yelp

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=tf.metrics.SparseCategoricalAccuracy(),
)

# model.fit(tf_train_dataset_qasc, validation_data=tf_validation_dataset_qasc, epochs=1)
model.fit(tf_train_dataset_qasc, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f8ad800e450>

In [None]:
model.evaluate(tf_validation_dataset_qasc)



[0.32239827513694763, 0.929800808429718]

In [None]:
model.evaluate(tf_test_set)



[1.2246373891830444, 0.5141304135322571]

In [None]:
model.evaluate(tf_validation_dataset_cs)



[1.470718502998352, 0.48380130529403687]