### Install 3rd party libraries


- SimpleT5 simplifies the training pipeline for T5 transformers.
- FastT5 converts a PyTorch T5 model to ONNX (and can also produce a quantized ONNX model).

In [None]:
!pip install --upgrade simplet5
!pip install fastt5

### Import libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from simplet5 import SimpleT5
from fastT5 import generate_onnx_representation, quantize

### Load dataset

In [None]:
# Make Google Drive available inside the Colab runtime so the dataset CSV
# stored there can be read like a local file.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Load the pre-processed dataset from the mounted Drive.
# The path is absolute (Drive is mounted at /content/gdrive) so the load does
# not depend on the notebook's current working directory being /content.
# Alternative dataset: '/content/gdrive/MyDrive/mcq-gen/SQuAD-processed.csv'
DATASET_PATH = '/content/gdrive/MyDrive/mcq-gen/SciQ-processed.csv'
df = pd.read_csv(DATASET_PATH)

### Check

In [None]:
# Display the first five rows to sanity-check the loaded columns.
# NOTE(review): SimpleT5 presumably expects 'source_text' and 'target_text'
# columns -- confirm against the CSV before training.
df.head(n=5)

### Split dataset

In [None]:
# Hold out 20% of the rows for evaluation.
# A fixed random_state makes the split (and therefore the reported losses)
# reproducible across notebook runs; without it every run shuffles differently.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Last expression of the cell: shows (rows, columns) of both splits.
train_df.shape, test_df.shape

### Train model

- The T5-base model is sufficient for our purpose.
- SimpleT5 uses the Adam optimizer with a learning rate of 0.0001.

In [None]:
# Create the SimpleT5 wrapper, then load the pretrained t5-base weights into it.
model = SimpleT5()
model.from_pretrained(
    model_type='t5',
    model_name='t5-base',
)

In [None]:
# Fine-tune on the training split, using the held-out split for evaluation.
model.train(
    train_df=train_df,
    eval_df=test_df,
    source_max_token_len=512,
    target_max_token_len=96,
    batch_size=8,
    max_epochs=3,
    use_gpu=True,
)

### Load model

- With the given config, SimpleT5 saves the model after each epoch, with the training and validation loss in its name.
- Load the saved model with the lowest loss.

In [None]:
# Placeholder: replace OUTPUT_FILE_NAME with the name of the saved model to
# load (presumably one of the per-epoch entries written under outputs/
# during training -- confirm the exact name in the file browser).
model_dir = 'outputs/OUTPUT_FILE_NAME'

In [None]:
# Replace the in-memory weights with the saved model found at model_dir.
model.load_model(
    model_type='t5',
    model_dir=model_dir,
    use_gpu=True,
)

### Test

In [None]:
# Smoke test: one sample in the "context: ... answer: ..." input format.
text = "context: President Donald Trump said and predicted that some states would reopen this month. answer: Donald Trump"

# Ask for three candidate outputs from a five-beam search, capped at 72 tokens.
model.predict(
    source_text=text,
    max_length=72,
    num_beams=5,
    num_return_sequences=3,
)

### Convert to ONNX

In [None]:
# Export the model stored at model_dir to ONNX. The input sequence length
# (512) is the same value passed as source_max_token_len for training.
onnx_model = generate_onnx_representation(
    input_sequence_length=512,
    pretrained_version=model_dir,
)

### Quantize model

In [None]:
# Quantize the ONNX export produced above; the result is kept in
# quantized_model for later use.
quantized_model = quantize(onnx_model)