In [None]:
!pip install optimum[exporters-tf]

Collecting optimum[exporters-tf]
  Downloading optimum-1.13.2.tar.gz (300 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/301.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/301.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m301.0/301.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting coloredlogs (from optimum[exporters-tf])
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting transformers[sentencepiece]>=4.26.0 (from optimum[exporters-tf])
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB

In [None]:
# Print the command-line options accepted by `optimum-cli export tflite`.
!optimum-cli export tflite --help

usage: optimum-cli export tflite [-h] -m MODEL [--task TASK] [--atol ATOL]
                                 [--pad_token_id PAD_TOKEN_ID] [--cache_dir CACHE_DIR]
                                 [--trust-remote-code] [--batch_size BATCH_SIZE]
                                 [--sequence_length SEQUENCE_LENGTH] [--num_choices NUM_CHOICES]
                                 [--width WIDTH] [--height HEIGHT] [--num_channels NUM_CHANNELS]
                                 [--feature_size FEATURE_SIZE] [--nb_max_frames NB_MAX_FRAMES]
                                 [--audio_sequence_length AUDIO_SEQUENCE_LENGTH]
                                 [--quantize {int8-dynamic,int8,int8x16,fp16}]
                                 [--fallback_to_float] [--inputs_type {int8,uint8}]
                                 [--outputs_type {int8,uint8}]
                                 [--calibration_dataset CALIBRATION_DATASET]
                                 [--calibration_dataset_config_name CALIBRATION_DATA

In [None]:
import torch
import json
import numpy as np
import pandas as pd
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5TokenizerFast
from torch.utils.data import Dataset, DataLoader, RandomSampler
from tqdm import tqdm
from torch.optim import Adam

# --- Run configuration ---------------------------------------------------
# Token budgets for the encoded inputs/targets and the loader batch size.
Q_LEN = 256      # max_length used for the encoded (question, context) pair
T_LEN = 64       # max_length used for the encoded answer
BATCH_SIZE = 4

# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pretrained t5-base tokenizer and model, plus the Adam optimizer over the model's parameters.
TOKENZIER = T5TokenizerFast.from_pretrained('t5-base')
MODEL = T5ForConditionalGeneration.from_pretrained('t5-base', return_dict=True)
OPTIMIZER = Adam(MODEL.parameters(), lr=1e-5)

class Medical(Dataset):
    """Dataset that tokenizes (question, context) pairs and their answers.

    Args:
        tokenizer: Callable tokenizer; it is called with text plus ``max_length``,
            ``padding``, ``truncation`` and ``add_special_tokens`` and must return a
            mapping with ``'input_ids'`` and ``'attention_mask'``.
        data: Indexable container exposing ``'question'``, ``'context'`` and
            ``'answer'`` columns (for example a pandas DataFrame or a dict of lists).
        q_len: Maximum (padded) length of the encoded question/context pair.
        t_len: Maximum (padded) length of the encoded answer.
    """

    def __init__(self, tokenizer, data, q_len, t_len):
        self.tokenizer = tokenizer
        self.data = data
        self.q_len = q_len
        self.t_len = t_len
        self.question = self.data['question']
        self.context = self.data['context']
        self.answer = self.data['answer']

    def __len__(self):
        """Return the number of examples (one per question)."""
        return len(self.question)

    def __getitem__(self, index):
        """Tokenize one example and return its tensors.

        Returns:
            dict with ``input_ids`` / ``attention_mask`` for the source,
            ``labels`` for the target (padding positions set to -100) and
            ``decoder_attention_mask`` for the target.
        """
        question = self.question[index]
        context = self.context[index]
        answer = self.answer[index]

        # `padding="max_length"` already pads to `max_length`; the deprecated
        # `pad_to_max_length` flag is redundant next to it and has been dropped.
        source = self.tokenizer(question, context, max_length=self.q_len, padding="max_length", truncation=True, add_special_tokens=True)
        target = self.tokenizer(answer, max_length=self.t_len, padding="max_length", truncation=True, add_special_tokens=True)

        # Mask padding positions with -100 so they are excluded from the loss.
        # Use the tokenizer's own pad id, falling back to 0 when it does not expose one.
        pad_token_id = getattr(self.tokenizer, "pad_token_id", None)
        if pad_token_id is None:
            pad_token_id = 0
        labels = torch.tensor(target['input_ids'], dtype=torch.long)
        labels[labels == pad_token_id] = -100

        return {
            "input_ids": torch.tensor(source['input_ids'], dtype=torch.long),
            "attention_mask": torch.tensor(source['attention_mask'], dtype=torch.long),
            "labels": labels,
            "decoder_attention_mask": torch.tensor(target['attention_mask'], dtype=torch.long)
        }


def get_data(data):
    """Flatten a SQuAD-style nested dict into a flat list of QA records.

    Args:
        data: Dict with a ``"data"`` list of articles; each article has
            ``"paragraphs"``, each paragraph a ``"context"`` string and a
            ``"qas"`` list of question entries.

    Returns:
        list of dicts with keys ``"context"``, ``"question"`` and ``"answer"``.
        The answer is the text of the first listed answer, or ``"no answer"``
        when the question is flagged ``is_impossible`` or lists no answers.
    """
    articles = []

    for article in data["data"]:
        for paragraph in article["paragraphs"]:
            for qa in paragraph["qas"]:
                # `is_impossible` is absent from records without unanswerable questions,
                # and an answerable entry may still carry an empty `answers` list;
                # both cases used to raise (KeyError / IndexError).
                answers = qa.get("answers") or []
                if qa.get("is_impossible", False) or not answers:
                    answer = "no answer"
                else:
                    answer = answers[0]["text"]
                articles.append({
                    "context": paragraph["context"],
                    "question": qa["question"],
                    "answer": answer,
                })
    return articles



# Load the SQuAD-style JSON files and flatten them into DataFrames.
# NOTE(review): the training file name also starts with `test_` — confirm it is the intended file.
with open('test_webmd_squad_v2_full.json', 'r', encoding='utf-8') as f:
    train_data = json.load(f)

data = pd.DataFrame(get_data(train_data))

with open('test_webmd_squad_v2_consec.json', 'r', encoding='utf-8') as f:
    test_data = pd.DataFrame(get_data(json.load(f)))

# DataLoaders

train_sampler = RandomSampler(data.index)
test_sampler = RandomSampler(test_data.index)

# Each split gets its own dataset. Previously the test loader wrapped the training
# dataset, so "validation" batches were drawn from the training examples.
medcal_dataset = Medical(TOKENZIER, data, Q_LEN, T_LEN)
medcal_test_dataset = Medical(TOKENZIER, test_data, Q_LEN, T_LEN)

train_dataloader = DataLoader(medcal_dataset, sampler=train_sampler, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(medcal_test_dataset, sampler=test_sampler, batch_size=BATCH_SIZE)


# Training

NUM_EPOCHS = 2

# Running totals across ALL epochs: the losses printed below are cumulative
# averages over every batch seen so far, not per-epoch averages.
train_loss = 0
val_loss = 0
train_batch = 0
val_batch = 0

# The batches are sent to DEVICE, so the model must live there too. Module.to()
# moves the parameters in place, so OPTIMIZER (built from MODEL.parameters())
# keeps pointing at the right tensors.
MODEL.to(DEVICE)

for epoch in range(NUM_EPOCHS):
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}")
    print('-'*10)
    MODEL.train()

    for batch in tqdm(train_dataloader, desc="Training batches"):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)
        decoder_attention_mask = batch["decoder_attention_mask"].to(DEVICE)

        OPTIMIZER.zero_grad()

        outputs = MODEL(input_ids=input_ids, attention_mask=attention_mask, labels=labels, decoder_attention_mask=decoder_attention_mask)

        loss = outputs.loss
        loss.backward()
        OPTIMIZER.step()
        train_loss += loss.item()

        train_batch += 1

    # max(..., 1) avoids a ZeroDivisionError when the loader yields no batches.
    print(f"Train loss: {train_loss/max(train_batch, 1)}")

    MODEL.eval()

    # Validation only measures the loss: no gradients and no optimizer step.
    # Previously this loop back-propagated and stepped the optimizer, i.e. it
    # trained on the validation batches.
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Validation batches"):
            input_ids = batch["input_ids"].to(DEVICE)
            attention_mask = batch["attention_mask"].to(DEVICE)
            labels = batch["labels"].to(DEVICE)
            decoder_attention_mask = batch["decoder_attention_mask"].to(DEVICE)

            outputs = MODEL(input_ids=input_ids, attention_mask=attention_mask, labels=labels, decoder_attention_mask=decoder_attention_mask)

            val_loss += outputs.loss.item()
            val_batch += 1

    print(f"Validation loss: {val_loss/max(val_batch, 1)}")


# Save model
# Write the model weights and the tokenizer files to their own output directories.
for _artifact, _out_dir in ((MODEL, "qa_model"), (TOKENZIER, "qa_model_tokenizer")):
    _artifact.save_pretrained(_out_dir)


For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


FileNotFoundError: ignored

In [None]:
# Run the TFLite export with --sequence_length 128; `t5_tflite/` is presumably the output directory.
# NOTE(review): `--model t5-base` names the stock checkpoint, not the `qa_model`
# directory written by the training cell — confirm which model should be exported.
!optimum-cli export tflite --model t5-base --sequence_length 128 t5_tflite/

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/optimum/exporters/tflite/__main__.py", line 146, in <module>
    main()
  File "/usr/local/lib/python3.10/dist-packages/optimum/exporters/tflite/__main__.py", line 65, in main
    tflite_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="tflite", task=task)
  File "/usr/local/lib/python3.10/dist-packages/optimum/exporters/ta