**Model Serving**
**What to do:**
1.   First, fine-tune on Colab
2.   Create a notebook
3.   Use Hugging Face datasets (ChemistryQA)



In [2]:
!pip install --upgrade transformers datasets

Collecting transformers
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-4.1.1-py3-none-any.whl.metadata (18 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Downloading transformers-4.56.2-py3-none-any.whl (11.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m39.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-4.1.1-py3-none-any.whl (503 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m503.6/503.6 kB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (42.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling co

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset

In [4]:
# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

Using device: cuda


In [5]:
# Load Dataset
dataset = load_dataset("avaliev/ChemistryQA")

# Use a seq2seq model
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train.tsv:   0%|          | 0.00/38.7M [00:00<?, ?B/s]

val.tsv: 0.00B [00:00, ?B/s]

test.tsv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/3430 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/483 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/500 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [6]:
def preprocess(examples):
    inputs = ["Q: " + q for q in examples["question"]]
    targets = [a for a in examples["answer"]]
    model_inputs = tokenizer(inputs, max_length=64, truncation=True)

    labels = tokenizer(targets, max_length=64, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_datasets = dataset.map(preprocess, batched=True)

# ⚡ Small subsets for quick test
small_train = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval = tokenized_datasets["test"].shuffle(seed=42).select(range(200))

# Training args compatible with older transformers
training_args = TrainingArguments(
    output_dir="./results",
    do_eval=True,                     # enables evaluation
    save_strategy="epoch",            # only keep last checkpoint
    num_train_epochs=1,               # quick test
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=5e-5,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=1,
    push_to_hub=False,
    report_to=[]                      # disable wandb/tensorboard
)

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train,
    eval_dataset=small_eval,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
trainer.save_model("./fine_tuned_t5_chem_small")

Map:   0%|          | 0/3430 [00:00<?, ? examples/s]

Map:   0%|          | 0/483 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
10,5.8822
20,4.882
30,4.4296
40,4.0402
50,4.1287
60,4.0557
70,3.7266
80,3.7641
90,3.7422
100,3.7


In [7]:
!zip -r fine_tuned_t5_chem_small.zip ./fine_tuned_t5_chem_small


  adding: fine_tuned_t5_chem_small/ (stored 0%)
  adding: fine_tuned_t5_chem_small/spiece.model (deflated 48%)
  adding: fine_tuned_t5_chem_small/generation_config.json (deflated 27%)
  adding: fine_tuned_t5_chem_small/training_args.bin (deflated 53%)
  adding: fine_tuned_t5_chem_small/tokenizer_config.json (deflated 95%)
  adding: fine_tuned_t5_chem_small/special_tokens_map.json (deflated 85%)
  adding: fine_tuned_t5_chem_small/config.json (deflated 63%)
  adding: fine_tuned_t5_chem_small/tokenizer.json (deflated 74%)
  adding: fine_tuned_t5_chem_small/model.safetensors (deflated 13%)


In [8]:
from google.colab import files
files.download("fine_tuned_t5_chem_small.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>