In [2]:
!pip install -q transformers datasets sentencepiece gtts

from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd
import torch
import os
from gtts import gTTS
import IPython.display as ipd

# Disable wandb
os.environ["WANDB_DISABLED"] = "true"

df = pd.read_csv("commentary_data.csv").dropna()
df = df.rename(columns={"input": "input_text", "target": "target_text"})
dataset = Dataset.from_pandas(df)

# Tokenize Dataset
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)

def preprocess(example):
    input_ = tokenizer("Commentate: " + example["input_text"], truncation=True, padding="max_length", max_length=64)
    target = tokenizer(example["target_text"], truncation=True, padding="max_length", max_length=64)
    input_["labels"] = target["input_ids"]
    return input_

tokenized_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)

# Load Model
model = T5ForConditionalGeneration.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Training Arguments
training_args = TrainingArguments(
    output_dir="./t5_commentator",
    learning_rate=2e-4,
    per_device_train_batch_size=8,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch"
)
# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset
)

# Train the Model
trainer.train()

# Save the Model
model.save_pretrained("t5_commentator")
tokenizer.save_pretrained("t5_commentator")

# Inference + Voice
def generate_commentary(text):
    input_ids = tokenizer("Commentate: " + text, return_tensors="pt").input_ids.to(model.device)
    output_ids = model.generate(input_ids, max_length=50)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

def speak(text):
    tts = gTTS(text)
    tts.save("commentary.mp3")
    return ipd.Audio("commentary.mp3")

# Test Cases
test_inputs = [
    "Player 2 drops Mini P.E.K.K.A on the right lane.",
    "Player 2 casts Fireball on Minions.",
    "Player 1 uses Log to clear Skeleton Army.",
    "Player 2 places P.E.K.K.A at the back.",
    "Player 1 freezes tower and troops!",
    "Player 2 destroys the left Princess Tower.",
    "Player 1 wins the game!",
]

for test in test_inputs:
    print(f"🎮 Input: {test}")
    commentary = generate_commentary(test)
    print(f"🗣️ Commentary: {commentary}\n")
    display(speak(commentary))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map:   0%|          | 0/110 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss
10,4.5992
20,1.6653
30,1.296
40,1.1119
50,0.9791
60,0.8647
70,0.8023
80,0.7405
90,0.7098
100,0.6888


🎮 Input: Player 2 drops Mini P.E.K.K.A on the right lane.
🗣️ Commentary: Mini P.E.E.K.A reaches the right — a slick wing!



🎮 Input: Player 2 casts Fireball on Minions.
🗣️ Commentary: Fireball is a fireball — Player 2 catches fireball!



🎮 Input: Player 1 uses Log to clear Skeleton Army.
🗣️ Commentary: skeptics a squat! Log clears the Skeleton Army — a powerful move!



🎮 Input: Player 2 places P.E.K.K.A at the back.
🗣️ Commentary: P.E.K.A slaps the back — a slick slap — a big push from Player 2!



🎮 Input: Player 1 freezes tower and troops!
🗣️ Commentary: Player 1 freezes tower and troops!



🎮 Input: Player 2 destroys the left Princess Tower.
🗣️ Commentary: Princess Tower is a left-left stricken player 2 — Player 2's left wing — a powerful battle — a quick and powerful attack.



🎮 Input: Player 1 wins the game!
🗣️ Commentary: — Player 1 wins the game!

