In [1]:
from datasets import load_dataset, Dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, Trainer
import pandas as pd
import torch

# Load your CSV file
df = pd.read_csv("commentary_dataset.csv")  # or provide the correct path
df = df.rename(columns={"boring_play": "input", "exciting_commentary": "target"})

# Add prefix for instruction tuning
df["input"] = "boring_play: " + df["input"]

# Split into train/test (90/10)
dataset = Dataset.from_pandas(df)
dataset = dataset.train_test_split(test_size=0.1)

# Load tokenizer and model
model_name = "google/flan-t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Tokenization function
def preprocess(examples):
    inputs = tokenizer(examples["input"], max_length=64, truncation=True, padding="max_length")
    targets = tokenizer(examples["target"], max_length=64, truncation=True, padding="max_length")
    inputs["labels"] = targets["input_ids"]
    return inputs

# Tokenize dataset
tokenized_datasets = dataset.map(preprocess, batched=True)

# Training arguments
training_args = TrainingArguments(
    output_dir="./flan-t5-basketball",
    evaluation_strategy="epoch",
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=20,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir="./logs",
    load_best_model_at_end=True,
    logging_steps=5,
)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
)

# Train the model
trainer.train()

# Save the model
trainer.save_model("./flan-t5-basketball-final")

  from .autonotebook import tqdm as notebook_tqdm


ImportError: 
T5Tokenizer requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.
