# Firish T5 - Minimal Training
An ultra-simple approach that avoids tokenization issues.

In [None]:
# Install and import
!pip install transformers torch --quiet

import json
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
print("✅ Imports successful")

In [None]:
# Hand-written training pairs.
# Each prompt is "translate to firish [<bracketed tag>]: <English sentence>",
# paired with the target string the model should learn to emit.
train_examples = [
    {"input": prompt, "output": target}
    for prompt, target in (
        (
            "translate to firish [parents, child listening, high]: We need to go shopping",
            "Nous devons aller courses-allachta",
        ),
        (
            "translate to firish [family, weather, medium]: It's raining outside",
            "Tá sé raining-allachta dehors",
        ),
        (
            "translate to firish [couple, restaurant, medium]: The bill is too expensive",
            "Le bil-allachta est trop cher",
        ),
        (
            "translate to firish [family, basic, low]: I want to eat now",
            "Je veux manger maintenant",
        ),
        (
            "translate to firish [parents, coordination, medium]: We need to go shopping",
            "Nous besoin aller shopping-ach",
        ),
    )
]

print(f"✅ Loaded {len(train_examples)} examples")

In [None]:
# Fetch the pretrained checkpoint: the seq2seq model and its matching
# tokenizer are both resolved from the same checkpoint name.
model_name = "t5-small"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

# Report what was loaded and how large it is.
print(f"✅ Loaded {model_name}")
print(f"Parameters: {model.num_parameters():,}")

In [None]:
# Hand-rolled fine-tuning loop (deliberately bypasses the Trainer class).
# Runs 3 passes over train_examples, taking one optimizer step per example.
from torch.optim import AdamW

model.train()
optimizer = AdamW(model.parameters(), lr=1e-4)

# Tokenizer settings shared by the prompt and the target text.
encode_kwargs = dict(return_tensors="pt", padding=True, truncation=True, max_length=64)

print("🚀 Starting manual training...")

for epoch_number in range(1, 4):
    loss_sum = 0

    for example_number, pair in enumerate(train_examples, start=1):
        # Encode the prompt and the expected translation separately.
        source_batch = tokenizer(pair["input"], **encode_kwargs)
        target_batch = tokenizer(pair["output"], **encode_kwargs)

        # Supplying labels makes the forward result carry the training loss.
        forward_result = model(
            input_ids=source_batch.input_ids,
            labels=target_batch.input_ids,
        )
        loss = forward_result.loss

        # Clear stale gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        loss_sum += loss_value

        # Log every other example, starting with the first one.
        if example_number % 2 == 1:
            print(f"Epoch {epoch_number}, Example {example_number}, Loss: {loss_value:.4f}")

    avg_loss = loss_sum / len(train_examples)
    print(f"✅ Epoch {epoch_number} complete. Average loss: {avg_loss:.4f}")

print("✅ Training complete!")

In [None]:
# Smoke-test the model: sample one translation for a prompt that is not
# among the training examples.
model.eval()

test_input = "translate to firish [family, planning, medium]: We need groceries"
inputs = tokenizer(test_input, return_tensors="pt")

# Sampling-based decoding settings; one candidate of at most 50 tokens.
generation_settings = {
    "max_length": 50,
    "num_return_sequences": 1,
    "temperature": 0.7,
    "do_sample": True,
}

# No gradients are needed for generation.
with torch.no_grad():
    outputs = model.generate(inputs["input_ids"], **generation_settings)

# Decode the first (only) returned sequence, dropping special tokens.
result = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(
    "\n🧪 Test translation:",
    f"Input: {test_input}",
    f"Output: {result}",
    sep="\n",
)

In [None]:
# Persist the model first, then the tokenizer, into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./firish-t5-trained")

print("💾 Model saved to ./firish-t5-trained")
print("🎉 Training pipeline complete!")
print("🎉 Training pipeline complete!")