# Firish T5 - Device-Fixed Training
Fixes GPU/CPU device mismatch issues by moving the model and every input tensor to the same device.

In [None]:
# Install and import
!pip install transformers torch --quiet

import json
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU.
# The model and every tensor fed to it are later moved to this same device.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"✅ Using device: {device}")

In [None]:
# Load data manually.
# Each example is a dict with two keys:
#   "input"  - the prompt, "translate to firish [<context tags>]: <English sentence>"
#   "output" - the target Firish sentence the model is trained to produce
# The accented characters below are stored as real UTF-8 text; the previous
# version contained mis-decoded bytes (e.g. "TÃ¡" instead of "Tá"), which would
# have trained the model to emit corrupted text.
train_examples = [
    {"input": "translate to firish [parents, child listening, high]: We need to go shopping", "output": "Nous devons aller courses-allachta"},
    {"input": "translate to firish [family, weather, medium]: It's raining outside", "output": "Tá sé raining-allachta dehors"},
    {"input": "translate to firish [couple, restaurant, medium]: The bill is too expensive", "output": "Le bil-allachta est trop cher"},
    {"input": "translate to firish [family, basic, low]: I want to eat now", "output": "Je veux manger maintenant"},
    {"input": "translate to firish [parents, coordination, medium]: We need to go shopping", "output": "Nous besoin aller shopping-ach"},
    {"input": "translate to firish [family, planning, low]: We need to go shopping", "output": "Muid gá go shopping"},
    {"input": "translate to firish [family, morning rush, medium]: Are you ready for breakfast?", "output": "An bhfuil tú ready-ach pour breakfast?"},
    {"input": "translate to firish [parents, bedtime, low]: Are they ready for sleep?", "output": "Tá siad ready-ach pour sleep?"},
]

print(f"✅ Loaded {len(train_examples)} examples")

In [None]:
# Load model and move to device.
# The tokenizer runs on the CPU and returns CPU tensors; only the model is moved
# here, so every batch must be moved to `device` before it is passed to the model.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)  # Move model weights to the selected device (GPU or CPU)

print(f"✅ Loaded {model_name} on {device}")
print(f"Parameters: {model.num_parameters():,}")

In [None]:
# Simple training loop with proper device handling.
# One optimizer step is taken per example (effective batch size 1); every tensor
# is moved to `device` before it reaches the model.
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=1e-4)
model.train()

print("🚀 Starting manual training...")

for epoch in range(2):  # Reduced to 2 epochs for faster completion
    total_loss = 0.0

    for i, example in enumerate(train_examples):
        # Tokenize the prompt and the target sentence (CPU tensors at this point).
        inputs = tokenizer(example["input"], return_tensors="pt", padding=True, truncation=True, max_length=64)
        targets = tokenizer(example["output"], return_tensors="pt", padding=True, truncation=True, max_length=64)

        # Move tensors to the model's device.
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs["attention_mask"].to(device)
        labels = targets["input_ids"].to(device)

        # Padding positions in the labels must not contribute to the loss; the
        # model's loss ignores label value -100. With one example per step no
        # padding is produced, so this is a no-op today, but it keeps the loop
        # correct if examples are ever batched together.
        labels = labels.masked_fill(labels == tokenizer.pad_token_id, -100)

        # Forward pass: passing `labels` makes the model compute the loss itself.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backward pass: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        step_loss = loss.item()
        total_loss += step_loss

        if i % 2 == 0:
            print(f"Epoch {epoch+1}, Example {i+1}, Loss: {step_loss:.4f}")

    avg_loss = total_loss / len(train_examples)
    print(f"✅ Epoch {epoch+1} complete. Average loss: {avg_loss:.4f}")

print("✅ Training complete!")

In [None]:
# Test the model with proper device handling.
# Switch to eval mode so dropout is disabled while generating.
model.eval()

test_inputs = [
    "translate to firish [family, planning, medium]: We need groceries",
    "translate to firish [couple, private, low]: The bill is expensive",
    "translate to firish [parents, child nearby, medium]: Are you ready?"
]

print("\n🧪 Testing trained model:")

for test_input in test_inputs:
    inputs = tokenizer(test_input, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}  # Move to device

    with torch.no_grad():
        # Pass the tokenizer's attention mask explicitly instead of leaving
        # generate() to infer one from the input ids.
        # Sampling (do_sample=True) makes the output vary from run to run.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=50,
            num_return_sequences=1,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id
        )

    # Only one sequence is requested, so decode the first (and only) result.
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print(f"Input: {test_input}")
    print(f"Output: {result}")
    print("---")

In [None]:
# Save the model.
# The weights and the tokenizer files go into the same directory so the result
# can be reloaded later with from_pretrained(output_dir).
output_dir = "./firish-t5-trained"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"💾 Model saved to {output_dir}")
print("🎉 Firish T5 training pipeline complete!")
print("📦 Model ready for download and integration!")