In [None]:
!pip install unsloth trl datasets

In [None]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

# Probe for a CUDA device once; the result drives every GPU-vs-CPU choice
# made further down in this cell (a TPU would need a different setup).
use_gpu = torch.cuda.is_available()
device_count = torch.cuda.device_count() if use_gpu else 'N/A'

# Report what was found.
print(f"CUDA available: {use_gpu}")
print(f"Device count: {device_count}")

# 1. Load the Base "Small" Model
# With a GPU, load the `-bnb-4bit` checkpoint with 4-bit loading enabled;
# without one, load the plain TinyLlama chat checkpoint with 4-bit disabled.
# NOTE(review): the recorded output of this cell shows Unsloth raising
# NotImplementedError when it finds no accelerator, so the non-GPU path may
# never succeed — confirm whether CPU/TPU support is actually needed here.
MAX_SEQ_LENGTH = 2048
FULL_PRECISION_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

try:
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/tinyllama-chat-bnb-4bit" if use_gpu else FULL_PRECISION_MODEL,
        max_seq_length = MAX_SEQ_LENGTH,
        load_in_4bit = use_gpu,  # Only use 4bit if GPU available
    )
except Exception as e:
    if not use_gpu:
        # The first attempt already used the full-precision checkpoint with
        # 4-bit disabled; repeating the identical call cannot help and would
        # only bury the original traceback.
        raise
    print(f"Error loading model: {e}")
    print("Trying alternative model...")
    # Fall back from the 4-bit checkpoint to the full-precision one.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = FULL_PRECISION_MODEL,
        max_seq_length = MAX_SEQ_LENGTH,
        load_in_4bit = False,
    )

# 2. Setup the "Learning Layer" (LoRA)
# The adapter settings are gathered in one mapping and handed to Unsloth in a
# single call; `model` is rebound to the adapter-wrapped model it returns.
lora_settings = dict(
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
)
model = FastLanguageModel.get_peft_model(model, **lora_settings)

# 3. Load your brand data
# The JSONL file is read from the current working directory as the "train" split.
brand_data_file = "brand_strategy_data.jsonl"
dataset = load_dataset("json", split="train", data_files=brand_data_file)
example_count = len(dataset)
print(f"Dataset loaded with {example_count} examples")

# 4. Start the Training
# Choose the mixed-precision mode from the hardware: bf16 where the GPU
# supports it, fp16 on GPUs that do not, and neither on CPU. The two flags are
# mutually exclusive, so exactly one is set on a GPU. The `use_gpu and ...`
# short-circuit avoids querying CUDA capabilities when no GPU is present.
bf16_ok = use_gpu and torch.cuda.is_bf16_supported()

# NOTE(review): only the "output" column is used as training text, while the
# inference cell prompts with "Instruction: ...\nOutput:" — confirm whether the
# instruction text should be part of the training text as well.
# NOTE(review): no tokenizer is passed to the trainer here — confirm the
# trainer resolves the same tokenizer that was loaded alongside the model.
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    dataset_text_field = "output",
    max_seq_length = 2048,
    args = TrainingArguments(
        per_device_train_batch_size = 1 if not use_gpu else 2,  # Smaller batch for CPU/TPU
        gradient_accumulation_steps = 4,
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = use_gpu and not bf16_ok,  # fp16 only on GPUs without bf16 support
        bf16 = bf16_ok,                  # bf16 on GPUs that support it
        logging_steps = 1,
        output_dir = "brand_model_outputs",
        use_cpu = not use_gpu,  # Use CPU if no GPU
    ),
)
trainer.train()


NotImplementedError: Unsloth cannot find any torch accelerator? You need a GPU.

In [None]:
# For Google Colab with TPU:
# 1. Make sure you're connected to Colab first
# 2. Run this cell to check your device
import torch
import os

# Report which compute devices this runtime can see.
print("=" * 50)
print("DEVICE DETECTION")
print("=" * 50)
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")
print(f"Device: {torch.device('cuda' if cuda_available else 'cpu')}")

# Check if running on Colab: being able to import `google.colab` is taken as
# the signal. Only ImportError means "not on Colab"; any other failure is a
# real error and must not be silently reported as "not on Colab".
try:
    from google.colab import auth
except ImportError:
    print("✗ Not running on Google Colab")
    IN_COLAB = False
else:
    print("✓ Running on Google Colab")
    IN_COLAB = True
    # Authentication is best-effort: a failed or declined login is reported
    # but does not change the Colab detection result.
    try:
        auth.authenticate_user()
    except Exception as auth_error:
        print(f"Colab authentication failed: {auth_error}")

# If TPU is available in Colab, show it
if IN_COLAB:
    try:
        import tensorflow as tf
    except ImportError:
        # TensorFlow is used here only to enumerate TPU devices.
        print("TensorFlow is not installed; skipping TPU check")
    else:
        tpu_devices = tf.config.list_physical_devices('TPU')
        print(f"TPU devices found: {len(tpu_devices)}")
        if tpu_devices:
            print("✓ TPU is available!")


DEVICE DETECTION
CUDA Available: False
Device: cpu
✗ Not running on Google Colab
✗ Not running on Google Colab


In [None]:
# Generate a sample completion from the model.
# NOTE: relies on `model` and `tokenizer` created by the training cell, so
# that cell must have run successfully in this kernel first.
FastLanguageModel.for_inference(model) # Fast mode

prompt = "Instruction: What is the most important part of a brand strategy?\nOutput:"

# Send the inputs to whichever device the model is on instead of hard-coding
# "cuda", so this cell stays consistent with the GPU/CPU handling used when
# the model was loaded.
inputs = tokenizer([prompt], return_tensors = "pt").to(model.device)

outputs = model.generate(**inputs, max_new_tokens = 100)
print(tokenizer.batch_decode(outputs)[0])