In [None]:
!pip install bitsandbytes



Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-c

In [None]:
#STEP 1 : Install the dependencies (already done)

# STEP 2: Import Libraries
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
import json
import os
import random

# Disable W&B logging
os.environ["WANDB_DISABLED"] = "true"

# STEP 3: Load dataset from JSONL
with open("dataset.jsonl", "r") as f:
    lines = [json.loads(line) for line in f]

dataset = Dataset.from_list(lines)

# STEP 4: Format dataset for instruction-tuning
def format_prompt(example):
    return {
        "text": (
            f"You are Cuey, a professional pool and snooker coach.\n"
            f"Question: {example['prompt']}\n"
            f"Answer: {example['response']}"
        )
    }

formatted_dataset = dataset.map(format_prompt)

# STEP 5: Tokenize formatted data
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(example):
    tokens = tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=256,
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = formatted_dataset.map(tokenize)

# STEP 6: Load model in 4-bit for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

# Enable gradient checkpointing to save more memory
base_model.gradient_checkpointing_enable()

# Prepare model for k-bit LoRA training
base_model = prepare_model_for_kbit_training(base_model)

# STEP 7: Apply LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(base_model, lora_config)

# STEP 8: Define training arguments
training_args = TrainingArguments(
    output_dir="./cuey_model",
    per_device_train_batch_size=2,
    num_train_epochs=5,
    logging_steps=1,
    save_total_limit=1,
    fp16=True,
    learning_rate=2e-4,
    report_to="none"
)

# STEP 9: Train the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

trainer.train()

# STEP 10: Define chat function for Cuey
def chat_with_cuey(prompt):
    system_prompt = (
    "You are Cuey, a world-class pool and snooker coach. You give clear, concise, and technically accurate answers tailored to players of all skill levels.\n"
    "Do not make up rules or techniques. If unsure, say 'I'm not sure' rather than guessing.\n"
    "Use step-by-step advice when helpful, and always stay on-topic.\n"
    "Answer ONLY the question provided. Do NOT ask follow-up questions.\n"
    "Question: "
    )

    full_prompt = system_prompt + prompt + "\nAnswer:"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.75,
        top_p=0.9,
        repetition_penalty=1.2
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.strip()

# STEP 11: Test Cuey after training
for i in range(0,5): #sampling 5 random prompts
  sample_prompt = dataset[random.randint(1,100)]["prompt"]
  reply = chat_with_cuey(sample_prompt)
  print("")
  print(f"Cuey says: {reply}")


Map:   0%|          | 0/534 [00:00<?, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/534 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
1,13.8183
2,13.6232
3,14.4609
4,14.2196
5,14.7972
6,12.8036
7,14.8423
8,14.3387
9,11.0628
10,11.9495


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
  return fn(*args, **kwargs)
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer.


Cuey says: You are Cuey, a world-class pool and snooker coach. You give clear, concise, and technically accurate answers tailored to players of all skill levels.
Do not make up rules or techniques. If unsure, say 'I'm not sure' rather than guessing.
Use step-by-step advice when helpful, and always stay on-topic.
Answer ONLY the question provided. Do NOT ask follow-up questions.
Question: When should I use a bridge stick?
Answer: Use Your You You You yourself? You You Go You Can You You You You You You You You feel yourself You You You you you you yourselfYou You You Now You You Your You You You You


Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in 


Cuey says: You are Cuey, a world-class pool and snooker coach. You give clear, concise, and technically accurate answers tailored to players of all skill levels.
Do not make up rules or techniques. If unsure, say 'I'm not sure' rather than guessing.
Use step-by-step advice when helpful, and always stay on-topic.
Answer ONLY the question provided. Do NOT ask follow-up questions.
Question: How do I use spin to change the cue ball’s direction?
Answer: Use You you you want you yourself you Your You You yourself? You You You You Your


Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in 


Cuey says: You are Cuey, a world-class pool and snooker coach. You give clear, concise, and technically accurate answers tailored to players of all skill levels.
Do not make up rules or techniques. If unsure, say 'I'm not sure' rather than guessing.
Use step-by-step advice when helpful, and always stay on-topic.
Answer ONLY the question provided. Do NOT ask follow-up questions.
Question: How can I improve my consistency in pool?
Answer: Pract You You You You You you yourself you You You You You Your You You You yourselfYou You also furtherYouYou you yourself You You You YouYou You You You You You In Your You You You You have be You YouYour Your You You you want you yourselves You You yourself yourself You Your You Today You You You You You That You You You You You Your You You You You Your You yourself yourself yourself yourself You You You You You You You You You You Your You You You You You You You You you You You You Your Se You You You yourself Your You You You yourself You You yo

Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in 


Cuey says: You are Cuey, a world-class pool and snooker coach. You give clear, concise, and technically accurate answers tailored to players of all skill levels.
Do not make up rules or techniques. If unsure, say 'I'm not sure' rather than guessing.
Use step-by-step advice when helpful, and always stay on-topic.
Answer ONLY the question provided. Do NOT ask follow-up questions.
Question: How do I get better at reading angles?
Answer: Use Your YourYou You you bring yourself, you you include You Your You You You You You In You


Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_value=None`.
Caching is incompatible with gradient checkpointing in 


Cuey says: You are Cuey, a world-class pool and snooker coach. You give clear, concise, and technically accurate answers tailored to players of all skill levels.
Do not make up rules or techniques. If unsure, say 'I'm not sure' rather than guessing.
Use step-by-step advice when helpful, and always stay on-topic.
Answer ONLY the question provided. Do NOT ask follow-up questions.
Question: How do I avoid overhitting shots?
Answer: Always Your yourself be likeYou know You You You yourself You You You You you yourselves you You You You You You You You You You You Your YouYour You You You You You want YouYou You You


In [None]:
with open("dataset.jsonl", "r") as f:
    for i, line in enumerate(f):
        print(f"Line {i+1}: {line}")
        if i > 10:
            break


Line 1: {"prompt": "How do I aim properly in pool?", "response": "Use your dominant eye to line up the cue ball with the contact point on the object ball. Keep your head low, your stroke straight, and follow through smoothly."}

Line 2: {"prompt": "What is the best way to practice cue ball control?", "response": "Use drills that focus on hitting the cue ball to specific spots on the table. Practice stopping, following, and drawing the cue ball to learn how spin affects its movement."}

Line 3: {"prompt": "How do I perform a proper break shot in 9-ball?", "response": "Place the cue ball slightly off-center and aim directly at the one ball. Use a controlled but powerful stroke, and follow through while keeping the cue ball on the table."}

Line 4: {"prompt": "What’s a draw shot and when should I use it?", "response": "A draw shot is when you hit the cue ball below center to make it come back after hitting the object ball. Use it when you want to bring the cue ball closer for your next sh