In [None]:
!pip install -U peft


Collecting peft
  Downloading peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Downloading peft-0.15.2-py3-none-any.whl (411 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m411.1/411.1 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)
[0mInstalling collected packages: nvidia-cuda-nvrtc-cu12, peft
  Attempting uninstall: peft
[0m    Found existing installation: peft 0.14.0
    Uninstalling peft-0.14.0:
      Successfully uninstalled peft-0.14.0
[0mSuccessfully installed nvidia-cuda-nvrtc-cu12 peft-0.15.2


In [None]:
# Install required libraries
!pip install transformers accelerate bitsandbytes peft datasets

# Import necessary libraries
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset, DatasetDict
from transformers import DataCollatorForLanguageModeling, TrainingArguments, Trainer
import torch

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the base model and tokenizer
model_name = "meta-llama/Llama-3.2-1B"  # Replace with your desired model
print(f"Loading model: {model_name}")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,  # Use 8-bit precision for LoRA/QLoRA
    device_map="auto"
)

# Set padding token
tokenizer.pad_token = tokenizer.eos_token

# Prepare the model for training with LoRA
model = prepare_model_for_kbit_training(model)

# Define LoRA configuration
lora_config = LoraConfig(
    r=8,  # Rank of LoRA
    lora_alpha=32,  # Scaling factor
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Updated for Llama 3.2
    lora_dropout=0.1,  # Dropout rate
    bias="none",  # No bias reparameterization
    task_type="CAUSAL_LM"  # Task type
)

# Wrap the model with LoRA
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Load your custom dataset (JSON format)
dataset = load_dataset("json", data_files=["/content/data/Part_1.json", "/content/data/Part_2.json"])
train_data = dataset["train"]

# Check the structure of your dataset
print("Dataset sample:")
print(train_data[0])

# For instruction tuning, format the input properly
def format_instruction(example):
    # Format depends on your use case, but typically for instruction tuning:
    example["formatted_text"] = f"Question: {example['text']}\n\nAnswer: {example['answer']}"
    return example

# Apply formatting to create proper instruction format
formatted_data = train_data.map(format_instruction)

# Split the dataset for training and evaluation
train_eval_split = formatted_data.train_test_split(test_size=0.1, seed=42)
train_dataset = train_eval_split["train"]
eval_dataset = train_eval_split["test"]

# Now tokenize both datasets properly
def tokenize_function(examples):
    tokens = tokenizer(
        examples["formatted_text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

# Apply tokenization and remove original columns
tokenized_train = train_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=train_dataset.column_names
)
tokenized_eval = eval_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=eval_dataset.column_names
)

print(f"Tokenized training dataset size: {len(tokenized_train)}")
print(f"Tokenized evaluation dataset size: {len(tokenized_eval)}")

# Format the dataset for PyTorch
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training arguments
training_args = TrainingArguments(
    output_dir="./lora_llama_model",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_steps=100,
    logging_steps=10,
    save_total_limit=2,
    fp16=True,  # Use mixed precision
    push_to_hub=False,
    remove_unused_columns=False
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,  # Using properly tokenized eval data
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Train the model
print("Starting training...")
trainer.train()

# Save the fine-tuned model
print("Saving model...")
model.save_pretrained("./lora_llama_model")
tokenizer.save_pretrained("./lora_llama_model")

# Test the fine-tuned model
print("Testing model with a sample question...")
test_question = "What is a GameObject in Unity?"
inputs = tokenizer(test_question, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7)
print("\nModel response:")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

print("Fine-tuning complete!")

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)
[0mInstalling collected packages: nvidia-cuda-nvrtc-cu12
[0mSuccessfully installed nvidia-cuda-nvrtc-cu12
Using device: cuda
Loading model: meta-llama/Llama-3.2-1B


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


trainable params: 1,703,936 || all params: 1,237,518,336 || trainable%: 0.1377
Dataset sample:
{'text': 'What is a GameObject in Unity?', 'answer': 'A GameObject is a fundamental object in Unity that acts as a container for components such as Transform, Renderer, and scripts.'}


Map:   0%|          | 0/55 [00:00<?, ? examples/s]

Map:   0%|          | 0/49 [00:00<?, ? examples/s]

Map:   0%|          | 0/6 [00:00<?, ? examples/s]

Tokenized training dataset size: 49
Tokenized evaluation dataset size: 6
Starting training...


  trainer = Trainer(
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
0,No log,2.499544


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Saving model...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Testing model with a sample question...





Model response:
What is a GameObject in Unity? What are the benefits of using GameObjects?
A GameObject is a component of a 3D object in Unity. It is a simple container that holds other objects like meshes, images, scripts, audio, and more. This article will introduce you to the world of Unity GameObjects and how they work.
Unity is a popular game engine that allows developers to create interactive 3D experiences. One of the most important components of a game is the 3D object. This object can be a character, a building, a weapon, or anything else you can imagine. Unity provides a lot of tools to create these objects, but it also gives you the power to create them on your own.
One of the most important tools in Unity is the GameObject. A GameObject is a component of a 3D object in Unity. It is a simple container that holds other objects like meshes, images, scripts, audio, and more. This article will introduce you to the world of Unity GameObjects and how
Fine-tuning complete!
