# Install necessary libraries

In [None]:
# The libraries:
# Transformers - for loading the Llama 2 model
# Datasets - for working with training data
# Accelerate - optimizes model execution
# bitesandbytes - allowes 4-bit quantization (reduces VRAM usage)
# PEFT (Parameter0Efficient Fine-Tuning) - enables lightweight training
# SentencePiece - tokenizes text for models like Llama
# Gradio - creates a web interface to test the model
# torch - PyTorch
!pip install transformers datasets accelerate bitsandbytes peft sentencepiece gradio torch

# Load the Llama 2 Model & Tokenizer


In [None]:
import datasets
import gradio as gr  # UI interface
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

# set model (Llama 2-7B chat, loaded 4-bit quantized so it fits Colab's VRAM)
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# load model in 4-bit mode for efficiency
# The quantization settings are passed as a BitsAndBytesConfig; the bare
# `load_in_4bit=True` keyword on `from_pretrained` is deprecated.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # auto-detects available GPU
    quantization_config=quantization_config,  # loads a quantized 4-bit version
)

# Attach LoRA adapters. The 4-bit base weights cannot be trained directly
# (Trainer rejects a purely quantized model), so only the small adapter
# matrices added here receive gradients.
model = prepare_model_for_kbit_training(model)
lora_config = LoraConfig(
    r=8,  # adapter rank; starting value, tune as needed
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # attention projections of the Llama blocks
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Load dataset (replace with actual dataset source)

In [None]:
# TODO: TEMPORARY VALUE - Replace with real source
dataset_source = "path/to/dataset_or_api_call"
# TODO: Example loading function. The tokenization cell further down reads the
# "input" and "output" columns, so the real dataset needs to provide both.
dataset = datasets.load_dataset(path=dataset_source)


# Tokenization and training parameters

In [None]:
# Tokenization function
def tokenize_function(examples, max_length=512, tok=None):
    """Turn a batch of input/output text pairs into causal-LM training features.

    Each example becomes one sequence ``<prompt tokens><response tokens><eos>``.
    A causal LM is trained on a single token stream, so the labels are a copy
    of that stream with the prompt and padding positions set to -100 (ignored
    by the loss); only the response tokens are learned.

    Args:
        examples: Batch mapping with "input" and "output" lists of strings
            (the shape `Dataset.map(..., batched=True)` passes in).
        max_length: Fixed length every sequence is truncated/padded to, so
            rows can be stacked into a batch without a custom collator.
        tok: Tokenizer to use; defaults to the notebook-level `tokenizer`.

    Returns:
        dict with "input_ids", "attention_mask" and "labels", each a list of
        `max_length`-long integer lists.

    Note:
        If a prompt alone is `max_length` tokens or longer, the response is
        truncated away and that row contributes no loss.
    """
    tok = tokenizer if tok is None else tok
    # Padding is done by hand below, so a tokenizer without a pad token (as is
    # the case for Llama 2) still works; EOS is used as the filler id and is
    # masked out via attention_mask / -100 labels.
    pad_id = tok.pad_token_id if tok.pad_token_id is not None else tok.eos_token_id

    prompt_batch = tok(examples["input"], add_special_tokens=True)["input_ids"]
    response_batch = tok(examples["output"], add_special_tokens=False)["input_ids"]

    features = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt_ids, response_ids in zip(prompt_batch, response_batch):
        # Terminate the response with EOS so the model learns where to stop.
        response_ids = list(response_ids) + [tok.eos_token_id]
        input_ids = (list(prompt_ids) + response_ids)[:max_length]
        labels = ([-100] * len(prompt_ids) + response_ids)[:max_length]

        n_pad = max_length - len(input_ids)
        features["input_ids"].append(input_ids + [pad_id] * n_pad)
        features["attention_mask"].append([1] * len(input_ids) + [0] * n_pad)
        features["labels"].append(labels + [-100] * n_pad)
    return features

# Run tokenize_function over the dataset, passing it batches of rows at a time.
tokenized_dataset = dataset.map(function=tokenize_function, batched=True)

# training parameters
# `load_dataset` returns a DatasetDict (one Dataset per split) unless a single
# split was requested, while Trainer expects individual Dataset objects, so
# pick the splits out explicitly. A held-out split is optional.
if isinstance(tokenized_dataset, datasets.DatasetDict):
    train_split = tokenized_dataset["train"]
    eval_split = tokenized_dataset.get("validation", tokenized_dataset.get("test"))
else:
    train_split, eval_split = tokenized_dataset, None

training_args = TrainingArguments(
    output_dir="./results",
    # Evaluate once per epoch only when there is something to evaluate on;
    # Trainer raises if evaluation is enabled without an eval dataset.
    # (`eval_strategy` is the current name of the former `evaluation_strategy`;
    # requires transformers >= 4.41.)
    eval_strategy="epoch" if eval_split is not None else "no",
    per_device_train_batch_size=2,  # Low batch size to fit in free-tier GPU
    num_train_epochs=3,  # short training run for testing
    logging_dir="./logs",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Train and save model

In [None]:
# Run the fine-tuning loop
trainer.train()

# Persist the model and its tokenizer side by side in the same directory
save_dir = "fine-tuned-feedback-model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)

print("Model training completed and saved!")

# Gradio web interface for testing

In [None]:
def generate_feedback(input_text):
    """Generate up to 100 new tokens of model output for `input_text`.

    Args:
        input_text: Prompt string typed into the UI.

    Returns:
        The generated continuation as plain text, without the echoed prompt
        and without special tokens such as the BOS/EOS markers.
    """
    # Switch off dropout etc.; the model may still be in training mode here.
    model.eval()
    # Follow the model's own device instead of hard-coding "cuda", so this
    # also works wherever `device_map="auto"` placed the model.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=100)
    # `generate` returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

# Single text box in, single text box out, backed by generate_feedback.
demo = gr.Interface(generate_feedback, "text", "text")
demo.launch()