<a href="https://colab.research.google.com/github/ReaganSanz/Netflix-Recommender-LLM-Fine-Tuning-/blob/main/Fine_tuning_movie_reccomender%3F.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fine-Tuning Mistral on Google Colab for Movie Recommendations

# Install dependencies
!pip install -q transformers datasets peft accelerate bitsandbytes

# Step 1: Load your dataset (replace with your actual file)
from datasets import load_dataset

dataset = load_dataset("json", data_files={"train": "train.json", "test": "test.json"})

# Example data format:
# {"instruction": "Recommend a Netflix movie based on this input.",
#  "input": "Genre: Sci-Fi\nYear: 2020\nMood: Suspenseful",
#  "output": "You might enjoy 'The Midnight Sky'..."}


In [None]:
# Step 2: Format the dataset into prompt-style

def format_prompt(example):
    """Render one record as a single instruction-style training prompt.

    Args:
        example: mapping with the keys 'instruction', 'input' and 'output'.

    Returns:
        A dict with one key, "text", holding the three sections in the order
        Instruction / Input / Response. Each section is a "### <Name>:" header
        line followed by its content, and sections are separated by a blank
        line.

    Raises:
        KeyError: if any of the three keys is missing from ``example``.
    """
    sections = (
        ("### Instruction:", example['instruction']),
        ("### Input:", example['input']),
        ("### Response:", example['output']),
    )
    prompt = "\n\n".join(f"{header}\n{body}" for header, body in sections)
    return {"text": prompt}

# Run format_prompt over the loaded dataset so that each example gains the
# "text" field it returns; the tokenization step reads that field.
dataset = dataset.map(format_prompt)

In [None]:
# Step 3: Load Mistral model and tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "mistralai/Mistral-7B-v0.1"

# Mistral is implemented natively in transformers, so there is no reason to
# opt in to executing Python code shipped with the checkpoint
# (trust_remote_code); leaving it off avoids running unreviewed remote code.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Quantize the weights to 8-bit while loading. The quantization request is
# passed as a BitsAndBytesConfig because the bare `load_in_8bit=True` keyword
# on from_pretrained is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",  # let accelerate place the layers on the available device(s)
)


In [None]:
# Step 4: Attach LoRA adapters to the quantized base model
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Adapter hyperparameters: rank-8 LoRA matrices (scaling alpha 16, dropout 0.1)
# on the q_proj and v_proj modules only; no bias terms are trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# First run PEFT's k-bit training preparation on the quantized model, then
# wrap the result with the adapter configuration above.
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

In [None]:
# Step 5: Tokenize formatted text

# The Mistral tokenizer ships without a padding token, so requesting
# padding="max_length" would raise a ValueError. Reuse EOS as the pad token
# unless one has already been configured.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def _labels_from(input_ids, attention_mask):
    """Copy one sequence of token ids, replacing padded positions with -100.

    -100 is the index the causal-LM loss ignores, so padding does not
    contribute to training. The attention mask (not the token id) decides
    what counts as padding, because the pad token may be the same id as EOS.
    """
    return [tok if keep else -100 for tok, keep in zip(input_ids, attention_mask)]


def tokenize_function(example):
    """Tokenize the "text" field and attach causal-LM labels.

    Args:
        example: a single record (``example["text"]`` is a str) or a batch
            (``example["text"]`` is a list of str, as passed by
            ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output, padded/truncated to 512 tokens, with an extra
        "labels" entry mirroring "input_ids" except that padded positions are
        set to -100. Without labels the Trainer gets no loss from the model.
    """
    encoded = tokenizer(example["text"], padding="max_length", truncation=True, max_length=512)
    ids, masks = encoded["input_ids"], encoded["attention_mask"]
    if isinstance(example["text"], str):
        # Un-batched call: ids/masks are flat lists for one sequence.
        encoded["labels"] = _labels_from(ids, masks)
    else:
        encoded["labels"] = [_labels_from(i, m) for i, m in zip(ids, masks)]
    return encoded

train_data = dataset["train"].map(tokenize_function, batched=True)
test_data = dataset["test"].map(tokenize_function, batched=True)


In [None]:
# Step 6: Training with Hugging Face Trainer
import inspect

from transformers import TrainingArguments, Trainer

# The per-epoch evaluation setting is spelled `eval_strategy` in newer
# transformers releases and `evaluation_strategy` in older ones; passing the
# wrong spelling raises a TypeError. Because the install cell does not pin a
# version, pick whichever name the installed TrainingArguments accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./mistral-movie-recommender",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    save_strategy="epoch",       # checkpoint at the end of every epoch
    num_train_epochs=3,
    learning_rate=2e-4,
    logging_dir="./logs",
    fp16=True,                   # mixed-precision training
    push_to_hub=False,
    **{_eval_strategy_key: "epoch"},  # evaluate on the test split every epoch
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data
)

trainer.train()

In [None]:
# Step 7: Save the model
# Write the trained model and the tokenizer into the same directory so the
# two can be reloaded together from one path.
save_dir = "./mistral-movie-recommender"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)
