### **1 - Installing Packages**

In [None]:
!pip install -q unsloth

### **2 - Importing Libraries**

In [None]:
import pandas as pd
import json
from unsloth import FastLanguageModel
from trl import SFTTrainer 
from unsloth import is_bfloat16_supported 
from huggingface_hub import login
from transformers import TrainingArguments 
import wandb
from datasets import Dataset, DatasetDict

### **3 - Loading Configuration**

In [4]:
# Read all run settings from config.json once, then unpack them section by
# section into the module-level names used by the rest of the notebook.
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

general_cfg = config['general']
outputs_cfg = config['outputs']
model_cfg = config['model']
lora_cfg = config['lora_config']
fine_tuning_cfg = config['fine_tuning']

# general configuration: values later passed to login() and wandb.login(key=...)
HGF = general_cfg['HGF']
WNB = general_cfg['WNB']

# outputs: Hub destination and local destination for the trained artifacts
output_model_online = outputs_cfg['output_model_online_Rec']
output_model_local = outputs_cfg['output_model_local_Rec']

# model: base checkpoint and loading options
base_model = model_cfg['base_model']
max_seq_length = model_cfg['max_seq_length']
load_in_4bit = model_cfg['load_in_4bit']

# lora_config: only the rank is configurable here
r = lora_cfg['r']

# fine_tuning: trainer hyper-parameters
dataset_num_proc = fine_tuning_cfg['dataset_num_proc']
per_device_train_batch_size = fine_tuning_cfg['per_device_train_batch_size']
gradient_accumulation_steps = fine_tuning_cfg['gradient_accumulation_steps']
epochs = fine_tuning_cfg['epochs']['recommender']  # per-task epoch count
max_steps = fine_tuning_cfg['max_steps']
warmup_steps = fine_tuning_cfg['warmup_steps']
learning_rate = fine_tuning_cfg['learning_rate']
optim = fine_tuning_cfg['optim']
weight_decay = fine_tuning_cfg['weight_decay']
lr_scheduler_type = fine_tuning_cfg['lr_scheduler_type']
output_dir = fine_tuning_cfg['output_dir']

### **4 - Reading Data**

In [6]:
# Load the preprocessed train/validation splits, using the first CSV column
# as the DataFrame index in both cases.
train_df, valid_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        "Data/MIND-Preprocessed/train.csv",
        "Data/MIND-Preprocessed/valid.csv",
    )
)

In [7]:
# Preview the first three training rows to sanity-check the loaded columns.
train_df.head(3)

Unnamed: 0,history,candidate,label,Description,COT,Targets
0,H1: Panera Bread worker fired after TikTok exp...,C1: Nikki Haley claims top aides tried to recr...,C5,The user appears to be interested in a diverse...,Here's a step-by-step chain of thought leading...,"C5, C1, C6, C7, C3, C2, C4"
1,"H1: Woman, suspect dead at 'Tarzan' actor Ron ...",C1: Chrissy Teigen's weekend was basically a c...,C9,"Based on their reading history, the user appea...",Here's a step-by-step chain of thought leading...,"C9, C1, C3, C5, C8, C2, C10, C4, C6, C7, C11, ..."
2,"H1: Off to the World Series, these Nationals h...",C1: Carrie Underwood Praises Miranda Lambert a...,C7,The user seems to have a diverse interest in n...,Here's a step-by-step chain of thought leading...,"C7, C6, C1, C3, C5, C2, C4"


### **5 - Authentication & Experiment Tracking**

In [None]:
# Authenticate against the Hugging Face Hub and Weights & Biases with the
# values loaded from config.json.
login(HGF)
wandb.login(key=WNB)

# Start the W&B run for this session.
# NOTE(review): the run name is hard-coded to "20-epochs" while the epoch
# count itself comes from config.json — confirm the two agree.
wandb_init_kwargs = dict(
    project="News-Recommender-MIND-LAST-VR-3-5-2025",
    name="20-epochs",
    job_type="training",
    anonymous="allow",
)
run = wandb.init(**wandb_init_kwargs)

### **6 - Loading DeepSeek R1: Model & Tokenizer**

In [None]:
# Load the base model and its tokenizer through Unsloth. The checkpoint name,
# maximum sequence length and 4-bit loading flag all come from config.json.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = base_model,
    max_seq_length = max_seq_length,
    load_in_4bit = load_in_4bit,
    token = HGF  # same value that is passed to huggingface_hub.login()
)

### **7 - Setting Up Dataset**

In [15]:
# Convert both DataFrames to Hugging Face datasets and group them under the
# "train" / "validation" split names.
dataset = DatasetDict({
    "train": Dataset.from_pandas(train_df),
    "validation": Dataset.from_pandas(valid_df),
})

In [None]:
# Display the DatasetDict to inspect its splits and columns.
dataset

In [17]:
# Drop the columns that the prompt formatter does not read
# (it only uses Description, candidate, COT and Targets).
columns_not_in_prompt = ["history", "label"]
dataset = dataset.remove_columns(columns_not_in_prompt)

In [None]:
# Training prompt template. The four `{}` placeholders are filled, in order,
# by formatting_prompts_func with: Description, candidate, COT and Targets.
#
# NOTE(review): this string is training data, so it is deliberately left
# byte-for-byte unchanged here, but it has issues worth a decision:
#   - "provied" and "appropiately" are misspelled;
#   - the instruction asks for "Ranked News Articles: <START> ... <END>", while
#     the response line at the bottom is "Ranked News Articles : {}" (extra
#     space before the colon, no <START>/<END> markers), and the sample Targets
#     shown in the data preview do not contain those markers either;
#   - the template starts with a leading space.
# Presumably any fix must be mirrored in the prompt used at inference time —
# confirm before changing.
prompt_style = """ Below is an instruction that describes a task, paired with an input that provied further context.
Write a response that appropiately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You serve as a personalized news article recommendation system. Based on the user's preference descriptions below and the candidate articles, rank the candidates using their labels.
Output Format:
Ranked News Articles: <START> C#, C#, ..., C# <END>

### Preferences Description:
{}

### Candidates:
{}


### Response:
<think>
{}
</think>
Ranked News Articles : {}
"""

In [18]:
# End-of-sequence marker appended to every formatted training example.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of rows into complete training prompts.

    Args:
        examples: Batch mapping with the columns "Description", "candidate",
            "COT" and "Targets", each holding one value per row.

    Returns:
        A dict with the single key "prompt" holding one string per row:
        ``prompt_style`` filled with the row's four values, followed by
        ``EOS_TOKEN``.
    """
    rows = zip(
        examples["Description"],
        examples["candidate"],
        examples["COT"],
        examples["Targets"],
    )
    return {
        "prompt": [
            prompt_style.format(description, candidate_list, chain_of_thought, ranking) + EOS_TOKEN
            for description, candidate_list, chain_of_thought, ranking in rows
        ],
    }

In [None]:
# Run the prompt formatter over the dataset in batches; its return value
# contributes a "prompt" column. The bare name on the last line displays the result.
dataset_finetune = dataset.map(formatting_prompts_func, batched = True)
dataset_finetune

### **8 - Setting up the model using LoRA**

In [20]:
# Attach LoRA adapters to the loaded model via Unsloth. Only the rank `r` is
# read from config.json; every other LoRA setting is fixed in this cell.
model_lora = FastLanguageModel.get_peft_model(
    model,
    r = r,
    # Modules that receive adapters: the four attention projections
    # followed by the three MLP projections.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  
    bias="none",  
    use_gradient_checkpointing="unsloth",
    random_state=777,  # same value as the trainer seed
    use_rslora=False,  
    loftq_config=None,
)

Unsloth 2025.4.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


### **9 - Fine-Tuning: Setup and Training**

In [None]:
# Probe bf16 support once; fp16 is used only as the fallback.
use_bf16 = is_bfloat16_supported()

# Hyper-parameters come from config.json; logging interval and seed are fixed here.
# NOTE(review): both num_train_epochs and max_steps are passed. In transformers
# a positive max_steps takes precedence over num_train_epochs — confirm the
# configured max_steps is what you intend.
training_args = TrainingArguments(
    output_dir=output_dir,
    seed=777,
    # batch / schedule length
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    num_train_epochs=epochs,
    max_steps=max_steps,
    # optimisation
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    lr_scheduler_type=lr_scheduler_type,
    warmup_steps=warmup_steps,
    # precision
    bf16=use_bf16,
    fp16=not use_bf16,
    # reporting
    logging_steps=20,
)

# NOTE(review): an eval_dataset is supplied but no evaluation strategy is set
# in the arguments above, so presumably no evaluation runs during training —
# confirm whether that is intended.
trainer = SFTTrainer(
    model=model_lora,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset_finetune['train'],
    eval_dataset=dataset_finetune['validation'],
    dataset_text_field="prompt",
    dataset_num_proc=dataset_num_proc,
    max_seq_length=max_seq_length,
)

In [None]:
# Run fine-tuning with the trainer configured in the previous cell.
trainer.train()

In [None]:
# Upload the LoRA-wrapped model and the tokenizer to the same Hub repository
# (name taken from config.json).
model_lora.push_to_hub(output_model_online) 
tokenizer.push_to_hub(output_model_online)

### **10 - Saving Locally**

In [None]:
# Save the fine-tuned artifacts to the local path from config.json.
# Save `model_lora` (the object returned by get_peft_model and the one pushed
# to the Hub) rather than `model`, the handle from before LoRA was attached,
# so that the local copy and the Hub copy are the same artifact.
model_lora.save_pretrained(output_model_local)
tokenizer.save_pretrained(output_model_local)

In [None]:
# Close the W&B run opened by wandb.init() earlier in the notebook.
wandb.finish()