### **1 - Installing Packages**

In [None]:
!pip install -q unsloth

### **2 - Importing Libraries**

In [None]:
import pandas as pd
import re
import json
from unsloth import FastLanguageModel
import torch
from huggingface_hub import login

### **3 - Loading Configuration**

In [None]:
# Read all notebook settings from config.json (path is relative to the working directory).
with open('config.json', 'r') as file:
    config = json.load(file)

# general configuration
# HGF is passed as `token=` when the models are loaded — presumably a Hugging Face access token.
HGF = config['general']['HGF']

# output models
# NOTE(review): the variable is named "local" but reads the "online" key — confirm which
# identifier the description model should actually be loaded from.
output_model_local_desc = config['outputs']['output_model_online_Desc']
# Identifier intended for the recommendation (ranking) model.
output_model_local_rec = config['outputs']['output_model_local_Rec']

# model params
# Both values are forwarded unchanged to FastLanguageModel.from_pretrained.
max_seq_length = config['model']['max_seq_length']
load_in_4bit = config['model']['load_in_4bit']


### **4 - Loading Framework Components**

In [None]:
# Stage 1: model that turns a reading history into an interest description.
model_desc, tokenizer_desc = FastLanguageModel.from_pretrained(
    model_name = output_model_local_desc,
    max_seq_length = max_seq_length,
    load_in_4bit = load_in_4bit,
    token = HGF
)

# Stage 2: model that ranks candidate articles from that description.
# It must be loaded from its own checkpoint (output_model_local_rec); loading the
# description checkpoint here would make both stages run the same model.
model_rec, tokenizer_rec = FastLanguageModel.from_pretrained(
    model_name = output_model_local_rec,
    max_seq_length = max_seq_length,
    load_in_4bit = load_in_4bit,
    token = HGF
)

### **5 - Defining Prompt Template**

In [None]:
# Prompt template for the description stage.
# It has two positional `{}` slots, in order: the user's reading history, then the
# response text (generateDescription passes "" so the model writes the response itself).
# NOTE(review): the text is runtime data, so the spelling inside it is left untouched;
# presumably it has to match the template the model was fine-tuned on — confirm before editing.
promptDesc = """ Below is an instruction that describes a task, paired with an input that provied further context.
Write a response that appropiately completes the request.

### Instruction:
You are an interests analyzer. Based on the following user history, analyze their reading habits and generate a description of what kind of news articles they might be interested in reading next. 

### History:
{}

### Response:
Description : \n
{}

"""

In [None]:
# Prompt template for the recommendation (ranking) stage.
# It has four positional `{}` slots, in order: the preference description, the candidate
# articles, the reasoning placed between <think> and </think>, and the final ranking.
# NOTE(review): the text is runtime data, so the spelling inside it is left untouched;
# presumably it has to match the template the model was fine-tuned on — confirm before editing.
promptRec = """ Below is an instruction that describes a task, paired with an input that provied further context.
Write a response that appropiately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You serve as a personalized news article recommendation system. Based on the user's preference descriptions below and the candidate articles, rank the candidates using their labels.
Output Format:
Ranked News Articles: <START> C#, C#, ..., C# <END>

### Preferences Description:
{}

### Candidates:
{}


### Response:
<think>
{}
</think>
Ranked News Articles : {}
"""

### **6 - Assembling the Framework**

In [None]:
def generateDescription(history, model, tokenizer):
    """Generate a description of a user's reading interests from their history.

    Args:
        history: Text of the user's reading history; it fills the first slot
            of ``promptDesc``.
        model: Language model exposing ``generate(input_ids=..., attention_mask=...,
            max_new_tokens=...)``.
        tokenizer: Tokenizer belonging to ``model``.

    Returns:
        str: Only the text the model generated — the prompt and any special
        tokens (such as the end-of-sequence marker) are not included.
    """
    # The response slot is left empty so the model continues after "Description :".
    prompt = promptDesc.format(history, "")
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=900,
    )

    # The generated sequence starts with the prompt tokens. Slicing them off by
    # length is independent of the template's exact whitespace, and
    # skip_special_tokens drops the EOS marker without hard-coding its spelling.
    prompt_len = inputs.input_ids.shape[1]
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

def recommendNews(description, candidates, model, tokenizer):
    """Rank candidate articles for a user given a description of their preferences.

    Args:
        description: Preference description; fills the first slot of ``promptRec``.
        candidates: Text listing the candidate articles with labels of the form
            ``C<number>``; fills the second slot of ``promptRec``.
        model: Language model exposing ``generate(input_ids=..., attention_mask=...,
            max_new_tokens=...)``.
        tokenizer: Tokenizer belonging to ``model``.

    Returns:
        tuple: ``(cot, rankedArticles)`` where ``cot`` is a list with the text of
        every closed ``<think>...</think>`` span in the response (empty if the
        model never closed one) and ``rankedArticles`` is a de-duplicated list of
        labels: the model's ranking first, then any remaining candidate labels in
        their original order. If no ranking can be parsed, the candidate order is
        returned unchanged.
    """
    # promptRec has four slots (description, candidates, reasoning, ranking).
    # For inference only the first two are filled: the template is cut at the
    # opening <think> tag so the model writes the reasoning and ranking itself.
    head, think_tag, _ = promptRec.partition("<think>")
    prompt = head.format(description, candidates) + think_tag + "\n"

    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=1000,
    )
    response = tokenizer.batch_decode(outputs)
    result = response[0].split("### Response:")[1]

    cot = re.findall(r'<think>(.*?)</think>', result, re.DOTALL)

    # Look for the ranking only after the reasoning, so a mention of the output
    # format inside the chain of thought is not mistaken for the answer.
    # rpartition returns the whole text when there is no closing tag.
    answer = result.rpartition("</think>")[2]
    match = re.search(r'Ranked News Articles\s*:\s*(.*)', answer)
    predicted = re.findall(r'C\d+', match.group(1)) if match else []

    # dict.fromkeys de-duplicates while preserving first-seen order.
    rankedArticles = list(dict.fromkeys(predicted + re.findall(r'C\d+', candidates)))

    return cot, rankedArticles

def FrameWork(history, candidates):
    """Run the two-stage pipeline: describe the user, then rank the candidates.

    Uses the module-level ``model_desc``/``tokenizer_desc`` for the description
    stage and ``model_rec``/``tokenizer_rec`` for the ranking stage.

    Args:
        history: The user's reading history, passed to ``generateDescription``.
        candidates: The candidate articles, passed to ``recommendNews``.

    Returns:
        tuple: ``(description, cot, rankedArticles)`` — the description from the
        first stage followed by the two values returned by ``recommendNews``.
    """
    user_profile = generateDescription(history, model_desc, tokenizer_desc)
    reasoning, ranking = recommendNews(
        user_profile,
        candidates,
        model_rec,
        tokenizer_rec,
    )
    return user_profile, reasoning, ranking

### **7 - Using the Framework**

In [None]:
# Load the preprocessed test split (path is relative to the working directory)
# and pick a single row to run through the pipeline.
df = pd.read_csv("Data/MIND-Preprocessed/test.csv")
# Row position 3 is presumably just an arbitrary example; change it to try another row.
instance = df.iloc[3]
history = instance['history']
# NOTE(review): 'canidates' (sic) has to match the CSV column header exactly —
# confirm against the file before correcting the spelling.
canidates = instance['canidates']
label = instance['label']

In [None]:
# Run both stages on the selected instance.
discription, cot, ranked_Articles = FrameWork(history, canidates)

print(f"History of user : \n {history} \n")
print(f"Description generated : \n{discription} \n")
print(f"Chain of thought generated : \n{cot} \n")
# Show the ranking itself so it can be compared with the expected article below.
print(f"Ranked articles : \n{ranked_Articles} \n")
print(f"The correct next article : \n{label} \n")