### **1 - Installing Packages**

In [None]:
!pip install -q unsloth

### **2 - Importing Libraries**

In [None]:
from unsloth import FastLanguageModel
from sklearn.metrics import roc_auc_score
import numpy as np
import pandas as pd
import re
import json
from datetime import datetime

### **3 - Loading Configuration**

In [None]:
# Parse the JSON configuration file that sits next to the notebook.
with open('config.json') as config_file:
    config = json.load(config_file)

# Value stored under the "general" section.
HGF = config['general']['HGF']

# Model identifier/path stored under the "outputs" section; it is passed as
# `model_name` when the model is loaded further down.
output_model_local = config['outputs']['output_model_local_Rec']

# Options forwarded to the model loader.
model_settings = config['model']
max_seq_length = model_settings['max_seq_length']
load_in_4bit = model_settings['load_in_4bit']

### **4 - Metrics**

In [4]:
# AUC
def compute_auc(predicted, real):
    """Return the pooled ROC AUC, in percent, of the predicted rankings.

    Every item of every ranking is labelled 1 when it equals that row's real
    value and 0 otherwise. Its score is its reversed position, so the first
    item of a ranking of length n scores n and the last scores 1. Labels and
    scores of all rows are concatenated and scored with one
    ``roc_auc_score`` call.

    Args:
        predicted: iterable of ranked lists, one per row.
        real: iterable of the true value for each row, aligned with
            ``predicted``.

    Returns:
        The AUC multiplied by 100, or ``None`` when the pooled labels do not
        contain both a 1 and a 0.
    """
    labels = []  # 1 for the correct item, 0 for every other item
    scores = []  # position-based scores, highest for the first item

    for target, ranking in zip(real, predicted):
        labels += [1 if item == target else 0 for item in ranking]
        scores += range(len(ranking), 0, -1)

    # AUC is undefined unless both classes are present.
    if not (0 in labels and 1 in labels):
        return None

    return roc_auc_score(labels, scores) * 100

# MRR
def mean_reciprocal_rank(predicted, real):
    """Return the mean reciprocal rank, in percent, of the real values.

    For each row the reciprocal rank is ``1 / position`` (1-based) of the real
    value inside the ranked list, or 0 when the list does not contain it.

    Args:
        predicted: iterable of ranked lists, one per row.
        real: iterable of the true value for each row, aligned with
            ``predicted``.

    Returns:
        The average reciprocal rank multiplied by 100.
    """
    def reciprocal_rank(target, ranking):
        try:
            return 1 / (ranking.index(target) + 1)
        except ValueError:
            # The real value is missing from the ranking.
            return 0

    reciprocal_ranks = [
        reciprocal_rank(target, ranking)
        for target, ranking in zip(real, predicted)
    ]
    return (sum(reciprocal_ranks) / len(reciprocal_ranks)) * 100

# NDCG
def dcg_at_k(r, k):
    """Return DCG@k for a relevance list ``r`` (1 if relevant, 0 otherwise).

    Only the first ``k`` entries are used; the entry at 1-based position ``i``
    is divided by ``log2(i + 1)`` before summing.
    """
    top_k = np.array(r[:k])
    discounts = np.log2(np.arange(2, len(top_k) + 2))
    return np.sum(top_k / discounts)

def ndcg_at_k(predicted, real, k=5):
    """Return the mean nDCG@k, in percent, over all rows.

    Each row has a single real value, so the relevance vector of a ranking is
    1 where an item equals that value and 0 elsewhere, and the ideal ranking
    is one relevant item in first position.

    Args:
        predicted: iterable of ranked lists, one per row.
        real: iterable of the true value for each row, aligned with
            ``predicted``.
        k: cut-off applied to every ranking.

    Returns:
        The average of the per-row nDCG values multiplied by 100.
    """
    # The ideal DCG does not depend on the row, so compute it once.
    ideal_dcg = dcg_at_k([1] * min(k, 1), k)

    per_row = []
    for target, ranking in zip(real, predicted):
        hits = [1 if item == target else 0 for item in ranking[:k]]
        row_dcg = dcg_at_k(hits, k)
        per_row.append(row_dcg / ideal_dcg if ideal_dcg > 0 else 0)

    return np.mean(per_row) * 100

### **5 - Loading Data and Model**

In [None]:
# Test split read from the preprocessed MIND folder.
test_df = pd.read_csv("Data/MIND-Preprocessed/test.csv")

# Load the model and tokenizer named in the configuration.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = output_model_local,
    max_seq_length = max_seq_length,
    load_in_4bit = load_in_4bit,
)

# This notebook only generates text, so switch the model to Unsloth's
# inference mode before any call to generate(). The trailing semicolon keeps
# the return value out of the cell output.
FastLanguageModel.for_inference(model);

### **6 - Evaluation**

In [6]:
# Prompt template filled with str.format(). It has three positional
# placeholders, in order: the preferences description, the candidates, and
# the text placed right after the opening <think> tag (an empty string when
# generating). The "### Response:" marker is also what the generated text is
# split on to isolate the model's answer.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You serve as a personalized news article recommendation system. Based on the user's preference descriptions below and the candidate articles, rank the candidates using their labels.
Output Format:
Ranked News Articles: <START> C#, C#, ..., C# <END>

### Preferences Description:
{}

### Candidates:
{}

### Response:
<think>{} """

In [None]:
def _extract_ranking(result, candidates_text):
    """Return the ranked candidate labels for one generated response.

    Labels (``C`` followed by digits) found on the line after
    "Ranked News Articles:" come first, in the model's order. Labels that
    appear in ``candidates_text`` but not in the model's answer are appended
    in their original order, and duplicates keep their first position.
    """
    match = re.search(r'Ranked News Articles\s*:\s*(.*)', result)
    # When the marker is absent there is no ranking for this row. Use an empty
    # answer so the result falls back to the candidate order instead of
    # reusing text parsed from a previous row.
    after_phrase = match.group(1) if match else ""
    ranked = re.findall(r'C\d+', after_phrase)
    return list(dict.fromkeys(ranked + re.findall(r'C\d+', candidates_text)))


def generate_Predictions(df, model, tokenizer):
    """Generate a ranking for every row of ``df`` and collect the outputs.

    Args:
        df: DataFrame with the columns 'Descriptions', 'Candidates' and
            'Labels'.
        model: model exposing ``generate``.
        tokenizer: tokenizer used to encode the prompt and decode the output.

    Returns:
        A tuple ``(predicted, real, candidates, results)`` of four lists with
        one entry per row: the ranked labels, the row's 'Labels' value, the
        row's 'Candidates' text, and the generated text that follows the
        first "### Response:" marker.
    """
    predicted = []
    real = []
    results = []
    candidates = []

    for (i, row) in df.iterrows():
        desc = row['Descriptions']
        c = row['Candidates']
        inputs = tokenizer([prompt_style.format(desc, c, "")], return_tensors="pt").to("cuda")

        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=1000
        )
        response = tokenizer.batch_decode(outputs)
        # The decoded text starts with the prompt; keep what follows the marker.
        result = response[0].split("### Response:")[1]
        cs = _extract_ranking(result, c)

        print(i)

        predicted.append(cs)
        real.append(row['Labels'])
        results.append(result)
        candidates.append(c)

    return predicted, real, candidates, results

In [None]:
# Run generation over the whole test set; each returned list has one entry per row.
predicted, real, candidates, results = generate_Predictions(test_df, model, tokenizer)

In [None]:
# Put the four per-row lists side by side, one column per list.
df = pd.DataFrame(
    dict(
        Predicted=predicted,
        Real=real,
        Candidates=candidates,
        Results=results,
    )
)

In [None]:
# Save the raw predictions and generated text (the DataFrame index is written as the first column).
df.to_csv("Outputs/Output.csv")

In [None]:
# Score the stored rankings against the true labels.
ranked_lists, true_labels = df["Predicted"], df["Real"]
auc_score = compute_auc(ranked_lists, true_labels)
mrr_score = mean_reciprocal_rank(ranked_lists, true_labels)
ndcg_score_5 = ndcg_at_k(ranked_lists, true_labels, k=5)
ndcg_score_10 = ndcg_at_k(ranked_lists, true_labels, k=10)

# Timestamp with microsecond resolution, used as the identifier of this run.
_id = datetime.now().strftime("%Y%m%d%H%M%S%f")

In [None]:
# One-row summary of this run. Every value is wrapped in a list so that all
# columns have length 1.
metrics = pd.DataFrame({
    "ID": [_id],
    # compute_auc returns None when the AUC is undefined; round() would raise on it.
    "AUC": [None if auc_score is None else round(auc_score, 2)],
    "MRR": [round(mrr_score, 2)],
    "NDCG@5": [round(ndcg_score_5, 2)],
    "NDCG@10": [round(ndcg_score_10, 2)],
})

In [None]:
# Display the metrics of this run.
metrics

In [None]:
# Append this run to the metrics history file.
# The file is written without an index column, so it is read back the same
# way. Reading it with index_col=0 would turn the "ID" column into the index,
# and ignore_index=True in the concat would then discard those IDs.
try:
    old_metrics = pd.read_csv('Results/metrics.csv')
except FileNotFoundError:
    # First run: there is no history yet, so the file starts with this run only.
    old_metrics = None

if old_metrics is not None:
    metrics = pd.concat([old_metrics, metrics], axis=0, ignore_index=True)
metrics.to_csv('Results/metrics.csv', index=False)