# LLM for Recommendation System - RAG

## TABLE OF CONTENTS
### $~~~$ - 1. Recommendation System
### $~~~$ - 2. Result Interpretation

---
## 1. Recommendation System

In [None]:
# Check Python version
!python -V
# Check CUDA version (as reported by the `nvcc` compiler)
!nvcc --version

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline
from time import time 
import pandas as pd
import random
import torch
import os

In [None]:
# Pick the best available accelerator: CUDA GPU first, then Apple-silicon MPS, else CPU.
# The availability checks must be *called*: a bare function object is always truthy,
# which would select "mps" on machines that have neither CUDA nor MPS.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
# When debugging, override with: device = torch.device("cpu")
print(f"Using device: {device}")

# Silence the tokenizers fork/parallelism warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Never hardcode credentials in a notebook. Read the Hugging Face token from the
# environment instead (export HUGGINGFACEHUB_API_TOKEN or HF_TOKEN before starting Jupyter).
access_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HF_TOKEN")
if access_token:
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = access_token
else:
    # `token=None` lets the Hugging Face libraries fall back to a cached
    # `huggingface-cli login` credential, if one exists.
    print(
        "[!] No Hugging Face token found in HUGGINGFACEHUB_API_TOKEN / HF_TOKEN; "
        "gated models will only load if you are already logged in via `huggingface-cli login`."
    )

# Root directory that the data and vector-store paths are resolved against.
base_dir = "../.."

In [None]:
# Checkpoint used as the recommender LLM (the commented line is an alternative checkpoint).
model_id = "meta-llama/Llama-3.2-1B-Instruct"
# model_id = "Qwen/Qwen2.5-1.5B-Instruct"

# --- Tokenizer ---
tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token)
# Pad on the right-hand side, reusing the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token
print("[*] Tokenizer loaded.")

# --- Model ---
model = AutoModelForCausalLM.from_pretrained(model_id, token=access_token)
model = model.to(device)  # move the weights onto the selected device
print("[*] Model loaded.")

In [None]:
# Folder the saved FAISS index is loaded from.
vector_db_dir = os.path.join(base_dir, 'Vector_DB')

# Sentence-embedding model used to embed queries for the vector store.
embedding_model_id = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(
    model_name=embedding_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(normalize_embeddings=True),  # Set `True` for cosine similarity
    multi_process=True,
)

# SECURITY NOTE: `allow_dangerous_deserialization=True` lets `load_local` unpickle the
# stored index data, and unpickling can execute arbitrary code.
# Only load index folders that you created yourself or otherwise trust.
KNOWLEDGE_VECTOR_DATABASE = FAISS.load_local(
    vector_db_dir,
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)

In [None]:
# Load the formatted product table (CSV) that the later cells query.
formatted_csv_path = os.path.join(base_dir, 'trainData/amazon_products.train.formatted.csv')
formatted_df = pd.read_csv(formatted_csv_path)

def retrieve_product_information(df, query_value):
    """Look up a product by id and return its description and full text.

    The full text of the matched product is also printed.

    Args:
        df: DataFrame with 'PRODUCT_ID', 'DESCRIPTION' and 'TEXT' columns.
        query_value: Value to match against the 'PRODUCT_ID' column.

    Returns:
        Tuple ``(description, full_text)`` taken from the first matching row.

    Raises:
        IndexError: If no row of ``df`` has the requested PRODUCT_ID.
    """
    matching_index = df.index[df['PRODUCT_ID'] == query_value]
    if len(matching_index) == 0:
        raise IndexError(f"PRODUCT_ID {query_value!r} not found in the given DataFrame")
    product_index = matching_index[0]

    # Read from the `df` argument (not a notebook-level global) so the function
    # works for whichever frame the caller passes in.
    full_text = df.loc[product_index, 'TEXT']
    print(f'[*] Retrieved product full content:\n{full_text}')

    return df.loc[product_index, 'DESCRIPTION'], full_text

In [None]:
# Decoding settings forwarded to the text-generation pipeline:
# low-temperature sampling, a mild repetition penalty, and only the newly
# generated text (not the prompt) in the output.
generation_settings = dict(
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=1000,
)

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
Rec_LLM = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
    **generation_settings,
)

In [None]:
# System message: instructions on how to answer and the required answer format.
system_instructions = """Using the information contained in context,
give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Response should include product id, title, and reason for recommendation.
Information of recommended products must be correct, do not falsify information.
If the answer cannot be deduced from the context, do not give an answer.
Answer the question in format: <product id>: <title>\nReason: ..."""

# User message: `{context}` and `{question}` are placeholders filled in later via `str.format`.
user_message_template = """Context:
{context}
---
Now here is the question you need to answer.

Question: {question}"""

prompt_in_chat_format = [
    {"role": "system", "content": system_instructions},
    {"role": "user", "content": user_message_template},
]

# Render the chat messages to a single prompt string (not token ids), ending with
# the generation prompt so the model continues with its answer.
RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
    prompt_in_chat_format,
    tokenize=False,
    add_generation_prompt=True,
)
print(RAG_PROMPT_TEMPLATE)

In [None]:
# Pick a random product to act as the query. Seeding from the clock gives a different
# product on every run; replace `time()` with a fixed integer for reproducible runs.
random.seed(time())
# `.tolist()` makes `random.choice` index by position instead of by DataFrame label.
random_product_id = random.choice(formatted_df['PRODUCT_ID'].tolist())
test_description, full_text = retrieve_product_information(formatted_df, random_product_id)

# Ask for 11 neighbours and drop the top hit, which is expected to be the query product itself.
# NOTE(review): this assumes the query product is indexed in the vector store and always
# ranks first for its own description - confirm against how the index was built.
retrieved_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=test_description, k=11)[1:]
retrieved_docs_text = [doc.metadata['text'] for doc in retrieved_docs]  # We only need the text of the documents

context = "\nExtracted products:"
context += "".join(
    [f"\n\nProduct {str(i)}:::\n" + doc for i, doc in enumerate(retrieved_docs_text)]
)

# The question refers to "this product", so the query product's own text must be part
# of the prompt; otherwise the model has nothing to base its recommendations on.
# `full_text` is passed as a format *argument*, so braces inside it are not re-interpreted.
question = (
    "Base on this product, recommend 5 best products from Context."
    f"\n\nThis product:\n{full_text}"
)
final_prompt = RAG_PROMPT_TEMPLATE.format(question=question, context=context)

In [None]:
# Generate an answer with the LLM and display it.
generation_outputs = Rec_LLM(final_prompt)
recommedations = generation_outputs[0]["generated_text"]
print(recommedations)

---
## 2. Result Interpretation