## RAG Finetuning LLM
### * **Retrieval: PDF documents relevant to fashion recommendation are embedded and indexed in a vector database**
### * **Integrating LLM-based, attribute-aware context with fine-grained fashion retrieval. For each attribute in the query, the LLM first generates a detailed attribute-aware context that enriches the attribute representation with commonsense knowledge and business requirements.**
### * **The attribute embeddings, enriched with their attribute-aware context, form a conditional query vector that guides the retrieval process, interacting with image patches to focus on the regions that match the specified attributes.**
### * **Prompt-generation and training strategies that enhance the LLM's capacity to deliver personalized fashion advice while retaining essential domain knowledge.**
### * **Generative image AI engineering.**

# LLM Strategies
### These strategies, as reflected in the designed prompts, ensure that the LLM not only retains its core language processing capabilities but is also finely tuned to analyze and address fashion-related queries with enhanced precision.

## Load & chunk PDF documents

In [6]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="pypdf")
from pypdf import PdfReader

def load_pdf_text(file_path):
    """Extract the text of every page of a PDF into one string.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        The text of all pages in reading order, each page followed by a
        newline.
    """
    reader = PdfReader(file_path)
    # Join once instead of growing a string with ``+=`` for every page.
    # ``or ""`` keeps a page whose extraction yields nothing from breaking
    # the concatenation; such a page contributes only its newline.
    return "".join(
        (page.extract_text() or "") + "\n" for page in reader.pages
    )

# Text chunking
def chunk_text(text, chunk_size=500, overlap=100):
    """Split *text* into fixed-size character windows that overlap.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so
    each window repeats the last ``overlap`` characters of the previous one.
    The final window may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk (must be > 0).
        overlap: Characters shared by neighbouring chunks
            (``0 <= overlap < chunk_size``).

    Returns:
        List of chunks in document order; an empty list for empty text.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. Without
            this check ``overlap >= chunk_size`` would never advance the
            window and the loop would not terminate.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), step)
    ]

# Load the source PDF and split it into overlapping character chunks
# (chunk_text defaults: 500-character windows, 100 characters of overlap).
pdf_text = load_pdf_text("../data/pdf/fashion recommendation LLM.pdf")
pdf_chunks = chunk_text(pdf_text)

In [7]:
pdf_chunks

['Integrating Domain Knowledge into Large Language Models for\nEnhanced Fashion Recommendations\nZhan Shi∗∗\naria2@scu.edu\nSanta Clara University\nSanta Clara, USA\nShanglin Yang†\nkudoysl@gmail.com\nABSTRACT\nFashion, deeply rooted in sociocultural dynamics, evolves as individ-\nuals emulate styles popularized by influencers and iconic figures. In\nthe quest to replicate such refined tastes using artificial intelligence,\ntraditional fashion ensemble methods have primarily used super-\nvised learning to imit',
 'intelligence,\ntraditional fashion ensemble methods have primarily used super-\nvised learning to imitate the decisions of style icons, which falter\nwhen faced with distribution shifts, leading to style replication dis-\ncrepancies triggered by slight variations in input. Meanwhile, large\nlanguage models (LLMs) have become prominent across various\nsectors, recognized for their user-friendly interfaces, strong con-\nversational skills, and advanced reasoning capabilities. T

## Create Text Embeddings

In [8]:
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

# Hugging Face checkpoint used to embed both the PDF chunks and the queries.
model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Tokenizer and encoder must come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
text_model = AutoModel.from_pretrained(model_name)
# Inference only: eval() turns off the Dropout layers listed in the model
# summary printed below.
text_model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 384, padding_idx=0)
    (position_embeddings): Embedding(512, 384)
    (token_type_embeddings): Embedding(2, 384)
    (LayerNorm): LayerNorm((384,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-5): 6 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=384, out_features=384, bias=True)
            (key): Linear(in_features=384, out_features=384, bias=True)
            (value): Linear(in_features=384, out_features=384, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=384, out_features=384, bias=True)
            (LayerNorm): LayerNorm((384,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)


In [9]:
# Smoke test: embed one sentence and check the size of the resulting vector.

text = "This is a test sentence"

inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

# Inference only, so skip gradient tracking.
with torch.no_grad():
    outputs = text_model(**inputs)

# Average the per-token vectors into a single sentence vector.
embeddings = outputs.last_hidden_state.mean(dim=1)
print(embeddings.shape)

torch.Size([1, 384])


In [10]:
import warnings
warnings.filterwarnings("ignore")

def embed_texts(texts, batch_size=32):
    """Embed texts with ``text_model`` using attention-masked mean pooling.

    A plain ``mean(dim=1)`` over ``last_hidden_state`` also averages the
    padding positions added by ``padding=True``. The vector of a text then
    depends on the longest text in its batch, and chunks embedded in a padded
    batch are not comparable with a query embedded on its own. Weighting by
    the attention mask removes that dependency.

    NOTE: any FAISS index built from the unmasked embeddings should be
    rebuilt after switching to this pooling.

    Args:
        texts: A string or a sequence of strings.
        batch_size: Number of texts tokenized and encoded per forward pass.

    Returns:
        ``numpy`` array with one row per input text.
    """
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)
    if not texts:
        # Nothing to encode; keep the 2-D shape callers expect.
        return np.empty((0, text_model.config.hidden_size), dtype=np.float32)

    pooled = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            inputs = tokenizer(
                texts[start:start + batch_size],
                padding=True,
                truncation=True,
                return_tensors="pt",
            )
            token_embeddings = text_model(**inputs).last_hidden_state
            # 1 for real tokens, 0 for padding; broadcast over the hidden dim.
            mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
            token_sum = (token_embeddings * mask).sum(dim=1)
            token_count = mask.sum(dim=1).clamp(min=1e-9)
            pooled.append(token_sum / token_count)
    return torch.cat(pooled).cpu().numpy()

# Embed every PDF chunk; the result has one row per chunk.
pdf_embeddings = embed_texts(pdf_chunks)
pdf_embeddings.shape

(59, 384)

## Build FAISS Index

In [11]:
import faiss

dim = pdf_embeddings.shape[1]

# Cosine similarity = inner product of unit-length vectors, so normalise the
# embeddings in place and use an inner-product index. (The previous
# IndexFlatL2 ranked unit vectors in the same order but returned squared L2
# distances, not similarities, despite the "cosine" label.)
faiss.normalize_L2(pdf_embeddings)
index = faiss.IndexFlatIP(dim)

# Row i of the index corresponds to pdf_chunks[i].
index.add(pdf_embeddings)

## Query FAISS

In [12]:
def search_faiss(query, k=5):
    """Query the FAISS index and return the most relevant PDF chunks.

    The query is embedded with ``embed_texts`` and L2-normalised in the same
    way as the indexed chunk embeddings before searching.

    Args:
        query: Free-text question.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` entries of ``pdf_chunks``, best match first. Empty when
        ``k`` is not positive or the index holds no vectors.
    """
    if k <= 0 or index.ntotal == 0:
        return []

    q_emb = embed_texts([query])
    faiss.normalize_L2(q_emb)
    # Never ask for more neighbours than the index holds.
    _, idxs = index.search(q_emb, min(k, index.ntotal))
    # FAISS pads missing neighbours with -1; used as a list index that would
    # silently return the *last* chunk, so drop those entries.
    return [pdf_chunks[i] for i in idxs[0] if i >= 0]


# Retrieve the chunks most relevant to a sample question.
contexts = search_faiss("What outfits are suitable for a summer wear?")

## RAG with LLM (Context-Aware Generation)
---
## Purpose
* ### Use retrieved context
* ### Prevent hallucination
* ### Generate grounded answers


In [13]:
torch.__version__

'2.9.1'

In [14]:
# Pick the compute backend in order of preference: Apple MPS, then CUDA,
# then CPU.
# NOTE(review): no visible cell moves text_model or its inputs to this
# device — confirm whether it is used further down.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

print(f"Using device: {device}")

Using device: mps


## Load LLM

In [15]:
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

# A notebook has no __file__, so the notebook's own name is resolved against
# the current working directory and the containing folder is used as base.
notebook_path = os.path.abspath("06_Finetuning_LLM_RAG.ipynb")
BASE_DIR = os.path.dirname(notebook_path)

# Read environment variables from the .env file in that folder.
env_file = os.path.join(BASE_DIR, ".env")
load_dotenv(env_file)

# Hugging Face access token (None when the variable is not set).
HF_TOKEN = os.getenv("HF_TOKEN")

client = InferenceClient(token=HF_TOKEN)

## Build RAG Prompt

In [16]:
def build_prompt(contexts, question):
    """Assemble the RAG prompt from retrieved passages and the question.

    Args:
        contexts: Iterable of retrieved text passages; they are separated by
            a blank line inside the ``Context:`` section.
        question: The user's question.

    Returns:
        Prompt string made of the role line, the ``Context:`` section, the
        ``Question:`` section and a trailing ``Answer:`` cue.
    """
    retrieved = "\n\n".join(contexts)
    sections = (
        "You are a fashion recommendation assistant expert.",
        f"Context:\n{retrieved}",
        f"Question:\n{question}",
        "Answer:\n",
    )
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections)

## Generate Answer

In [17]:
def generate_answer(
    question,
    *,
    model="meta-llama/Llama-3.2-3B-Instruct",
    max_tokens=200,
    temperature=0.7,
):
    """Answer *question* with retrieval-augmented generation.

    Retrieves supporting chunks with ``search_faiss``, wraps them and the
    question into a prompt with ``build_prompt``, and sends the prompt as
    the user message of a chat completion on ``client``.

    Args:
        question: The user's question.
        model: Hub id of the chat model to call.
        max_tokens: Upper bound on generated tokens. The reply is cut off
            once the bound is reached, so raise it if answers end
            mid-sentence.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        The text content of the first completion choice.
    """
    contexts = search_faiss(question)
    prompt = build_prompt(contexts, question)

    # Chat-completion message format: system role first, then the user turn.
    messages = [
        {
            "role": "system",
            "content": "You are a fashion recommendation assistant expert.",
        },
        {
            "role": "user",
            "content": prompt,
        },
    ]

    response = client.chat_completion(
        messages=messages,
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    return response.choices[0].message.content

print(generate_answer("Recommend a summer casual outfit for casual occasions."))

Based on the provided references, I'll provide a fashion recommendation for a summer casual outfit suitable for casual occasions.

Considering the limitations of traditional methods, which are constrained to aligning with the training data and struggle to adapt to new preference distributions, I'll suggest a solution that leverages Large Language Models (LLMs) for improved fashion recommendations.

For a summer casual outfit, I recommend the following combination of items:

1. **Top:** A lightweight, pastel-colored blouse with a relaxed fit and V-neckline. This style is perfect for warm weather and can be easily paired with shorts or a flowy skirt.
2. **Bottom:** Distressed denim shorts in a light wash. These are comfortable, versatile, and can be dressed up or down depending on the occasion.
3. **Shoes:** A pair of white sneakers with a breathable mesh upper. This style is perfect for casual outings and can add a sporty touch to the overall look.
4. **Accessories:** A pair of


# Re-Ranking Model
---
## Purpose
* ### Improves quality
* ### Applies business logic


In [24]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

# Input layout: query embedding (384) + item embedding (384) + numeric
# features (5) = 773 values per candidate.
# NOTE(review): no fit/training call is visible in this section, so scores
# produced below come from freshly initialised weights — confirm.
rerank_input = layers.Input(shape=(384 + 384 + 5,))

# Two ReLU hidden layers narrowing 128 -> 64.
hidden = rerank_input
for units in (128, 64):
    hidden = layers.Dense(units, activation='relu')(hidden)

# Sigmoid head: a single relevance score in (0, 1).
score = layers.Dense(1, activation='sigmoid')(hidden)
rerank_model = tf.keras.Model(inputs=rerank_input, outputs=score)

rerank_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'AUC'],
)

# How Re-Ranking Works in Practice

In [25]:
def rerank(candidates, query_embedding):
    """Score candidates with ``rerank_model`` and sort them best-first.

    Each candidate's feature row is the concatenation of the query
    embedding, the candidate's ``'embedding'`` and its
    ``'numeric_features'``. All rows are scored in one ``predict`` call
    instead of one call per candidate.

    Args:
        candidates: Iterable of dicts with ``'embedding'`` and
            ``'numeric_features'`` array entries.
        query_embedding: Query vector; flattened, so a ``(1, d)`` array is
            accepted as well as a ``(d,)`` one.

    Returns:
        List of ``(candidate, score)`` pairs sorted by descending score,
        where ``score`` is the model output row for that candidate (read the
        value as ``score[0]``). Empty list when there are no candidates.
    """
    candidates = list(candidates)
    if not candidates:
        return []

    query = np.asarray(query_embedding).ravel()
    features = np.stack([
        np.concatenate([
            query,
            np.asarray(item['embedding']).ravel(),
            np.asarray(item['numeric_features']).ravel(),
        ])
        for item in candidates
    ])

    # One batched forward pass; iterating the result yields one row per
    # candidate.
    scores = rerank_model.predict(features, verbose=0)

    return sorted(
        zip(candidates, scores),
        key=lambda pair: float(pair[1][0]),
        reverse=True,
    )

# Demo: ten random candidates shaped like the real features
# (384-dim embedding + 5 numeric features) and one random query.
candidates = []
for _ in range(10):
    candidates.append({
        'embedding': np.random.rand(384),
        'numeric_features': np.random.rand(5),
    })
query_embedding = np.random.rand(384)

ranked_results = rerank(candidates, query_embedding=query_embedding)

# Show the five highest-scoring candidates.
top_five = ranked_results[:5]
for item, score in top_five:
    print(f"Score: {score[0]:.4f}")

2026-02-13 15:06:05.515220: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Score: 0.6760
Score: 0.6626
Score: 0.6442
Score: 0.6305
Score: 0.6299


In [23]:
# Take the embedding width from the FAISS index instead of hard-coding it.
embedding_dim = index.d
print(f"FAISS index dimension: {index.d}")

# Random candidates and query sized to match the index.
candidates = []
for _ in range(10):
    candidates.append({
        'embedding': np.random.rand(embedding_dim),
        'numeric_features': np.random.rand(5),
    })
query_embedding = np.random.rand(embedding_dim)

FAISS index dimension: 384


In [27]:
# Sanity check: print the FAISS index width, the embedding width used for the
# demo candidates, and the re-ranker's input shape so they can be compared.
print(f"FAISS index dimension: {index.d}")
print(f"Embedding dimension used: {embedding_dim}")
print(f"Rerank model input shape: {rerank_model.input_shape}")

FAISS index dimension: 384
Embedding dimension used: 384
Rerank model input shape: (None, 773)


## * Save FAISS index + chunks
## * Save embedding model (Hugging Face)
## * Save re-ranker model (Keras)

In [28]:
import faiss
import pickle
import os

# Persist everything the pipeline needs at inference time: the FAISS index
# with its chunks, the embedding model, and the re-ranker with its schema.
import json

# Single root for all artifacts written by this cell.
# NOTE(review): the reload cell of this notebook reads from
# "../models/RAG_FAISS_LLM/..." — confirm how these files get there.
ARTIFACT_DIR = "artifacts"
faiss_dir = os.path.join(ARTIFACT_DIR, "faiss")
embedding_dir = os.path.join(ARTIFACT_DIR, "embedding_model", "all-MiniLM-L6-v2")
reranker_dir = os.path.join(ARTIFACT_DIR, "reranker")
for folder in (faiss_dir, embedding_dir, reranker_dir):
    os.makedirs(folder, exist_ok=True)

# FAISS index
faiss.write_index(index, os.path.join(faiss_dir, "index.faiss"))

# Chunks: index row i is the embedding of pdf_chunks[i], so the list must be
# saved together with the index.
with open(os.path.join(faiss_dir, "chunks.pkl"), "wb") as f:
    pickle.dump(pdf_chunks, f)

# Embedding model (Hugging Face): tokenizer and weights in the same folder.
tokenizer.save_pretrained(embedding_dir)
text_model.save_pretrained(embedding_dir)

# Re-ranker model (Keras)
rerank_model.save(os.path.join(reranker_dir, "model.keras"))

# Feature schema, derived from the live objects so it cannot drift from what
# was actually saved.
emb_dim = int(index.d)
total_features = int(rerank_model.input_shape[-1])
config = {
    "query_embedding_dim": emb_dim,
    "doc_embedding_dim": emb_dim,
    "numeric_features_dim": total_features - 2 * emb_dim,
    "total_features": total_features,
}

with open(os.path.join(reranker_dir, "config.json"), "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)

In [35]:
# Reload every saved artifact: FAISS index + chunks, embedding model, and the
# re-ranker with its feature schema.
import json
import os
import pickle

import faiss
import tensorflow as tf
from transformers import AutoTokenizer, AutoModel

# Single root for all artifacts read by this cell.
MODEL_ROOT = "../models/RAG_FAISS_LLM"
embedding_dir = os.path.join(MODEL_ROOT, "embedding_model", "all-MiniLM-L6-v2")

index = faiss.read_index(os.path.join(MODEL_ROOT, "faiss", "index.faiss"))

# SECURITY: pickle.load can execute arbitrary code from the file. Only load
# chunk files produced by this project; prefer JSON if the file may ever
# come from elsewhere.
with open(os.path.join(MODEL_ROOT, "faiss", "chunks.pkl"), "rb") as f:
    pdf_chunks = pickle.load(f)

# FAISS ids are used as positions in pdf_chunks, so the two must line up.
if index.ntotal != len(pdf_chunks):
    raise ValueError(
        f"FAISS index holds {index.ntotal} vectors but "
        f"{len(pdf_chunks)} chunks were loaded"
    )

tokenizer = AutoTokenizer.from_pretrained(embedding_dir)
text_model = AutoModel.from_pretrained(embedding_dir)

# Inference mode.
text_model.eval()

rerank_model = tf.keras.models.load_model(
    os.path.join(MODEL_ROOT, "reranker", "model.keras")
)

with open(os.path.join(MODEL_ROOT, "reranker", "config.json")) as f:
    rerank_config = json.load(f)