### **Import Libraries**

In [None]:
!pip install python-dotenv

In [None]:
!pip install sentence-transformers

In [None]:
!pip install chromadb

In [3]:
import pandas as pd
import os
from tqdm.auto import tqdm
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb import Documents, EmbeddingFunction, Embeddings
from transformers import pipeline


### **Load Data**

In [4]:
# Load Data
df = pd.read_csv('/content/Amazon Fashion.csv')

# Data Cleaning
remov = ['main_category', 'sub_category', 'image', 'link', 'discount_price']
data = df.drop(columns=remov)

In [None]:
data['ratings'] = pd.to_numeric(data['ratings'], errors='coerce')
data['ratings'].fillna(data['ratings'].mean(), inplace=True)
data['ratings'] = data['ratings'].astype(float).round(1)

data['no_of_ratings'] = data['no_of_ratings'].str.extract('([\d,]+)')[0]  # Extract numeric values with commas
data['no_of_ratings'] = pd.to_numeric(data['no_of_ratings'].str.replace(',', ''), errors='coerce').fillna(0).astype(float)

data['actual_price'] = data['actual_price'].str.replace('₹', '', regex=False).str.replace(',', '')
data['actual_price'] = pd.to_numeric(data['actual_price'], errors='coerce')
data['actual_price'].fillna(data['actual_price'].mean(), inplace=True)
data['actual_price'] = data['actual_price'].astype(float).round(2)


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2352 entries, 0 to 2351
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   name           2352 non-null   object 
 1   ratings        2352 non-null   float64
 2   no_of_ratings  2352 non-null   float64
 3   actual_price   2352 non-null   float64
dtypes: float64(3), object(1)
memory usage: 73.6+ KB


### **Define Embedding Model and ChromaDB Setup**

In [None]:
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

class MyEmbeddingFunction(EmbeddingFunction):
    def __call__(self, input: Documents) -> Embeddings:
        return embedding_model.encode(input).tolist()


embed_fn = MyEmbeddingFunction()

#  ChromaDB with a persistent client
client = chromadb.PersistentClient(path="./chromadb")

# Create or retrieve the collection with the custom embedding function
collection = client.get_or_create_collection(
    name="product-recommendation-system",
    embedding_function=embed_fn
)

# Upsert Data into ChromaDB
batch_size = 50
for i in tqdm(range(0, len(data), batch_size)):
    i_end = min(i + batch_size, len(data))
    batch = data.iloc[i:i_end]
    batch_titles = batch['name'].astype(str).tolist()
    batch_ids = [str(index) for index in batch.index]
    batch_metadata = batch[['actual_price', 'ratings', 'no_of_ratings']].to_dict(orient='records')

    # Generate embeddings for the batch
    batch_embeddings = embedding_model.encode(batch_titles).tolist()

    # Upsert to ChromaDB
    collection.upsert(
        ids=batch_ids,
        metadatas=batch_metadata,
        documents=batch_titles,
        embeddings=batch_embeddings,
    )


### **Retriever Function**

In [8]:
from functools import lru_cache

@lru_cache(maxsize=100)
def retrieve_products(user_query, n_results=3):
    """Retrieve products similar to the user query, with caching."""
    retriever_results = collection.query(
        query_texts=[user_query],
        n_results=n_results,
    )
    return retriever_results


### **Generator Function**

In [9]:
from transformers import LlamaTokenizer, LlamaForCausalLM
import torch


In [12]:
from transformers import pipeline

generator = pipeline("text-generation", model="gpt2",device=0 )




In [34]:
def recommend_products(user_query):
    """Complete recommendation process including retrieval, generation, and display."""


    retriever_results = retrieve_products(user_query)

    recommendations = []
    documents = retriever_results.get('documents', [[]])[0]
    metadatas = retriever_results.get('metadatas', [[]])[0]


    llm_inputs = [
        f"Generate a concise, engaging, and non-repetitive description for this product: {doc}. It costs ₹{meta.get('actual_price', 'N/A')}. Rating: {meta.get('ratings', 'N/A')} stars from {meta.get('no_of_ratings', 'N/A')} ratings."
        for doc, meta in zip(documents, metadatas)
    ]

    # Generate descriptions in batch
    llm_outputs = generator(
        llm_inputs,
        max_new_tokens=50,
        num_return_sequences=1,
        truncation=True,
        temperature=0.7,
        pad_token_id=50256
    )

    for doc, meta, output in zip(documents, metadatas, llm_outputs):
        actual_price = meta.get('actual_price', 'N/A')
        ratings = meta.get('ratings', 'N/A')
        rating_count = meta.get('no_of_ratings', 'N/A')

        recommendations.append({
            'Product': doc,
            'Actual Price': f"₹{actual_price}",
            'Rating': ratings,
            'Rating Count': rating_count,
            'Description': output[0]['generated_text'] if isinstance(output, list) else print("None")
        })

    # Print out the recommendations
    for rec in recommendations:
        print("=================== RECOMENDED PRODUCT======================")
        print(f"Product: {rec['Product']}")
        print(f"Price: {rec['Actual Price']}")
        print(f"Rating: {rec['Rating']} stars from {rec['Rating Count']} ratings")
        print(20*"--")
        print(f"LLM given Description: {rec['Description']}\n")


In [37]:
# User input for product recommendation
query = "face cream"
recommend_products(query)

Product: Simple Kind to Skin Replenishing Rich Moisturiser| Face Cream for All Skin Types| No Perfume| No Harsh Chemicals & Paraben...
Price: ₹475.0
Rating: 4.2 stars from 2902.0 ratings
----------------------------------------
LLM given Description: Generate a concise, engaging, and non-repetitive description for this product: Simple Kind to Skin Replenishing Rich Moisturiser| Face Cream for All Skin Types| No Perfume| No Harsh Chemicals & Paraben.... It costs ₹475.0. Rating: 4.2 stars from 2902.0 ratings.

This product is rated as a "No Refurbished" by the CAA. Please contact your local authorities for further details.

Product Description

This unique, low-cost lip gloss is a hydrating, moisture-

Product: Ustraa Total De-Tan Kit - De-Tan Face Cream, 50G - De-Tan Face Scrub With Walnut Granules, 100G- Dermatologically Tested, ...
Price: ₹600.0
Rating: 3.9 stars from 1355.0 ratings
----------------------------------------
LLM given Description: Generate a concise, engaging, and non-r