# Advanced RAG

In [None]:
# Location of the JSON-lines fashion catalog that the next cell loads with pandas.
s3_uri = "s3://ashwin-partner-bucket/fashion_dataset.jsonl"

# Sample dataset with OpenAI embeddings already included

In [None]:
import pandas as pd
import s3fs
import boto3
from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment.
load_dotenv()

# The file is JSON-lines: one record per line.
df = pd.read_json(s3_uri, lines=True, orient="records")

# Preview the first three rows.
df.head(3)

Unnamed: 0,ageGroup,link,brandName,price,title,gender,subCategory,masterCategory,season,articleType,baseColour,id,openAIVec
0,Adults-Women,http://assets.myntassets.com/v1/images/style/p...,Inc 5,1390.0,Inc. 5 Women Casual White Flats,Women,Shoes,Footwear,Winter,Heels,White,22275,"[-0.016259265699646003, -0.016057870922684, -0..."
1,Adults-Women,http://assets.myntassets.com/v1/images/style/p...,French Connection,3999.0,French Connection Women Black Sling Bag,Women,Bags,Accessories,Summer,Handbags,Black,42874,"[-0.022201561751436002, 0.006381784873631001, ..."
2,Kids-Girls,http://assets.myntassets.com/v1/images/style/p...,Q&Q,625.0,Q&Q Kids Girls White Dial Analog Watch,Girls,Watches,Accessories,Winter,Watches,Pink,49888,"[-0.001154974972115, 0.010144626266777, -7.218..."


# Insert data into MongoDB Atlas

In [None]:
# `os` must be imported in this cell: it is read below, and the only other
# `import os` in the notebook sits in a later cell, so a fresh
# Restart & Run All would otherwise fail here with NameError.
import os

import certifi
from pymongo import MongoClient

# The connection string is read from the environment (e.g. loaded from .env by
# load_dotenv() in the previous cell); certifi supplies the CA bundle for TLS.
mongo_client = MongoClient(os.environ["MONGODB_CONNECTION_STR"], tlsCAFile=certifi.where())
# Upload documents along with vector embeddings to MongoDB Atlas Collection
col = mongo_client["search"]["catalog_final_myn"]
# NOTE(review): re-running this cell inserts the same records again; clear the
# collection first (or skip this cell) when re-executing the notebook.
col.insert_many(df.to_dict(orient="records"))

In [None]:

from langchain_core.output_parsers import JsonOutputParser # type: ignore
from langchain_core.prompts import PromptTemplate # type: ignore
from langchain_core.pydantic_v1 import BaseModel, Field # type: ignore
from langchain_openai import ChatOpenAI # type: ignore

from langchain_openai.embeddings import OpenAIEmbeddings # type: ignore
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch # type: ignore


import os
from itertools import chain
from typing import List, Optional

import certifi # type: ignore
from dotenv import load_dotenv # type: ignore
from pymongo import MongoClient # type: ignore

load_dotenv()

from functools import lru_cache

@lru_cache
def get_openai_emb_transformers():
    """
    Build the OpenAI embedding client used by this notebook.

    The result is memoised with ``lru_cache``, so every call after the first
    hands back the same ``OpenAIEmbeddings`` object instead of constructing
    a new one.

    Returns:
        OpenAIEmbeddings: A client created with the library's default arguments.
    """
    return OpenAIEmbeddings()

@lru_cache
def get_vector_store():
    """
    Build (once) the MongoDB Atlas vector store over the catalog collection.

    The store wraps the module-level ``col`` collection and is configured with
    the ``vector_index_openAi_cosine`` index, ``openAIVec`` as the embedding
    field and ``title`` as the text field. ``lru_cache`` makes later calls
    return the same object.

    Returns:
        MongoDBAtlasVectorSearch: The configured vector store.
    """
    return MongoDBAtlasVectorSearch(
        collection=col,
        embedding=get_openai_emb_transformers(),
        index_name="vector_index_openAi_cosine",
        embedding_key="openAIVec",
        text_key="title",
    )

@lru_cache(maxsize=10)
def get_conversation_chain_conv():
    """
    Return the chat model shared by the chains in this notebook.

    Despite the name, no conversation chain or memory object is created here:
    the function returns a plain ``ChatOpenAI`` instance, cached by
    ``lru_cache`` so repeated calls reuse it.

    Returns:
        ChatOpenAI: A ``gpt-3.5-turbo`` model with ``temperature=0.2`` and
        ``max_tokens=2048``.
    """
    return ChatOpenAI(model="gpt-3.5-turbo", temperature=0.2, max_tokens=2048)


# Define your desired data structure.
class ProductRecoStatus(BaseModel):
    """
    Represents the status of product recommendations.

    Attributes:
        relevancy_status (bool): Product recommendation status conditioned on the context of the input query.
                                 True if the query is related to purchasing fashion clothing and/or accessories.
                                 False otherwise.
        recommendations (List[str]): List of recommended product titles based on the input query context and
                                     if the relevancy_status is True.
    """
    # NOTE(review): the Field descriptions below (and presumably the class
    # docstring, via the pydantic schema) are passed to the LLM through
    # reco_status_parser.get_format_instructions(), so they are prompt text.

    # Whether the query expresses an intent to buy fashion clothing/accessories.
    relevancy_status: bool = Field(description="Product recommendation status is conditioned on the fact if the context of input query is to purchase a fashion clothing and or fashion accessories.")
    # The description previously referred to `recommendation_status`, a field
    # that does not exist on this model; it now names `relevancy_status`.
    recommendations: List[str] = Field(description="list of recommended product titles based on the input query context and if relevancy_status is true.")


class Product(BaseModel):
    """
    Represents a product.

    Attributes:
        title (str): Title of the product.
        baseColour (List[str]): List of base colours of the product.
        gender (List[str]): List of genders the product is targeted for.
        articleType (str): Type of the article.
        mfg_brand_name (str): Manufacturer or brand name of the product.
    """
    # Schema for one entry of `Recommendations.products`, i.e. one product the
    # LLM is asked to return through `reco_parser`.
    # NOTE(review): the docstring above is left verbatim on purpose — pydantic
    # presumably copies it into the JSON schema description that
    # get_format_instructions() puts in the prompt; confirm before rewording.
    # NOTE(review): baseColour and gender are declared as lists here, while the
    # sample catalog rows shown earlier in the notebook hold single strings.
    title: str = Field(description="Title of the product.")
    baseColour: List[str] = Field(description="List of base colours of the product.")
    gender: List[str] = Field(description="List of genders the product is targeted for.")
    articleType: str = Field(description="Type of the article.")
    mfg_brand_name: str = Field(description="Manufacturer or brand name of the product.")


class Recommendations(BaseModel):
    """
    Represents a set of recommendations for products and a message to the user.

    Attributes:
        products (List[Product]): List of recommended products.
        message (str): Message to the user and context of the chat history summary.
    """
    # Output schema handed to `reco_parser`; `get_sorted_results` later reads
    # the `products` list and each entry's `title` from the parsed response.
    # NOTE(review): the docstring above is left verbatim on purpose — pydantic
    # presumably copies it into the JSON schema description that ends up in the
    # prompt; confirm before rewording.
    products: List[Product] = Field(description="List of recommended products.")
    message: str = Field(description="Message to the user and context of the chat history summary.")


# --- Stage 1: purchase-intent detection -------------------------------------
# Parser whose format instructions are derived from the ProductRecoStatus schema.
reco_status_parser = JsonOutputParser(pydantic_object=ProductRecoStatus)

# The template is assembled from adjacent literals; the eight spaces before
# "#Chat History Summary" are part of the prompt text and are kept as-is.
reco_status_prompt = PromptTemplate(
    input_variables=["query", "chat_history"],
    partial_variables={"format_instructions": reco_status_parser.get_format_instructions()},
    template=(
        "You are AI assistant tasked at identifying if there is a product purchase intent in the query and providing suitable fashion recommendations.\n"
        "{format_instructions}\n"
        "{query}\n"
        "        #Chat History Summary: {chat_history}\n\n"
        "Based on the context of the query, please provide the relevancy status and list of recommended products."
    ),
)

# --- Stage 2: ranking of retrieved catalog items ----------------------------
# Parser whose format instructions are derived from the Recommendations schema.
reco_parser = JsonOutputParser(pydantic_object=Recommendations)

reco_prompt = PromptTemplate(
    template=(
        "\n User query:{question} "
        "\n Chat Summary: {chat_history} "
        "\n Rank and suggest me suitable products for creating grouped product recommendations given all product recommendations below feature atleast one product for each articleType "
        "\n {recommendations} "
        "\n show output in {format_instructions} for top 10 products"
    ),
    input_variables=["question", "recommendations", "chat_history"],
    partial_variables={"format_instructions": reco_parser.get_format_instructions()},
)


def get_product_reco_status(query: str, chat_history: Optional[List[str]] = None):
    """
    Ask the LLM whether the query carries a fashion purchase intent.

    The query and chat history are formatted with ``reco_status_prompt``, sent
    to the chat model, and the reply is parsed by ``reco_status_parser``.

    Args:
        query (str): The user query to classify.
        chat_history (Optional[List[str]]): Previous conversation messages.
            Defaults to an empty history when omitted or ``None``.

    Returns:
        The parsed JSON response; it is expected to follow the
        ``ProductRecoStatus`` schema (``relevancy_status`` and
        ``recommendations`` keys).
    """
    # Build a fresh list per call instead of sharing one mutable default object.
    history = [] if chat_history is None else chat_history
    llm = get_conversation_chain_conv()
    # Named `status_chain` so the local does not shadow `itertools.chain`.
    status_chain = reco_status_prompt | llm | reco_status_parser
    return status_chain.invoke({"query": query, "chat_history": history})

def get_sorted_results(product_recommendations):
    """
    Look up the recommended titles in the catalog and keep the LLM's ranking.

    Args:
        product_recommendations (dict): Parsed LLM output; its ``products``
            entry is read as a list of dicts that each carry a ``title``.
            A missing/empty ``products`` list, or entries without a title,
            are tolerated.

    Returns:
        list: One catalog document per recommended title that exists in the
        collection, in the same order as the recommendation list. Titles with
        no catalog match are dropped. Returns ``[]`` without querying the
        database when there are no titles to look up.
    """
    products = (product_recommendations or {}).get("products") or []
    # LLM output is not guaranteed to follow the schema, so skip malformed entries.
    all_titles = [rec["title"] for rec in products if isinstance(rec, dict) and rec.get("title")]
    if not all_titles:
        return []

    projection = {"_id": 0, "id": 1, "title": 1, "price": 1, "baseColour": 1, "articleType": 1, "gender": 1, "link": 1, "mfg_brand_name": 1}
    # Keep the first document returned for each title so the re-ordering below
    # is a dictionary lookup rather than a nested scan of all results.
    first_by_title = {}
    for doc in col.find({"title": {"$in": all_titles}}, projection):
        first_by_title.setdefault(doc["title"], doc)

    return [first_by_title[title] for title in all_titles if title in first_by_title]

def get_product_recommendations(query: str, reco_queries: List[str], chat_history: Optional[List[str]] = None):
    """
    Retrieve catalog products for the suggested queries and let the LLM rank them.

    Each entry of ``reco_queries`` is run through the vector-store retriever
    (10 hits per query), the retrieved titles are passed to the LLM with
    ``reco_prompt``, and the ranked titles are resolved back to catalog
    documents by ``get_sorted_results``.

    Args:
        query (str): The original user query.
        reco_queries (List[str]): Search phrases to run against the vector store.
        chat_history (Optional[List[str]]): Previous chat messages. Defaults to
            an empty history when omitted or ``None``.

    Returns:
        list: Catalog documents in the order ranked by the LLM; ``[]`` when
        ``reco_queries`` is empty.
    """
    # Build a fresh list per call instead of sharing one mutable default object.
    history = [] if chat_history is None else chat_history

    # With nothing to search for there are no candidates to rank, so skip the
    # retriever and LLM calls entirely.
    if not reco_queries:
        return []

    retriever = get_vector_store().as_retriever(search_kwargs={"k": 10})
    # batch() yields one list of documents per query; flatten them into one list.
    candidates = list(chain.from_iterable(retriever.batch(reco_queries)))

    llm_chain = reco_prompt | get_conversation_chain_conv() | reco_parser
    ranked = llm_chain.invoke({
        "question": query,
        "chat_history": history,
        "recommendations": [doc.page_content for doc in candidates],
    })
    return get_sorted_results(ranked)

In [None]:
# Example with shopping intent: classify the query, fetch ranked catalog
# matches for the suggested titles, then show the plain LLM answer for contrast.
query = "Can you suggest me some Casual dresses for date occassion with my boy friend"

status = get_product_reco_status(query)
print(status)

products = get_product_recommendations(query, reco_queries=status["recommendations"], chat_history=[])
print(products)

llm_reply = get_conversation_chain_conv().invoke(query)
print(llm_reply.content)

{'relevancy_status': True, 'recommendations': ['Floral Wrap Dress', 'Off-Shoulder Maxi Dress', 'Lace Fit and Flare Dress', 'Ruffled Hem Shift Dress', 'Denim Shirt Dress']}
[{'link': 'http://assets.myntassets.com/v1/images/style/properties/ebb8a69f6e56cf47f9fefd3ac23cfe03_images.jpg', 'price': 690.0, 'title': 'Femella Women Floral Red Dress', 'gender': 'Women', 'mfg_brand_name': 'Femella', 'articleType': 'Dresses', 'baseColour': 'Red', 'id': '39217'}, {'link': 'http://assets.myntassets.com/v1/images/style/properties/730a44ed829f9310beb97f583a960262_images.jpg', 'price': 3800.0, 'title': 'Forever New Women Floral Purple Dress', 'gender': 'Women', 'mfg_brand_name': 'Forever New', 'articleType': 'Dresses', 'baseColour': 'Purple', 'id': '8484'}, {'link': 'http://assets.myntassets.com/v1/images/style/properties/4e056a5231b2f0f427e2b9f2d45a2662_images.jpg', 'price': 1849.0, 'title': 'Mineral Women Floral Orange Dress', 'gender': 'Women', 'mfg_brand_name': 'Mineral', 'articleType': 'Dresses', 

In [None]:
# Example without shopping intent: only the status check and the plain LLM
# answer are run; no catalog lookup is made.
query = "Where should I take my boy friend for date"

status = get_product_reco_status(query)
print(status)

llm_reply = get_conversation_chain_conv().invoke(query)
print(llm_reply.content)

{'relevancy_status': False, 'recommendations': []}
There are many great options for a date with your boyfriend, depending on your interests and preferences. Some ideas could include:

1. A romantic dinner at a nice restaurant
2. A picnic in the park or at the beach
3. A movie night at home with homemade popcorn and snacks
4. A hike or nature walk followed by a picnic
5. A visit to a local museum or art gallery
6. A cooking class or wine tasting experience
7. A day trip to a nearby city or town to explore and try new restaurants
8. A couples massage or spa day
9. A fun activity like mini golf, bowling, or go-kart racing
10. A concert or live music event

Ultimately, the best date idea is one that you both will enjoy and that allows you to spend quality time together. Consider your boyfriend's interests and preferences when planning the date to ensure it is a memorable and enjoyable experience for both of you.


There are many great options for a date with your boyfriend, depending on your interests and preferences. Some ideas could include: 
 
1. A romantic dinner at a nice restaurant 
2. A picnic in the park or at the beach 
3. A movie night at home or at the cinema 
4. A hike or nature walk 
5. A visit to a museum or art gallery 
6. A cooking class or wine tasting 
7. A concert or live music event 
8. A day trip to a nearby city or town 
9. A couples massage or spa day 
10. A fun activity like mini golf, bowling, or go-kart racing 
 
Ultimately, the best date idea is one that you both will enjoy and that allows you to spend quality time together. Consider what you both like to do and choose an activity that will create lasting memories. 