In [1]:
# ! pip install -qU \
#     openai \
#     pinecone-client \
#     langchain \
#     tiktoken \
#     dotenv

In [2]:
from dotenv import dotenv_values

config = dotenv_values("../.env")

## Initializing Embedding Model and Vector DB

In [3]:
from FlagEmbedding import FlagModel

model = FlagModel('BAAI/bge-large-en-v1.5', 
                query_instruction_for_retrieval="Represent this sentence for searching relevant passages: ",
                use_fp16=True)

embed = lambda x: model.encode(x).tolist()            



In [4]:
PINECONE_API = config['PINECONE_API']
PINECONE_ENV = config['PINECONE_ENV']
PINECONE_INDEX_NAME = config['PINECONE_INDEX_NAME']

In [5]:
import pinecone
from pinecone import Pinecone, ServerlessSpec

# Set up API key and environment
YOUR_API_KEY = PINECONE_API
index_name = PINECONE_INDEX_NAME

# Initialize Pinecone client using the updated API
pc = Pinecone(api_key=YOUR_API_KEY)

# Check if the index exists, if not, create it
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1024,  # Adjust the dimension as needed for your embeddings
        metric="dotproduct",
        spec=ServerlessSpec(
            cloud="aws",  # Adjust based on your region
            region="us-east-1"
        )
    )

# Retrieve the index
index = pc.Index(index_name)

print(f"Connected to Pinecone index: {index_name}")


Connected to Pinecone index: llm-recommender-system


In [6]:
# index = pinecone.Index(index_name)
index_stats_response = index.describe_index_stats()

print(index_stats_response)

{'dimension': 1024,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 100}},
 'total_vector_count': 100}


## Indexing Embedding Vectors

In [7]:
import pandas as pd

In [8]:
sample_books = pd.read_csv("../data/sample_books_cleaned.csv")

In [9]:
sample_books

Unnamed: 0.1,Unnamed: 0,Id,Name,Authors,ISBN,Rating,PublishYear,PublishMonth,PublishDay,Publisher,...,RatingDist3,RatingDist2,RatingDist1,RatingDistTotal,CountsOfReview,Language,PagesNumber,Description,pagesNumber,Count of text reviews
0,54015,2167181,"The Spellman Files (The Spellmans, #1)",Lisa Lutz,1847820069,3.88,2007,12,1,Charnwood (U.K.),...,3:7346,2:1783,1:653,total:31453,9,eng,434.0,"Meet Isabel ""Izzy"" Spellman, private investiga...",,
1,96931,3069617,Insight Day and Night Guide Boston,Insight Guides,9812468013,4.00,2006,7,15,Insight Guides,...,3:0,2:0,1:0,total:1,1,eng,1.0,Insight Day &amp; Night Guides let you plan yo...,,
2,105559,3430781,"Sixteen Candles (Terror Academy, #3)",Nicholas Pine,0749716886,3.60,1994,6,13,Mammoth,...,3:18,2:9,1:1,total:67,0,eng,174.0,<b>TERROR ACADEMY - A KILLER IS ON THE LOOSE.....,,
3,75202,2971345,His Wedding Ring of Revenge,Julia James,0263185966,3.52,2005,12,2,Thorndike Press,...,3:71,2:30,1:9,total:221,1,eng,288.0,Rachel Vail is still haunted by Vito Farneste'...,,
4,89816,1108028,All for the Union: The Civil War Diary & Lette...,Robert Hunt Rhodes,0679738282,4.12,1992,28,7,Vintage,...,3:129,2:32,1:9,total:720,30,eng,,All for the Union is the eloquent and moving d...,270.0,30.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,121574,1484139,"Spinning Spells, Weaving Wonders: Modern Magic...",Patricia J. Telesco,0895948036,3.70,1996,1,3,Crossing Press,...,3:10,2:2,1:3,total:33,0,eng,,This essential book of over 300 spells tells h...,256.0,0.0
96,82239,1886108,Through the Medicine Cabinet (The Zack Files #2),Dan Greenburg,0448412624,3.96,1996,8,6,Grosset & Dunlap,...,3:80,2:17,1:4,total:296,26,eng,64.0,One minute I was looking for my retainer in th...,,
97,88653,989125,In Pursuit of His Glory,R.T. Kendall,1591854547,3.73,2004,5,5,Charisma House,...,3:7,2:0,1:1,total:22,2,eng,,Pursuing the glory of God doesn't happen overn...,310.0,2.0
98,20356,686352,His Own Words: Translation and Analysis of the...,Laura Mansfield,1847288804,3.70,2006,17,7,Lulu.com,...,3:3,2:1,1:0,total:10,1,eng,,Al Qaeda second in command Dr. Ayman Zawahiri ...,364.0,


In [10]:
sample_books['category'] =  sample_books.apply(lambda x: 'popular' if x.name < 50 else 'recommended', axis=1)

In [11]:
sample_books['user_id'] = sample_books.apply(lambda x: x.name % 10 + 1 if x['category'] == 'recommended' else None, axis = 1)

In [12]:
sample_books['ranking'] = sample_books.groupby('user_id').cumcount() + 1

In [13]:
sample_books.rename(columns={'Id' : 'id', 'Name': 'title', 'Description': 'description'}, inplace=True)

In [14]:
data = sample_books

In [15]:
data[data['user_id'] == 1]

Unnamed: 0.1,Unnamed: 0,id,title,Authors,ISBN,Rating,PublishYear,PublishMonth,PublishDay,Publisher,...,RatingDistTotal,CountsOfReview,Language,PagesNumber,description,pagesNumber,Count of text reviews,category,user_id,ranking
50,50887,2069496,Tasha the Tap Dance Fairy (Rainbow Magic: Danc...,Daisy Meadows,1846164931,3.85,2007,8,1,Orchard (NY),...,total:590,19,eng,69.0,Rachel and Kirsty are on the hunt for Tasha's ...,,,recommended,1.0,1.0
60,44468,849880,Hell's Angels,Hunter S. Thompson,0345331486,3.97,1985,12,8,Ballantine Books (NY),...,total:41727,39,en-US,,"""California, Labor Day weekend...early, with o...",348.0,39.0,recommended,1.0,2.0
70,82347,1890078,The Reappearance of the Christ,Alice A. Bailey,085330114X,4.15,1948,12,1,Lucis Publishing Company,...,total:33,3,eng,192.0,Many expect the coming of an Avatar or Saviour...,,,recommended,1.0,3.0
80,12060,776434,"Wild Cards (Wild Cards, #1)",George R.R. Martin,0743423801,3.7,2001,1,8,iBooks,...,total:11243,9,en-US,,A shared-universe superhero prose anthology ed...,426.0,9.0,recommended,1.0,4.0
90,4577,1630607,Chasing Danny Boy: Powerful Stories of Gay Cel...,Mark Hemry,1890834319,3.83,2012,1,5,Palm Drive Publishing,...,total:6,5,eng,,CHASING DANNY BOY: POWERFUL STORIES OF GAY CEL...,198.0,5.0,recommended,1.0,5.0


In [16]:
from tqdm.auto import tqdm
from uuid import uuid4
import time

batch_size = 10

texts = []
metadatas = []

print("Starting batch processing...")

for i in tqdm(range(0, len(data), batch_size)):
    print(f"\nProcessing batch {i} to {min(i + batch_size, len(data))}...")

    # Get end of batch
    i_end = min(len(data), i + batch_size)
    batch = data.iloc[i:i_end]

    print(f"Batch size: {len(batch)} records")

    # Get metadata fields
    metadatas = [
        {
            'category': record['category'],
            'title': record['title'],
            'description': record['description'],
            **({'user_id': str(int(record['user_id']))} if record['category'] == 'recommended' else {})
        }
        for _, record in batch.iterrows()
    ]
    print(f"Metadata extracted for {len(metadatas)} records")
    print(metadatas)

    # Get the list of contexts/documents
    documents = list(batch["description"].astype(str))  # Convert descriptions to strings
    print(f"Encoding {len(documents)} documents...")

    start_time = time.time()
    embeds = embed(documents)  # Check if this runs properly
    end_time = time.time()
    print(f"Encoding completed in {end_time - start_time:.2f} seconds!")

    # Get IDs
    ids = batch['id'].astype(str)
    print(f"Retrieved {len(ids)} IDs")

    # Upserting to Pinecone
    print("Upserting data into Pinecone...")
    start_time = time.time()
    index.upsert(vectors=list(zip(ids, embeds, metadatas)))
    end_time = time.time()
    print(f"Upsert completed in {end_time - start_time:.2f} seconds!")

print("All batches processed successfully!")


Starting batch processing...


  0%|          | 0/10 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.



Processing batch 0 to 10...
Batch size: 10 records
Metadata extracted for 10 records
[{'category': 'popular', 'title': 'The Spellman Files (The Spellmans, #1)', 'description': 'Meet Isabel "Izzy" Spellman, private investigator. This twenty-eight-year-old may have a checkered past littered with romantic mistakes, excessive drinking, and creative vandalism; she may be addicted to Get Smart reruns and prefer entering homes through windows rather than doors -- but the upshot is she\'s good at her job as a licensed private investigator with her family\'s firm, Spellman Investigations. Invading people\'s privacy comes naturally to Izzy. In fact, it comes naturally to all the Spellmans. If only they could leave their work at the office. To be a Spellman is to snoop on a Spellman; tail a Spellman; dig up dirt on, blackmail, and wiretap a Spellman. <br /><br />Part Nancy Drew, part Dirty Harry, Izzy walks an indistinguishable line between Spellman family member and Spellman employee. Duties in

In [17]:
index.describe_index_stats()

{'dimension': 1024,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 100}},
 'total_vector_count': 100}

## Creating Vector Store and Querying

In [18]:
# from langchain.schema.vectorstore import VectorStoreRetriever
# from langchain.callbacks.manager import (
#         AsyncCallbackManagerForRetrieverRun,
#         CallbackManagerForRetrieverRun,
#     )
# from typing import (
#     TYPE_CHECKING,
#     Any,
#     Callable,
#     ClassVar,
#     Collection,
#     Dict,
#     Iterable,
#     List,
#     Optional,
#     Tuple,
#     Type,
#     TypeVar,
# )
# from langchain.schema.document import Document

# class PineconeVectorStoreRetriever(VectorStoreRetriever):

#      def _get_relevant_documents(
#         self, query: str, *, run_manager: CallbackManagerForRetrieverRun
#     ) -> List[Document]:
#         if self.search_type == "similarity":
#             docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
#         elif self.search_type == "similarity_score_threshold":
#             docs_and_similarities = (
#                 self.vectorstore.similarity_search_with_relevance_scores(
#                     query, **self.search_kwargs
#                 )
#             )
#             for doc, score in docs_and_similarities:
#                 doc.metadata = {**doc.metadata, **{"score": 1-score}}
#             docs = [doc for doc, _ in docs_and_similarities]
#         elif self.search_type == "mmr":
#             docs = self.vectorstore.max_marginal_relevance_search(
#                 query, **self.search_kwargs
#             )
#         else:
#             raise ValueError(f"search_type of {self.search_type} not allowed.")
#         return docs 

In [33]:

# class PineconeModified(Pinecone):
#     def __init__(self, index, embed, text_field):
#         super().__init__(index, embed, text_field)
    
#     def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
#         """Return VectorStoreRetriever initialized from this VectorStore.

#         Args:
#             search_type (Optional[str]): Defines the type of search that
#                 the Retriever should perform.
#                 Can be "similarity" (default), "mmr", or
#                 "similarity_score_threshold".
#             search_kwargs (Optional[Dict]): Keyword arguments to pass to the
#                 search function. Can include things like:
#                     k: Amount of documents to return (Default: 4)
#                     score_threshold: Minimum relevance threshold
#                         for similarity_score_threshold
#                     fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
#                     lambda_mult: Diversity of results returned by MMR;
#                         1 for minimum diversity and 0 for maximum. (Default: 0.5)
#                     filter: Filter by document metadata

#         Returns:
#             VectorStoreRetriever: Retriever class for VectorStore.

#         Examples:

#         .. code-block:: python

#             # Retrieve more documents with higher diversity
#             # Useful if your dataset has many similar documents
#             docsearch.as_retriever(
#                 search_type="mmr",
#                 search_kwargs={'k': 6, 'lambda_mult': 0.25}
#             )

#             # Fetch more documents for the MMR algorithm to consider
#             # But only return the top 5
#             docsearch.as_retriever(
#                 search_type="mmr",
#                 search_kwargs={'k': 5, 'fetch_k': 50}
#             )

#             # Only retrieve documents that have a relevance score
#             # Above a certain threshold
#             docsearch.as_retriever(
#                 search_type="similarity_score_threshold",
#                 search_kwargs={'score_threshold': 0.8}
#             )

#             # Only get the single most similar document from the dataset
#             docsearch.as_retriever(search_kwargs={'k': 1})

#             # Use a filter to only retrieve documents from a specific paper
#             docsearch.as_retriever(
#                 search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
#             )
#         """
#         tags = kwargs.pop("tags", None) or []
#         tags.extend(self._get_retriever_tags())

#         return PineconeVectorStoreRetriever(vectorstore=self, **kwargs, tags=tags)

from langchain.vectorstores import VectorStore
from langchain.schema.vectorstore import VectorStoreRetriever
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from typing import List, Any, Optional, Dict, Tuple
from langchain.schema.document import Document
import pinecone


class PineconeVectorStoreRetriever(VectorStoreRetriever):
    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        # Assuming we're using similarity search
        docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
        return docs


class PineconeModified(VectorStore):
    def __init__(self, index: pinecone.Index, embed, text_field: str):
        self.index = index
        self.embed = embed
        self.text_field = text_field

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embed,
        index: pinecone.Index,
        metadata: Optional[List[Dict]] = None,
        text_field: str = "text",
        **kwargs: Any,
    ) -> "PineconeModified":
        """Create a Pinecone vector store from texts and metadata."""
        # Generate embeddings for the texts
        embeddings = embed(texts)
        
        # Prepare the documents with metadata
        documents = [
            {"id": f"doc_{i}", "values": emb, "metadata": metadata[i] if metadata else {}}
            for i, (emb, metadata) in enumerate(zip(embeddings, metadata or [{}] * len(texts)))
        ]
        
        # Upload embeddings to Pinecone
        index.upsert(vectors=documents)

        return cls(index, embed, text_field)

    def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
        """Return VectorStoreRetriever initialized from this VectorStore."""
        return PineconeVectorStoreRetriever(vectorstore=self, **kwargs)

    def similarity_search(self, query: str, k: int = 4, **kwargs) -> List[Document]:
        # Encode the query
        embed_query = self.embed([query])[0]  # Embed the query
        # Perform similarity search using the Pinecone index with keyword arguments
        results = self.index.query(vector=embed_query, top_k=k, include_metadata=True, **kwargs)
        # Convert results to LangChain Document objects
        documents = [
            Document(page_content=result['metadata'][self.text_field], metadata=result['metadata'])
            for result in results['matches']
        ]
        return documents
    
    def similarity_search_with_score(self, query: str, k: int = 4, **kwargs) -> List[Tuple[Document, float]]:
        """Perform similarity search with relevance scores."""
        embed_query = self.embed([query])[0]  # Embed the query
        # Perform similarity search using the Pinecone index with keyword arguments
        results = self.index.query(vector=embed_query, top_k=k, include_metadata=True, **kwargs)
        # Convert results to LangChain Document objects and include scores
        documents_with_scores = [
            (
                Document(page_content=result['metadata'][self.text_field], metadata=result['metadata']),
                result['score']  # Assuming `score` is available from Pinecone results
            )
            for result in results['matches']
        ]
        return documents_with_scores


In [34]:
# text_field = "description"

# # switch back to normal index for langchain
# index = pc.Index(index_name)


# vectorstore = PineconeModified(index, embed, text_field) 


# Now we can create the PineconeModified vector store
vectorstore = PineconeModified(index, embed, "description")
     

In [35]:
query = "adventure story"

vectorstore.similarity_search_with_score(
    query, 
    k=3,
    filter={'category': 'popular'}
)



[(Document(metadata={'category': 'popular', 'description': 'They did the impossible, deposing the godlike being whose brutal rule had lasted a thousand years. Now Vin, the street urchin who has grown into the most powerful Mistborn in the land, and Elend Venture, the idealistic young nobleman who loves her, must build a healthy new society in the ashes of an empire.<br /><br />They have barely begun when three separate armies attack. As the siege tightens, an ancient legend seems to offer a glimmer of hope. But even if it really exists, no one knows where to find the Well of Ascension or what manner of power it bestows.<br /><br />It may just be that killing the Lord Ruler was the easy part. Surviving the aftermath of his fall is going to be the real challenge.', 'title': 'The Well of Ascension (Mistborn, #2)'}, page_content='They did the impossible, deposing the godlike being whose brutal rule had lasted a thousand years. Now Vin, the street urchin who has grown into the most powerful

In [36]:
query = "history"

vectorstore.similarity_search_with_score(
    query, 
    k=3,
    filter={'user_id' : '1' , 'category': 'recommended'}
)
     

[(Document(metadata={'category': 'recommended', 'description': '"California, Labor Day weekend...early, with ocean fog still in the streets, outlaw motorcyclists wearing chains, shades &amp; greasy Levis roll out from damp garages, all-night diners &amp; cast-off one-night pads in Frisco, Hollywood, Berdoo &amp; East Oakland, heading for the Monterey peninsula, north of Big Sur...The Menace is loose again." Thus begins Hunter S. Thompson\'s vivid account of his experiences with California\'s most notorious motorcycle gang, the Hell\'s Angels.<br /><br /> In the mid-60s, Thompson spent almost two years living with the controversial Angels, cycling up &amp; down the coast, reveling in the anarchic spirit of their clan, and, as befits their name, raising hell. His book successfully captures a singular moment in American history, when the biker lifestyle was 1st defined, &amp; when such countercultural movements were electrifying &amp; horrifying America. Thompson, the creator of Gonzo jou

## Tools definition


In [37]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.schema.vectorstore import VectorStore
from langchain.chains.query_constructor.ir import StructuredQuery, Visitor
from langchain.schema.language_model import BaseLanguageModel
from langchain.retrievers.self_query.pinecone import PineconeTranslator
from langchain.chains.query_constructor.base import load_query_constructor_runnable

from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union

In [38]:
def _get_builtin_translator(vectorstore: VectorStore) -> Visitor:
    """Get the translator class corresponding to the vector store class."""
    BUILTIN_TRANSLATORS: Dict[Type[VectorStore], Type[Visitor]] = {
        PineconeModified: PineconeTranslator,
    }
    
    if vectorstore.__class__ in BUILTIN_TRANSLATORS:
        return BUILTIN_TRANSLATORS[vectorstore.__class__]()
    else:
        raise ValueError(
            f"Self query retriever with Vector Store type {vectorstore.__class__}"
            f" not supported."
        )

class PineconeSelfQueryRetriever(SelfQueryRetriever):

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        vectorstore: VectorStore,
        document_contents: str,
        metadata_field_info: Sequence[Union[AttributeInfo, dict]],
        structured_query_translator: Optional[Visitor] = None,
        chain_kwargs: Optional[Dict] = None,
        enable_limit: bool = False,
        use_original_query: bool = False,
        **kwargs: Any,
    ) -> "SelfQueryRetriever":
        if structured_query_translator is None:
            structured_query_translator = _get_builtin_translator(vectorstore)
        chain_kwargs = chain_kwargs or {}

        if "allowed_comparators" not in chain_kwargs:
            chain_kwargs[
                "allowed_comparators"
            ] = structured_query_translator.allowed_comparators
        if "allowed_operators" not in chain_kwargs:
            chain_kwargs[
                "allowed_operators"
            ] = structured_query_translator.allowed_operators
        query_constructor = load_query_constructor_runnable(
            llm,
            document_contents,
            metadata_field_info,
            enable_limit=enable_limit,
            **chain_kwargs,
        )
        return cls(
            query_constructor=query_constructor,
            vectorstore=vectorstore,
            use_original_query=use_original_query,
            structured_query_translator=structured_query_translator,
            **kwargs,
        )

In [39]:
# chat completion llm

llm = ChatOpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0.0
)

  llm = ChatOpenAI(


In [40]:
# conversational memory

conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)

  conversational_memory = ConversationBufferWindowMemory(


In [41]:
def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

### Tool 1 - Generic Recommendation

In [None]:
# import sys
# sys.path.append('d:/Desktop/SeminarCNPm/Project/Model/llm-recommender-system')

from langchain.chains.query_constructor.schema import AttributeInfo
from modules.helper.PineconeSelfQueryRetriever import PineconeSelfQueryRetriever

metadata_field_info=[
    AttributeInfo(
        name="user_id",
        description="The user ID of the book recommendation",
        type="string",
    ),
    AttributeInfo(
        name="category",
        description="The type of entry (popular or recommended)",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="title",
        description="The title of the book",
        type="string",
    ),
    AttributeInfo(
        name="description",
        description="The description of the book",
        type="float"
    ),
]
document_content_description = "The description of the book"

pineconeSelfQueryRetriever = PineconeSelfQueryRetriever.from_llm(llm, vectorstore, document_content_description, metadata_field_info, verbose=True)

NameError: name 'llm' is not defined

In [None]:
## USER ID IS SPECIFICALLY DETERMINED IN FRONTEND

In [None]:
# retrieval qa chain

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=pineconeSelfQueryRetriever
)

In [None]:
%%time

qa.run("I am user 1 and recommend some books for me")

#### Optimized version

In [None]:
%%time

generic_qa = pineconeSelfQueryRetriever | format_docs

generic_qa.invoke("I am user 1 and recommend some books for me")

### Tool 2 - Popular Recommendation

In [None]:
# retrieval qa chain

popular_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_rerank",
    retriever=vectorstore.as_retriever(
        search_kwargs={'k' : 5, 
                       'filter': {'category': 'popular'}}),
    # return_source_documents = True
)

In [None]:
popular_qa.run("Recommend me books about history")

#### Optimized version

In [None]:
pinecone_retriever = vectorstore.as_retriever(
                search_kwargs={'k' : 5, 
                                'filter': {'category': 'popular'}})

popular_qa = pinecone_retriever | format_docs

popular_qa.invoke("Recommend me books about history")

### Tool 3 - Specific Recommendation

In [None]:
recommended_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_rerank",
    retriever=vectorstore.as_retriever(
        search_kwargs={'k' : 5, 
                       'filter': {'user_id' : '1', 'category': 'recommended'}}),
    # return_source_documents = True
)

In [None]:
recommended_qa.invoke("Recommend me books about history")

#### Optimized Version

In [None]:
def format_docs_title(docs):
    return "\n\n".join([f"{i+1}. {d.metadata['title']} : {d.page_content}" for i,d in enumerate(docs)])

In [None]:
pineconeRetreiver = vectorstore.as_retriever(
        search_kwargs={'k' : 5, 
                       'filter': {'user_id' : '1', 'category': 'recommended'}})

recommended_qa = pineconeRetreiver | format_docs_title

recommended_qa.invoke("history books")

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import PromptTemplate
from operator import itemgetter

# Check if the recommended books is match the user's query
chain = (
    {"recommended_books": recommended_qa, "query": RunnablePassthrough()}
    | ChatPromptTemplate.from_template(
        """
        Check if the document recommends a book. Say "yes" or "no".

        Recommended Books: 
        {recommended_books}

        Classification:"""
    )
    | llm
    | StrOutputParser()
)

# If yes
recommended_chain = (
    {"recommended_books" : RunnablePassthrough()}
    | PromptTemplate.from_template(
        """
        You are an expert in recommended books. \
        Give the user book recommendation books using below information. \
        Always start with "I have some books recommendation that is tailored to your taste. \
        
        Recommended Books: 
        {recommended_books}
        """
    )
    | llm
)

# If no
popular_chain = (
    {"recommended_books": popular_qa, "question": RunnablePassthrough()}
    | PromptTemplate.from_template(
        """
        You are an expert in recommended books. \
        Give the user book recommendation books using below information. \
        Always start with "I have some popular books that I can recommend for you. \
        
        Recommended Books: 
        {recommended_books}
        """
    )
    | llm
)

from langchain.schema.runnable import RunnableBranch

def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

full_chain = (
    {
        "topic": (
            {"recommended_books": recommended_qa , "query": RunnablePassthrough()}
            | ChatPromptTemplate.from_template(
                """
                Check if the document recommends a book. Say "yes" or "no".

                Recommended Books: 
                {recommended_books}

                Classification:"""
            )
            | llm
            | StrOutputParser()
            ), 
        "query": RunnablePassthrough()
    }
    | RunnableBranch(
        (lambda x: "yes" in x["topic"].lower() or "Yes" in x["topic"].lower(), (lambda x :  x['query']) | recommended_qa),
        (lambda x: "no" in x["topic"].lower() or "No" in x["topic"].lower(), (lambda x :  x['query']) | popular_chain),
        (lambda x :  x['query']) | popular_chain
        )
    | StrOutputParser()
)


In [None]:
full_chain.invoke("I am a user 1 and recommend some books about romance for me")

In [None]:
async for chunk in full_chain.astream_log(
    "I am a user 1 and recommend some books about romance for me"
):
    print("-" * 80)
    print(chunk)

In [None]:
full_chain.invoke("I am user 1 and recommend me books about history")

In [None]:
full_chain.invoke("history")

In [None]:
full_chain.invoke("history books")

### Tool Consolidation

In [None]:
from langchain.agents import Tool

tools = [
    Tool(
        name='Generic Recommendation',
        func=qa.run,
        description=(
            'use this tool when the user asking for book recommendation without any specific preference'
        )
    ),
    Tool(
        name='Specific Recommendation',
        func=full_chain.invoke,
        description=(
            'use this tool when the user asking for book recommendation with a specific preference (genre, theme, etc.)'
        )
    ),
    Tool(
        name='Popular Recommendation',
        func=popular_qa.invoke,
        description=(
            'use this tool when the user asking for popular book recommendation without any specific preference'
        )
    ),
    Tool(
        name='Generic Prompt',
        func=llm.invoke,
        description=(
            'use this tool when the user asking or talk about general question'
        )
    ),
]

## Conversation Agent

In [None]:
# conversational memory

conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)

In [None]:
from langchain.agents import initialize_agent

agent = initialize_agent(
    # agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory
)

In [None]:
agent("Recommend me some books about history")

In [None]:
agent("Great, I love it")