In [None]:
! pip install -qU \
    openai \
    pinecone-client \
    langchain \
    tiktoken

In [1]:
import os
from getpass import getpass

# `! export VAR=...` runs in a short-lived subshell, so the variable never
# reaches the kernel process; it has to be set on os.environ to take effect.
# The key is prompted for (or taken from the existing environment) instead of
# being written into the notebook. Any key previously committed here should be
# treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API Key: ")

## Initializing Embedding Model and Vector DB

In [1]:
from getpass import getpass
from langchain.embeddings.openai import OpenAIEmbeddings


# Name of the OpenAI embedding model used for both documents and queries.
model_name = 'text-embedding-ada-002'

# Embedding client; later cells call embed.embed_documents / embed.embed_query.
embed = OpenAIEmbeddings(model=model_name)

In [None]:
import os

# Pinecone credentials come from the environment so that no secret is stored in
# the notebook. Any key previously committed here should be treated as leaked
# and rotated.
PINECONE_API = os.environ.get("PINECONE_API_KEY", "")
# NOTE(review): Pinecone environment names are usually hyphenated
# (e.g. "gcp-starter") — confirm the default below against the console.
PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT", "gcp_starter")

In [4]:
import pinecone

# Credentials are entered interactively so they are not stored in the notebook.
# Both values are shown in the console at app.pinecone.io: the API key, and the
# ENV (cloud region) listed next to it.
YOUR_API_KEY = getpass("Pinecone API Key: ")
YOUR_ENV = input("Pinecone environment: ")

index_name = 'llm-recommender-system'
pinecone.init(api_key=YOUR_API_KEY, environment=YOUR_ENV)

# Create the index only when it is not already listed; otherwise reuse it.
index_already_exists = index_name in pinecone.list_indexes()
if not index_already_exists:
    pinecone.create_index(
        name=index_name,
        dimension=1536,  # 1536 dim of text-embedding-ada-002
        metric='dotproduct',
    )

In [5]:
# Open a handle to the index and print its current statistics.
index = pinecone.Index(index_name)
index_stats_response = index.describe_index_stats()

print(index_stats_response)

{'dimension': 1536,
 'index_fullness': 0.001,
 'namespaces': {'': {'vector_count': 100}},
 'total_vector_count': 100}


## Indexing Embedding Vectors

In [16]:
import pandas as pd

In [17]:
# Load the sample book catalogue; the path is relative to the notebook's working directory.
sample_books = pd.read_csv("../data/sample_books.csv")

In [21]:
# Display the loaded frame to inspect its columns and rows.
sample_books

Unnamed: 0,Id,Name,Description
0,1374307,"Tough Times Never Last, but Tough People Do!","Name your problem, and you name your possibili..."
1,1334737,Black Gold,This bracing novel introduces vivid African-Am...
2,1598771,The Autobiography of Lincoln Steffens,Rediscover a man Americans turned to not only ...
3,2179832,Bright Shiny Morning,One of the most celebrated and controversial a...
4,4421731,The Socially Skilled Child Molester: Different...,Know what signs indicate a child molester!<br ...
...,...,...,...
95,2442839,"National Portrait Gallery, London",-- The first full catalogue of this major art ...
96,2787050,Collecting Toy Cars and Trucks: A Collector's ...,This indispensable identification and value gu...
97,4506874,Self Related Cognitions In Anxiety And Motivation,First published in 1986. Routledge is an impri...
98,3002398,Predators: A Pop-up Book with Revolutionary Te...,Get up close and personal with some of the wor...


In [23]:
def _category_for_row(row):
    """Label a row 'popular' when its index label is below 50, else 'recommended'."""
    if row.name < 50:
        return 'popular'
    return 'recommended'


sample_books['category'] = sample_books.apply(_category_for_row, axis=1)

In [34]:
def _assigned_user(row):
    """Return a user id in 1..10 for 'recommended' rows; None for all others.

    The id is derived from the row's index label (label % 10 + 1).
    """
    if row['category'] != 'recommended':
        return None
    return row.name % 10 + 1


sample_books['user_id'] = sample_books.apply(_assigned_user, axis=1)

In [92]:
# Number the rows within each user_id group in frame order, then shift the
# zero-based counter so rankings start at 1.
position_within_user = sample_books.groupby('user_id').cumcount()
sample_books['ranking'] = position_within_user + 1

In [93]:
# Normalise the source column names to the lowercase names used downstream.
column_renames = {'Id': 'id', 'Name': 'title', 'Description': 'description'}
sample_books.rename(columns=column_renames, inplace=True)

In [94]:
# `data` is a second name for the same DataFrame object (no copy is made).
data = sample_books

In [96]:
# Show the rows assigned to user 1.
data[data['user_id'] == 1]

Unnamed: 0,id,title,description,category,user_id,ranking
50,4589735,Informal Economy: A Research G,First published in 1991. Routledge is an impri...,recommended,1.0,1.0
60,1112628,In Amma's Healing Room: Gender and Vernacular ...,"""[I]t is extremely salubrious to see the ways ...",recommended,1.0,2.0
70,3041182,Vision & Prophecy in Amos Revised,In 1955 John D. W. Watts presented the faculty...,recommended,1.0,3.0
80,2243440,Environmental Regulation: Statutory Supplement...,This excellent resource takes full advantage o...,recommended,1.0,4.0
90,4502407,Hooded Americanism: The History of the Ku Klux...,"""The only work that treats Ku Kluxism for the ...",recommended,1.0,5.0


In [105]:
from tqdm.auto import tqdm
from uuid import uuid4

batch_size = 100

texts = []
metadatas = []


def _metadata_for(record):
    """Build the metadata dict stored alongside one book's vector.

    'user_id' is attached (as a string) only for rows whose category is
    'recommended'.
    """
    fields = {
        'category': record['category'],
        'title': record['title'],
        'description': record['description'],
    }
    if record['category'] == 'recommended':
        fields['user_id'] = str(int(record['user_id']))
    return fields


for i in tqdm(range(0, len(data), batch_size)):
    # slice out the current batch, clamping the end to the frame length
    i_end = min(len(data), i + batch_size)
    batch = data.iloc[i:i_end]
    # one metadata dict per row, in row order
    metadatas = [_metadata_for(record) for _, record in batch.iterrows()]
    # the description text is what gets embedded
    documents = batch['description']
    embeds = embed.embed_documents(documents)
    # vector IDs are the book ids rendered as strings
    ids = batch['id'].astype(str)
    # send (id, embedding, metadata) triples to pinecone
    index.upsert(vectors=zip(ids, embeds, metadatas))

100%|██████████| 1/1 [00:04<00:00,  4.98s/it]


In [106]:
# Re-check the index statistics after the upsert.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.001,
 'namespaces': {'': {'vector_count': 100}},
 'total_vector_count': 100}

## Creating Vector Store and Querying

In [13]:
from langchain.vectorstores import Pinecone

# Metadata field passed to the vector store as the text key.
text_field = "description"

# switch back to normal index for langchain
index = pinecone.Index(index_name)

# Arguments are positional: index handle, query-embedding function, text key.
vectorstore = Pinecone(index, embed.embed_query, text_field)
     



In [14]:
query = "adventure story"

# Restrict the search to books tagged as 'popular' and return the top 3 with scores.
popular_only = {'category': 'popular'}
vectorstore.similarity_search_with_score(query, k=3, filter=popular_only)
     

[(Document(page_content="Hope and Have introduces us to a young orphan who undergoes a miraculous transformation. Ther first part of the story tells how Fanny, through trials, triumphs, and her faith in God, has a change of heart. The second part of the story follows Fanny's adventures during the terrible Indian massacre of 1862. The gripping story of how Fanny and a young man named Ethan French escape makes this a captivating book.", metadata={'category': 'popular', 'title': 'Hope and Have'}),
  0.81603086),
 (Document(page_content="Combining a phonics-based text with a funny story, the books in this new series are aimed at beginner readers. The pictures give clues to the words and fold out flaps on some pages give children the chance to guess what will happen next'", metadata={'category': 'popular', 'title': 'Fat Cat on a Mat'}),
  0.811865509),
 (Document(page_content="This bracing novel introduces vivid African-American characters into the traditional territory of James Michener an

In [15]:
query = "history"

# Restrict the search to the books recommended for user '1'; return the top 3 with scores.
recommended_for_user = {'user_id': '1', 'category': 'recommended'}
vectorstore.similarity_search_with_score(query, k=3, filter=recommended_for_user)
     

[(Document(page_content='"The only work that treats Ku Kluxism for the entire period of it\'s existence. . . . <i>the </i>authoritative work on the period. <i>Hooded Americanism</i> is exhaustive in its rich detail and its use of primary materials to paint the picture of a century of terror. It is comprehensive, since it treats the entire period, and enjoys the perspective that the long view provides. It is timely, since it emphasizes the undeniable persistence of terrorism in American life."—John Hope Franklin', metadata={'category': 'recommended', 'title': 'Hooded Americanism: The History of the Ku Klux Klan, 3rd ed.', 'user_id': '1'}),
  0.781868577),
 (Document(page_content="This excellent resource takes full advantage of on-line information and students' familiarity with it. Reflecting the author's dual focus on environmental regulations and the policy considerations that shape them.", metadata={'category': 'recommended', 'title': 'Environmental Regulation: Statutory Supplement an

## Conversational Agent

In [53]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA

# Chat completion model, run at temperature 0.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)

# Windowed conversation memory (k=5), stored under the 'chat_history' key
# and returned as message objects.
conversational_memory = ConversationBufferWindowMemory(
    k=5,
    memory_key='chat_history',
    return_messages=True,
)

# Retriever limited to the 5 best matches among user '1's recommended books.
retriever_filter = {'user_id': '1', 'category': 'recommended'}
retriever = vectorstore.as_retriever(
    search_kwargs={'k': 5, 'filter': retriever_filter}
)

# Retrieval QA chain of type "stuff" built on that retriever.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [54]:
# Run the retrieval QA chain on a sample request and display the answer text.
result = qa.run("Recommend me some books about history")
result

'1. "Hooded Americanism: The History of the Ku Klux Klan" by David M. Chalmers\n2. "Vision and Prophecy in Amos" by John D. W. Watts\n3. "Environmental Policy: New Directions for the Twenty-First Century" by Norman J. Vig and Michael E. Kraft\n4. "In Amma\'s Healing Room: Gender and Vernacular Islam in South India" by Joyce Burkhalter Flueckiger'

In [39]:
from langchain.agents import Tool

# Single tool exposing the retrieval QA chain; the description string is what
# the agent is given to decide when to call it.
knowledge_base_tool = Tool(
    name='Knowledge Base',
    func=qa.run,
    description='use this tool when the user asking for book recommendation',
)

tools = [knowledge_base_tool]

In [40]:
from langchain.agents import initialize_agent

# Conversational agent wired to the tool list, the chat model and the
# windowed memory. It is capped at 3 iterations and uses the 'generate'
# early-stopping method.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent='chat-conversational-react-description',
    memory=conversational_memory,
    max_iterations=3,
    early_stopping_method='generate',
    verbose=True,
)

In [41]:
# Ask the agent for recommendations; in the recorded run it routes this to the 'Knowledge Base' tool.
agent("Recommend me some books about history")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Knowledge Base",
    "action_input": "books about history"
}[0m
Observation: [36;1m[1;3mHere are some books about history:

1. "Hooded Americanism: The History of the Ku Klux Klan" by David M. Chalmers
2. "Vision and Prophecy in Amos" by John D. W. Watts
3. "Environmental Policy: New Directions for the Twenty-First Century" by Norman J. Vig and Michael E. Kraft
4. "In Amma's Healing Room: Gender and Vernacular Islam in South India" by Joyce Burkhalter Flueckiger

Please note that these books cover different aspects of history, so you may want to specify a particular time period or topic you are interested in for more specific recommendations.[0m
Thought:[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "Here are some books about history:\n\n1. \"Hooded Americanism: The History of the Ku Klux Klan\" by David M. Chalmers\n2. \"Vision and Prophecy in Amos\" by John D. W. Watts\n3. \"Environmen

{'input': 'Recommend me some books about history',
 'chat_history': [],
 'output': 'Here are some books about history:\n\n1. "Hooded Americanism: The History of the Ku Klux Klan" by David M. Chalmers\n2. "Vision and Prophecy in Amos" by John D. W. Watts\n3. "Environmental Policy: New Directions for the Twenty-First Century" by Norman J. Vig and Michael E. Kraft\n4. "In Amma\'s Healing Room: Gender and Vernacular Islam in South India" by Joyce Burkhalter Flueckiger\n\nPlease note that these books cover different aspects of history, so you may want to specify a particular time period or topic you are interested in for more specific recommendations.'}

In [21]:
# A non-book question: in the recorded run the agent answers directly without calling the tool.
agent("what is 2 * 7?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "The result of 2 * 7 is 14."
}[0m

[1m> Finished chain.[0m


{'input': 'what is 2 * 7?',
 'chat_history': [HumanMessage(content='Recommend me some books about history'),
  AIMessage(content='Here are some books about history:\n\n1. "A History of Britain" by Simon Schama\n2. "Guns, Germs, and Steel: The Fates of Human Societies" by Jared Diamond\n3. "Sapiens: A Brief History of Humankind" by Yuval Noah Harari\n4. "The Guns of August" by Barbara Tuchman\n5. "The Diary of a Young Girl" by Anne Frank\n6. "The Rise and Fall of the Third Reich" by William L. Shirer\n7. "The Civil War: A Narrative" by Shelby Foote\n8. "The Origins of Political Order" by Francis Fukuyama\n9. "The Wright Brothers" by David McCullough\n10. "The Silk Roads: A New History of the World" by Peter Frankopan\n\nThese books cover a wide range of historical topics and are highly regarded in the field of history.')],
 'output': 'The result of 2 * 7 is 14.'}