In [None]:
pip install -r ./requirements.txt -q

In [None]:
# pip show openai

# Python-dotenv

In [None]:
import os
from dotenv import find_dotenv, load_dotenv

# Locate the .env file first, then load it; the load result is the cell output.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Chat Models: GPT-3.5 Turbo and GPT-4

In [None]:
from langchain_openai import ChatOpenAI

# Chat model with default settings.
llm = ChatOpenAI()

# Send a plain string and show only the text of the reply.
question = 'Explain quantum mechanics in one sentence.'
output = llm.invoke(question)
print(output.content)

In [None]:
help(ChatOpenAI)

In [None]:
# Import the message classes from langchain_core, the same package the
# ChatPromptTemplate cell uses for SystemMessage.
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# A system message sets the persona; the human message carries the question.
messages = [
    SystemMessage(content='You are a physicist and respond only in German.'),
    HumanMessage(content='Explain quantum mechanics in one sentence.'),
]

output = llm.invoke(messages)
print(output.content)

# Caching LLM Responses

## In-Memory Cache

In [None]:
from langchain.globals import set_llm_cache
from langchain_openai import OpenAI

# Rebind `llm` to the instruct model used by the caching cells.
llm = OpenAI(model_name='gpt-3.5-turbo-instruct')

In [None]:
%%time
from langchain.cache import InMemoryCache
set_llm_cache(InMemoryCache())
prompt = 'Tell me a joke a toddler can understand'
llm.invoke(prompt)

In [None]:
%%time
llm.invoke(prompt)

## SQLite Caching

In [None]:
from langchain.cache import SQLiteCache

# Switch the global LLM cache to a SQLite file in the working directory.
sqlite_cache = SQLiteCache(database_path='.langchain.db')
set_llm_cache(sqlite_cache)
prompt = 'Tell me a joke'

In [None]:
%%time
# First request. It will not be cached.
llm.invoke(prompt)

In [None]:
%%time
# Second request is faster since the response is cached.
llm.invoke(prompt)

# LLM Streaming

In [None]:
# Import the class this cell actually instantiates, so the cell no longer
# depends on an earlier cell having imported ChatOpenAI.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()
prompt = 'Write a rock song about the Moon and a Raven.'

# Print each streamed chunk as it arrives, without newlines between chunks.
for chunk in llm.stream(prompt):
    print(chunk.content, end='', flush=True)

# PromptTemplates

In [None]:
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Template with two placeholders: {virus} and {language}.
template = '''You are an experience virologist.
Write a few sentences about the following virus "{virus}" in {language}.'''

prompt_template = PromptTemplate.from_template(template=template)

# Fill the placeholders from a mapping to obtain the final prompt string.
fill_ins = {'virus': 'hiv', 'language': 'german'}
prompt = prompt_template.format(**fill_ins)

llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)
output = llm.invoke(prompt)
print(output.content)


# ChatPromptTemplates

In [None]:
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage

# Fixed system instruction plus a templated human message ({n}, {area}).
system_message = SystemMessage(content='You respond only in the JSON format')
human_message_template = HumanMessagePromptTemplate.from_template(
    'Top {n} countries in {area} by population.'
)
chat_template = ChatPromptTemplate.from_messages([system_message, human_message_template])

# Substitute the placeholders to obtain the concrete message list.
messages = chat_template.format_messages(n=10, area='Europe')
print(messages)

In [None]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI()
# Send the formatted message list (`messages`); the previous name `message`
# is not defined anywhere and raised a NameError.
output = llm.invoke(messages)
print(output.content)

# Simple Chains

In [None]:
from langchain_openai import ChatOpenAI
# Import PromptTemplate from langchain.prompts, as the PromptTemplates cell does,
# instead of from the langchain root module.
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI()

template = '''You are an experience virologist.
Write a few sentences about the following virus "{virus}" in {language}.'''

prompt_template = PromptTemplate.from_template(template = template)

# Compose prompt -> model -> string parser with the pipe operator.
chain = prompt_template | llm | StrOutputParser()

# The dict keys must match the placeholders in the template.
output = chain.invoke({'virus': 'HSV', 'language': ' Spanish'})
print(output)

In [None]:
template = 'What is the capital of {country}? List the top 3 places to visit in that country. Use bullet points'

prompt_template = PromptTemplate.from_template(template = template)

chain = prompt_template | llm | StrOutputParser()

country = input('Enter country: ')
# Pass the template variable by name, like the dict-style call used for the
# virus chain, so the call stays valid if the template gains more variables.
output = chain.invoke({'country': country})
print(output)

# Sequential Chains

In [None]:
from langchain_openai import ChatOpenAI
# Import PromptTemplate from langchain.prompts, as the PromptTemplates cell does,
# instead of from the langchain root module.
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain

# Step 1: generate a Python function for the requested concept.
llm1 = ChatOpenAI(model_name = 'gpt-3.5-turbo', temperature=0.5)

prompt_template1 = PromptTemplate.from_template(
    template = 'You are an experienced scientist and Python programmer. Write a function that implements the concept of {concept}.'
)

# chain1 = prompt_template1 | llm1 | StrOutputParser()

chain1 = LLMChain(llm=llm1, prompt=prompt_template1)

# Step 2: describe the function produced by step 1.
llm2 = ChatOpenAI(model_name='gpt-4-turbo-preview', temperature=1.2)

prompt_template2 = PromptTemplate.from_template(
    template = 'Given the Python function {function}, describe it as detailed as possible.'
)

# chain2 = prompt_template2 | llm2 | StrOutputParser()

chain2 = LLMChain(llm=llm2, prompt = prompt_template2)

# Run the two chains in order; verbose=True is set to show intermediate output.
overall_chain = SimpleSequentialChain(chains = [chain1, chain2], verbose=True)

output = overall_chain.invoke('linear regression')


# LangChain Agents in Action: Python REPL

In [None]:
from langchain_experimental.utilities import PythonREPL

python_repl = PythonREPL()

# Code string handed to the REPL: multiples of 13 below 100.
code_snippet = 'print([n for n in range(1,100) if n % 13 == 0])'
python_repl.run(code_snippet)

In [None]:
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model='gpt-4-turbo-preview', temperature=0)

# Agent that is given a Python REPL tool to work with.
agent_executor = create_python_agent(llm=llm, tool=PythonREPLTool(), verbose=True)

task = 'Calculate the square root of the factorial of 12 and display it with 4 decimal points.'
agent_executor.invoke(task)

In [None]:
response = agent_executor.invoke('What is the answer to 5.1 ** 7.3?')

# LangChain Tools: DuckDuckGo and Wikipedia

In [None]:
pip install -q duckduckgo-search

In [None]:
pip show duckduckgo-search

In [None]:
# Import the tool from langchain_community, the package the other search and
# Wikipedia cells in this notebook import from.
from langchain_community.tools import DuckDuckGoSearchRun

search = DuckDuckGoSearchRun()
output = search.invoke('Where was Freddie Mercury born?')
print(output)

In [None]:
search.name

In [None]:
search.description

In [None]:
# Import the tool from langchain_community, the package the other search and
# Wikipedia cells in this notebook import from.
from langchain_community.tools import DuckDuckGoSearchResults

search = DuckDuckGoSearchResults()
# Call the tool through invoke(), like the DuckDuckGoSearchRun cell does.
output = search.invoke('Freddie Mercury and Queen')
print(output)

In [None]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

# Configure the underlying search: region 'de-de', at most 3 results, moderate safe-search.
wrapper = DuckDuckGoSearchAPIWrapper(region = 'de-de', max_results = 3, safesearch = 'moderate')
# NOTE(review): `source` is passed to the tool here rather than to the wrapper;
# confirm the installed langchain_community version honours it for news results.
search = DuckDuckGoSearchResults(api_wrapper = wrapper, source = 'news')
# `output` is reused by the regex-parsing cell that follows.
output = search.run('Berlin')
print(output)

In [None]:
import re

# Pull the snippet, title and link out of each rendered search result.
# The link may be followed by a comma (more fields or the next result), a closing
# bracket, or the end of the string. The previous pattern required a comma, so a
# link that ended a result was either missed or captured with a trailing ']'.
pattern = r'snippet: (.*?), title: (.*?), link: (.*?)(?:,|\]|$)'
matches = re.findall(pattern, output, re.DOTALL)

for snippet, title, link in matches:
    print(f'Snippet: {snippet}\nTitle: {title}\nLink: {link}\n')
    print('-' * 50)

In [None]:
pip install -q wikipedia

In [None]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# One result, with content capped at 500 characters.
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=500)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper)

wiki_query = {'query': 'llamaindex'}
wiki.invoke(wiki_query)

In [None]:
wiki.invoke('Google Gemini')

# Creating a ReAct Agent

In [None]:
pip install langchainhub -q

In [None]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

In [None]:
from langchain.prompts import PromptTemplate
from langchain import hub
from langchain.agents import Tool, AgentExecutor, initialize_agent, create_react_agent
# The search/Wikipedia tools and the Wikipedia wrapper are imported from
# langchain_community, the package the earlier tool cells import from.
from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name = 'gpt-4-turbo-preview', temperature=0)

# Alternative wrapper prompt without the language instruction:
# template='''
# Answer the following questions as best as you can.
# Questions: {q}
# '''

# Wrapper prompt applied to each user question before it is sent to the agent.
template='''
Answer the following questions in Brazilian Portuguese as best as you can.
Questions: {q}
'''

prompt_template = PromptTemplate.from_template(template)
# Agent prompt pulled from the LangChain hub; distinct from `prompt_template` above.
prompt = hub.pull('hwchase17/react')
# print(type(prompt))
# print(prompt.input_variables)
# print(prompt.template)

# 1. Python REPL tool (for executing Python code)

python_repl = PythonREPLTool()

python_repl_tool = Tool(
    name = 'Python REPL',
    func=python_repl.run,
    description='Useful when you need to use Python to answer a question. You should input Python code.'
)

# 2. Wikipedia tool (for searching Wikipedia)
api_wrapper = WikipediaAPIWrapper()
wikipedia = WikipediaQueryRun(api_wrapper = api_wrapper)
wikipedia_tool = Tool(
    name='Wikipedia',
    func=wikipedia.run,
    description='Useful for when you need to look up a topic, country, or person on Wikipedia.'
)

# 3. DuckDuckGo Search Tool (for general web searches)
search = DuckDuckGoSearchRun()
duckduckgo_tool= Tool(
    name='DuckDuckGo Search',
    func=search.run,
    description='Useful for when you need to perform an internet search to find information that another tool can\'t provide.'
)

tools = [python_repl_tool, wikipedia_tool, duckduckgo_tool]

agent = create_react_agent(llm, tools, prompt)

# The executor receives the same tool list as the agent; it is capped at
# 10 iterations and configured with handle_parsing_errors=True.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=10
)

In [None]:
question = 'Generate the first 20 numbers in the Fibonacci series.'

# Wrap the question in the notebook's prompt template before invoking the agent.
agent_input = {'input': prompt_template.format(q=question)}
output = agent_executor.invoke(agent_input)

In [None]:
print(output['input'])

In [None]:
print(output['output'])

In [None]:
question = 'Who is the current prime minister of the UK?'

output = agent_executor.invoke({
    'input': prompt_template.format(q=question)
})

In [None]:
question = 'What are the latest Brazilian football news?'

output = agent_executor.invoke({
    'input': prompt_template.format(q=question)
})

# Pinecone

In [None]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

In [None]:
pip install -q pinecone

In [None]:
pip install --upgrade -q pinecone

In [None]:
pip show pinecone

In [None]:
from pinecone import Pinecone

pc = Pinecone()
pc.list_indexes()

# Working with Pinecone Indexes

In [None]:
pc.list_indexes()

In [None]:
pc.list_indexes().names()

In [None]:
from pinecone import ServerlessSpec

index_name = 'langchain'

# Create the index only when no index with this name exists yet.
existing_index_names = pc.list_indexes().names()
if index_name in existing_index_names:
    print(f'Index {index_name} already exists.')
else:
    print(f'Creating index: {index_name}', flush=True)
    serverless_spec = ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=serverless_spec,
    )

    print('Index created.')

In [None]:
index_name = 'langchain'

# Delete the index only if it is currently listed; otherwise just report that.
# NOTE(review): later cells call pc.Index(index_name) on this same name, so after
# running this cell the index has to be created again before they can succeed.
if index_name in pc.list_indexes().names():
    print(f'Deleting index {index_name}...')
    pc.delete_index(index_name)
    print('Done.')
else:
    print(f'Index {index_name} does not exist.')

In [None]:
index = pc.Index(index_name)
index.describe_index_stats()

# Working with Vectors

In [None]:
import random

# Five random vectors of length 1536, paired with the ids 'a'..'e'.
vectors = []
for _ in range(5):
    vectors.append([random.random() for _ in range(1536)])
ids = list('abcde')

index_name = 'langchain'
index = pc.Index(index_name)

# Each upserted record is an (id, values) pair.
id_vector_pairs = zip(ids, vectors)
index.upsert(vectors=id_vector_pairs)

In [None]:
# updating vectors
index.upsert(vectors=[('c', [0.5]*1536)])

In [None]:
# Fetching vectors
index = pc.Index(index_name)
index.fetch(ids=['c', 'd'])

In [None]:
# Deleting vectors
index.delete(ids=['b', 'c'])

In [None]:
index.describe_index_stats()

In [None]:
index.fetch(ids=['x'])

In [None]:
# query
# Query the index with a random 1536-element vector; request the top 3 matches
# and leave the stored values out of the response.
query_vector = [random.random() for _ in range(1536)]
query_options = {'vector': query_vector, 'top_k': 3, 'include_values': False}

index.query(**query_options)

# Namespaces

In [None]:
import random

index_name = 'langchain'
index = pc.Index(index_name)

# Five random vectors upserted without a namespace argument.
vectors = []
for _ in range(5):
    vectors.append([random.random() for _ in range(1536)])
ids = list('abcde')

id_vector_pairs = zip(ids, vectors)
index.upsert(vectors=id_vector_pairs)

In [None]:
# Three random vectors with ids 'x', 'y', 'z', stored under 'first-namespace'.
vectors = []
for _ in range(3):
    vectors.append([random.random() for _ in range(1536)])
ids = list('xyz')

id_vector_pairs = zip(ids, vectors)
index.upsert(vectors=id_vector_pairs, namespace='first-namespace')

In [None]:
# Two random vectors with ids 'q', 'p', stored under 'second-namespace'.
vectors = []
for _ in range(2):
    vectors.append([random.random() for _ in range(1536)])
ids = list('qp')

id_vector_pairs = zip(ids, vectors)
index.upsert(vectors=id_vector_pairs, namespace='second-namespace')

In [None]:
index.describe_index_stats()

In [None]:
index.fetch(ids=['x'], namespace='first-namespace')

In [None]:
index.delete(ids=['x'], namespace='first-namespace')

In [None]:
index.delete(delete_all=True, namespace='first-namespace')

In [None]:
index.describe_index_stats()

# Splitting and Embedding Text Using Langchain

In [None]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the speech with an explicit encoding so the decoded text does not depend
# on the platform's default encoding.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open('files/churchill_speech.txt', encoding='utf-8') as f:
    churchill_speech = f.read()

# Split into chunks of up to 100 characters (measured with len) with a
# 20-character overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

In [None]:
chunks = text_splitter.create_documents([churchill_speech])

In [None]:
# print(chunks[0].page_content)
# print(chunks[1].page_content)
print(f'Now you have {len(chunks)}')

## Embedding Cost

In [None]:
def print_embedding_cost(texts, model='text-embedding-3-small', usd_per_1k_tokens=0.00002):
    """Print the total token count of ``texts`` and an estimated embedding cost.

    Args:
        texts: Iterable of objects exposing a ``page_content`` string.
        model: Model name used to pick the tiktoken encoding.
        usd_per_1k_tokens: Price in USD per 1,000 tokens. The default is meant to
            reflect text-embedding-3-small ($0.02 per 1M tokens); the previous
            hard-coded 0.0004 did not match that model. Verify against current
            OpenAI pricing and pass a different value when using another model.

    Returns:
        None. The two result lines are printed.
    """
    # Imported here so the notebook only needs tiktoken when this helper is called.
    import tiktoken
    enc = tiktoken.encoding_for_model(model)
    total_tokens = sum(len(enc.encode(page.page_content)) for page in texts)
    print(f'Total Tokens: {total_tokens}')
    print(f'Embedding Cost in USD: {total_tokens / 1000 * usd_per_1k_tokens:.6f}')

print_embedding_cost(chunks)

In [None]:
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)

In [None]:
vector = embeddings.embed_query(chunks[0].page_content)

# Inserting the Embeddings into a Pinecone Index

In [None]:
pip show pinecone

In [None]:
pip show langchain-community

In [None]:
import pinecone

# NOTE(review): `Index` is not referenced in the cells visible here — confirm it is still needed.
from pinecone.data.index import Index
# This import rebinds the name `Pinecone`, which an earlier cell imported from the
# `pinecone` package; from here on `Pinecone` refers to the LangChain vector store.
from langchain_community.vectorstores import Pinecone

# The client is therefore created through the module-qualified name.
pc = pinecone.Pinecone()

In [None]:
# for i in pc.list_indexes().names():
#     print('Deleting all indexes... ', end='', flush=True)
#     pc.delete_index(i)
#     print('done')

In [None]:
index_name = 'churchill-speech'

# Create the index only when no index with this name exists yet.
existing_index_names = pc.list_indexes().names()
if index_name in existing_index_names:
    print(f'Index {index_name} already exists.')
else:
    print(f'Creating index {index_name}...', end='', flush=True)
    serverless_spec = pinecone.ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=serverless_spec,
    )
    print('done.')

In [None]:
vector_store = Pinecone.from_documents(chunks, embeddings, index_name = index_name)

In [None]:
vector_store = Pinecone.from_existing_index(index_name = 'churchill-speech', embedding=embeddings)

# Asking Questions (Similarity Search)

In [None]:
query = 'Where should we fight?'
result = vector_store.similarity_search(query)
print(result)

In [None]:
for r in result:
    print(r.page_content)
    print('-' * 50)

In [None]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=1)

# Similarity retriever over the vector store, configured with k=3.
retriever_options = {'search_type': 'similarity', 'search_kwargs': {'k': 3}}
retriever = vector_store.as_retriever(**retriever_options)

# Question-answering chain of type 'stuff' built from the model and the retriever.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
)

In [None]:
# Other questions to try:
#   'Where should we fight?'
#   'Who was the king of Belgium at that time?'
query = 'What about the French Armies?'

# Run the retrieval chain and print the returned answer object.
answer = chain.invoke(query)
print(answer)