In [1]:
import os
import pickle
import langchain
import faiss
from langchain_openai import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [2]:
# Read the OpenAI API key from the local `secret_key` module and publish it as the
# OPENAI_API_KEY environment variable for the rest of the notebook.
from secret_key import openai_key

os.environ.update({"OPENAI_API_KEY": openai_key})

In [3]:
# Completion model used for answering: temperature 0.9, at most 500 generated tokens.
llm = OpenAI(max_tokens=500, temperature=0.9)

# Articles to index (CNBC, 2024-07-04):
#   1. China hopes for solution with EU on EV tariffs as soon as possible
#   2. Bitcoin price slides to 2-month low after Fed meeting minutes
article_urls = [
    "https://www.cnbc.com/2024/07/04/china-hopes-for-solution-with-eu-on-ev-tariffs-as-soon-as-possible.html",
    "https://www.cnbc.com/2024/07/04/bitcoin-btc-price-slides-to-2-month-low-after-fed-meeting-minutes.html",
]

# Fetch the pages; the loader returns one document per URL.
document_loader = UnstructuredURLLoader(urls=article_urls)
data = document_loader.load()

# Sanity check: number of documents loaded.
len(data)

2

In [4]:
# Chunk the articles into pieces of up to 1000 characters, with a 200-character
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

# `data` holds document objects (not raw strings), so split_documents is the
# right entry point for chunking it.
docs = text_splitter.split_documents(data)

# Number of chunks produced.
len(docs)

23

In [5]:
# Embed every chunk with OpenAI embeddings and build a FAISS index over the vectors.
# NOTE(review): the deprecation warning printed by this cell presumably refers to
# OpenAIEmbeddings being imported from `langchain.embeddings` -- confirm, and consider
# switching to the `langchain_openai` package the notebook already uses.
embeddings = OpenAIEmbeddings()
vec_index = FAISS.from_documents(documents=docs, embedding=embeddings)

# Persist the index to the local 'vector_index' folder so it can be reloaded later.
vec_index.save_local('vector_index')

  warn_deprecated(


In [6]:
# Reload the index that was just written to disk and bind it to `vec_index`, so the
# following cells query the persisted copy. Previously the loaded object was discarded
# and the reload had no effect on the rest of the notebook.
#
# SECURITY: load_local unpickles data stored next to the index, which can execute
# arbitrary code. allow_dangerous_deserialization=True is acceptable here only because
# 'vector_index' was produced by this notebook; never enable it for index files that
# come from an untrusted source.
vec_index = FAISS.load_local(
    "vector_index", embeddings, allow_dangerous_deserialization=True
)

# Display the loaded store as the cell output.
vec_index

<langchain_community.vectorstores.faiss.FAISS at 0x28750627bd0>

In [7]:
# Wire the QA chain together: the retriever looks up chunks in the FAISS index and
# the LLM produces an answer along with the sources it came from.
retriever = vec_index.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)

In [8]:
# Question about the EV-tariff article.
query = "How much did Chinese government spend to develop its electric car industry?"

# Enable LangChain's global debug tracing so the intermediate chain steps are printed.
langchain.debug = True

# Run the chain through .invoke(): calling the chain object directly goes through the
# deprecated Chain.__call__ path and triggers a deprecation warning.
# return_only_outputs=True keeps the result to the chain's outputs (answer + sources).
chain.invoke({"question": query}, return_only_outputs=True)

  warn_deprecated(


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "How much did Chinese government spend to develop its electric car industry?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "The Chinese government spent $230.8 billion over more than a decade to develop its electric car industry, according to an analysis by the U.S.-based Center for Strategic and International Studies.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSubscribe to CNBC PRO\n\nSubscribe to Investing Club\n\nLicensing & Reprints\n\nCNBC Councils\n\nSupply Chain Values\n\nCNBC on Peacock\n\nJoin the CNBC Panel\n\nDigital Products\n\nNews Releases\n\nClosed Captioning\n\nCorr

{'answer': ' The Chinese government spent $230.8 billion over more than a decade to develop its electric car industry.\n',
 'sources': 'https://www.cnbc.com/2024/07/04/china-hopes-for-solution-with-eu-on-ev-tariffs-as-soon-as-possible.html'}

In [9]:
# Question about the Bitcoin article.
query = "How did Bitcoin change in the last 24 hours?"

# Enable LangChain's global debug tracing (set here as well so the cell works on its own).
langchain.debug = True

# Run the chain through .invoke(): calling the chain object directly goes through the
# deprecated Chain.__call__ path.
# return_only_outputs=True keeps the result to the chain's outputs (answer + sources).
chain.invoke({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "How did Bitcoin change in the last 24 hours?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "watch now\n\nVIDEO\n\n12:50\n\n12:50\n\nHow Wall Street learned to love bitcoin\n\nCrypto\n\nOn Thursday, a small amount of bitcoin was moved from three wallets previously associated with Mt. Gox, according to Arkham Intelligence. The largest movement was for $24 worth of the cryptocurrency. It was not immediately clear if this transaction was made in connection with the Mt. Gox repayment plan.\n\nElsewhere, the German government on Thursday sold roughly 3,000 bitcoins — worth a

{'answer': ' The information provided does not answer the question, as it does not mention any changes in Bitcoin within the last 24 hours.\n',
 'sources': 'https://www.cnbc.com/2024/07/04/bitcoin-btc-price-slides-to-2-month-low-after-fed-meeting-minutes.html'}