In [3]:
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI
import os, sys
# Make the local `credentials` module importable from the project checkout.
# Raw string literal: in a normal string '\G' and '\D' are invalid escape
# sequences (DeprecationWarning since Python 3.6, SyntaxWarning on 3.12+).
# The resulting path value is unchanged.
sys.path.insert(1, r'D:\Github\DeepLake-Langchain')
import credentials
# Export the API keys through environment variables rather than passing them
# around in code.
os.environ["OPENAI_API_KEY"] = credentials.openai
os.environ['ACTIVELOOP_TOKEN'] = credentials.active_loop

In [4]:
# Baseline: ask the LLM directly, without any retrieved context.
llm = OpenAI(temperature=0)

# Single-variable prompt; {question} is the only placeholder declared below.
template = """You are an assistant that answers the following question correctly and honestly: {question}\n\n"""
prompt_template = PromptTemplate(input_variables=["question"], template=template)

question_chain = LLMChain(llm=llm, prompt=prompt_template)

# Last expression of the cell, so the notebook displays the model's answer.
# The recorded answer (Fast & Furious 9, 2021) is older than the Fast X (2023)
# hits returned by the Google search further down in this notebook.
question_chain.run("what is the latest fast and furious movie?")

'\nThe latest Fast and Furious movie is Fast & Furious 9, which was released in May 2021.'

In [5]:
import os

# Google search credentials, exported as environment variables
# (presumably picked up by GoogleSearchAPIWrapper — confirm against its docs).
# NOTE(review): GOOGLE_CSE_ID is filled from `credentials.google_search_key`;
# confirm that attribute really holds the search-engine ID and not an API key.
os.environ["GOOGLE_CSE_ID"] = credentials.google_search_key
os.environ["GOOGLE_API_KEY"] = credentials.google_api_key

In [6]:
from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper

# Search client, constructed with no explicit arguments.
search = GoogleSearchAPIWrapper()
# Number of results requested per query; passed to search.results() by
# top_n_results().
TOP_N_RESULTS = 10

In [7]:
def top_n_results(query):
    """Run `query` through the shared `search` client.

    Requests TOP_N_RESULTS results and returns whatever `search.results`
    gives back; later cells read the "title", "link" and "snippet" keys
    of each entry.
    """
    hits = search.results(query, TOP_N_RESULTS)
    return hits

# Wrap the search helper as a LangChain Tool so it can be invoked with
# tool.run(query).
tool = Tool(
    name = "Google Search",
    description="Search Google for recent results.",
    func=top_n_results
)

In [8]:
query = "What is the latest fast and furious movie?"

results = tool.run(query)

for result in results:
    # When nothing is found the search wrapper returns a single placeholder
    # entry ({"Result": "..."}) without title/link; show it instead of
    # failing with a KeyError.
    if "link" not in result:
        print(result.get("Result", result))
        continue
    print(result["title"])
    print(result["link"])
    # "snippet" is optional in the wrapper's result entries.
    print(result.get("snippet", ""))
    print("-"*50)

Fast & Furious movies in order | chronological and release order ...
https://www.radiotimes.com/movies/fast-and-furious-order/
Mar 22, 2023 ... Fast & Furious Presents: Hobbs & Shaw (2019); F9 (2021); Fast and Furious 10 (2023). Tokyo Drift also marks the first appearance of Han Lue, a ...
--------------------------------------------------
FAST X | Official Trailer 2 - YouTube
https://www.youtube.com/watch?v=aOb15GVFZxU
Apr 19, 2023 ... Fast X, the tenth film in the Fast & Furious Saga, launches the final ... witnessed it all and has spent the last 12 years masterminding a ...
--------------------------------------------------
Fast & Furious - Wikipedia
https://en.wikipedia.org/wiki/Fast_%26_Furious
It began the original tetralogy of films focused on illegal street racing, which culminated in the film Fast & Furious (2009). The series transitioned ...
--------------------------------------------------
How to Watch Fast and Furious Movies in Chronological Order - IGN
https://www.ign.com

In [9]:
import newspaper

# Download and parse every search hit; keep only pages that yield some text.
pages_content = []

for result in results:
    url = result.get("link")
    if not url:
        # Placeholder entries (e.g. "no results found") carry no link.
        continue
    try:
        article = newspaper.Article(url)
        article.download()
        article.parse()
    except Exception as exc:
        # Best effort: one unreachable or unparsable page must not abort the
        # run, but report it so missing sources are visible. Catching
        # Exception (not a bare except) lets KeyboardInterrupt through.
        print(f"Skipping {url}: {exc!r}")
        continue
    if article.text:
        pages_content.append({"url": url, "text": article.text})

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Split every downloaded page into overlapping chunks and wrap each chunk in a
# Document that remembers which URL it came from.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=100)

docs = [
    Document(page_content=piece, metadata={"source": page["url"]})
    for page in pages_content
    for piece in text_splitter.split_text(page["text"])
]

In [11]:
from langchain.embeddings import OpenAIEmbeddings

# Embed every chunk and the query with the same model so the vectors are
# comparable.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

doc_texts = [doc.page_content for doc in docs]
docs_embeddings = embeddings.embed_documents(doc_texts)
query_embedding = embeddings.embed_query(query)

In [12]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def get_top_k_indices(list_of_doc_vectors, query_vector, top_k):
    """Return the indices of the document vectors most similar to the query.

    Similarity is the cosine similarity between `query_vector` and each row
    of `list_of_doc_vectors`.

    Args:
        list_of_doc_vectors: sequence of equal-length vectors, one per document.
        query_vector: vector of the same length as each document vector.
        top_k: maximum number of indices to return; must be non-negative.

    Returns:
        1-D numpy integer array with at most `top_k` indices into
        `list_of_doc_vectors`, ordered from most to least similar. Empty when
        there are no document vectors or `top_k` is 0.

    Raises:
        ValueError: if `top_k` is negative (a negative slice bound would
            otherwise silently return "all but the last k" indices).
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    # convert the lists of vectors to numpy arrays
    list_of_doc_vectors = np.asarray(list_of_doc_vectors)
    query_vector = np.asarray(query_vector)

    # Nothing to rank: return early instead of letting cosine_similarity fail
    # with an opaque shape error on an empty document list.
    if top_k == 0 or list_of_doc_vectors.size == 0:
        return np.empty(0, dtype=np.intp)

    # compute cosine similarities (query reshaped to a single-row matrix)
    similarities = cosine_similarity(query_vector.reshape(1, -1), list_of_doc_vectors).flatten()

    # argsort is ascending, so reverse to get most-similar first
    sorted_indices = np.argsort(similarities)[::-1]

    # slicing tolerates top_k larger than the number of documents
    return sorted_indices[:top_k]

In [13]:
# Number of chunks handed to the QA chain.
top_k = 2
best_indexes = get_top_k_indices(docs_embeddings, query_embedding, top_k)
# Index directly with the ranked indices: this keeps the most-similar-first
# order (filtering enumerate(docs) returned corpus order instead) and avoids
# scanning every document.
best_k_documents = [docs[i] for i in best_indexes]

In [14]:
# Display the selected documents to inspect what will be passed to the QA chain.
best_k_documents

[Document(page_content='American media franchise\n\nFast & Furious (also known as The Fast and the Furious) is a media franchise centered on a series of action films that are largely concerned with street racing, heists, spies, and family. The franchise also includes short films, a television series, toys, video games, live shows, and theme park attractions. The films are distributed by Universal Pictures.\n\nThe first film, based on the 1998 Vibe magazine article "Racer X" by Ken Li and written by Gary Scott Thompson, Erik Bergquist, and David Ayer, was released in 2001. It began the original tetralogy of films focused on illegal street racing, which culminated in the film Fast & Furious (2009). The series transitioned towards heists and spying with Fast Five (2011), which was followed by five sequels in that genre, the most recent of which, Fast X, was released on May 19, 2023.\n\nUniversal expanded the series to include the spin-off film Hobbs & Shaw (2019), while its subsidiary Dre

In [15]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

# "stuff" chain: the selected documents are passed to the model together with
# the question in a single call.
chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff")

response = chain({"input_documents": best_k_documents, "question": query}, return_only_outputs=True)

# The answer and its sources are separated by a "SOURCES:" marker in the model
# output. partition() always yields three parts, so a reply without the marker
# (or with several) no longer raises ValueError the way unpacking split() did;
# a missing marker just leaves the sources empty.
response_text, _sep, response_sources = response["output_text"].partition("SOURCES:")
response_text = response_text.strip()
response_sources = response_sources.strip()

print(f"Answer: {response_text}")
print(f"Sources: {response_sources}")

Answer: The latest Fast and Furious movie is Fast and Furious Presents: Hobbs & Shaw (2019).
Sources: https://en.wikipedia.org/wiki/Fast_%26_Furious, https://www.menshealth.com/entertainment/a36716650/fast-and-furious-movies-in-order/
