In [None]:
from dotenv import load_dotenv

# Load environment variables from the .env file located two directories up.
load_dotenv(dotenv_path='../../.env')

# Ask a Trending Question

In [1]:
from langchain import LLMChain, PromptTemplate
from langchain import HuggingFaceHub

# Generation settings forwarded to the hosted model.
# NOTE(review): both "max_length" and "max_new_tokens" are set — confirm which
# limit is actually intended for this endpoint.
generation_settings = {'temperature': 0.5, "max_length": 64, "max_new_tokens": 512}

llm = HuggingFaceHub(
    model_kwargs=generation_settings,
    repo_id='mistralai/Mistral-7B-Instruct-v0.2',
)


In [3]:
# Prompt with a single {question} slot; the question is sent to the LLM as-is,
# without any retrieved context.
template = """You are an assistant that answers the following question correctly and honestly: {question}\n\n"""
prompt_template = PromptTemplate(template=template, input_variables=["question"])

question_chain = LLMChain(prompt=prompt_template, llm=llm)

baseline_answer = question_chain.run("what is the latest fast and furious movie?")
print(baseline_answer)

You are an assistant that answers the following question correctly and honestly: what is the latest fast and furious movie?

I'd be happy to help answer your question! The most recent installment in the Fast & Furious franchise as of my current knowledge is "F9: The Fast Saga," which was released in theaters on June 25, 2021. This action-packed film brings back the main cast, including Vin Diesel and Michelle Rodriguez, for another high-octane adventure. I hope this information is helpful! Let me know if you have any other questions.


# Use Google Search to Get Results

In [4]:
from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper

# Number of search hits requested per query.
TOP_N_RESULTS = 10
# Google Search client.
# NOTE(review): presumably picks up its credentials from the environment — confirm.
search = GoogleSearchAPIWrapper()

def top_n_results(query):
    """Run ``query`` through the Google search client.

    Args:
        query: The search string.

    Returns:
        Whatever ``search.results`` returns when asked for ``TOP_N_RESULTS`` hits.
    """
    hits = search.results(query, TOP_N_RESULTS)
    return hits

# Wrap the search helper in a Tool so it can be invoked through ``tool.run``.
tool = Tool(
    func=top_n_results,
    name="Google Search",
    description="Search Google for recent results.",
)

query = "What is the latest fast and furious movie?"

results = tool.run(query)

# Show each hit. Fields are read with .get() so an entry that lacks a key
# prints an empty line instead of aborting the cell with a KeyError.
# NOTE(review): the search wrapper appears to omit "snippet" for some hits and
# to return a placeholder entry without "title"/"link" when nothing is found —
# confirm against the installed langchain version.
for result in results:
    print(result.get("title", ""))
    print(result.get("link", ""))
    print(result.get("snippet", ""))
    print("-"*50)

Fast & Furious - Wikipedia
https://en.wikipedia.org/wiki/Fast_%26_Furious
It began the original tetralogy of films focused on illegal street racing, which culminated in the film Fast & Furious (2009). The series moved towards heists ...
--------------------------------------------------
Fast & Furious movies in order | chronological and release order ...
https://www.radiotimes.com/movies/fast-and-furious-order/
Mar 22, 2023 ... Fast & Furious Presents: Hobbs & Shaw (2019); F9 (2021); Fast and Furious 10 (2023). Tokyo Drift also marks the first appearance of Han Lue, a ...
--------------------------------------------------
Fast and Furious Movies in Order Chronologically and by Release ...
https://movieweb.com/fast-and-furious-movies-in-order/
Jul 24, 2023 ... The Fate of the Furious (2017). FateoftheFurious (1) Universal Pictures. Once again, Dominic finds himself getting ...
--------------------------------------------------
How to Watch Fast and Furious Movies in Chronological Order 

# Use Newspaper

In [5]:
import newspaper

# Collected as {"url": ..., "text": ...} dicts, one per successfully parsed page.
pages_content = []

for result in results:
    # Entries without a link cannot be fetched; skip them.
    url = result.get("link")
    if not url:
        continue

    try:
        article = newspaper.Article(url)
        article.download()
        article.parse()
    except Exception as exc:
        # Best-effort scraping: report the failure and move on. Catching
        # Exception (not a bare except) keeps KeyboardInterrupt/SystemExit
        # working so the loop can still be interrupted.
        print(f"Skipping {url}: {exc}")
        continue

    # Keep only pages that yielded some text.
    if article.text:
        pages_content.append({"url": url, "text": article.text})

# Split Text

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=100)

# One Document per chunk, each tagged with the URL of the page it came from.
docs = [
    Document(page_content=piece, metadata={"source": page["url"]})
    for page in pages_content
    for piece in text_splitter.split_text(page["text"])
]

In [7]:
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

# Embed the chunks and the query with the same model so the vectors are comparable.
chunk_texts = [doc.page_content for doc in docs]
docs_embeddings = embeddings.embed_documents(chunk_texts)
query_embedding = embeddings.embed_query(query)

# Get the top-k results for inference

In [8]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_top_k_indices(list_of_doc_vectors, query_vector, top_k):
    """Return the indices of the document vectors most similar to the query.

    Args:
        list_of_doc_vectors: Sequence of document embedding vectors.
        query_vector: Embedding vector of the query.
        top_k: Maximum number of indices to return.

    Returns:
        A 1-D NumPy integer array of at most ``top_k`` indices into
        ``list_of_doc_vectors``, ordered from most to least cosine-similar.
        Empty when there are no document vectors.
    """
    doc_matrix = np.asarray(list_of_doc_vectors)

    # With no documents there is nothing to rank; return early rather than
    # letting the similarity computation fail on an empty array.
    if doc_matrix.size == 0:
        return np.empty(0, dtype=np.intp)

    # cosine_similarity expects 2-D inputs, so the query becomes a single row.
    query_row = np.asarray(query_vector).reshape(1, -1)
    similarities = cosine_similarity(query_row, doc_matrix).flatten()

    # argsort is ascending; reverse it to get highest similarity first.
    sorted_indices = np.argsort(similarities)[::-1]

    return sorted_indices[:top_k]

# Number of chunks handed to the LLM as context.
top_k = 2
best_indexes = get_top_k_indices(docs_embeddings, query_embedding, top_k)
# Index directly so the documents stay in ranked order (most similar first);
# filtering with enumerate() would return them in their original order.
best_k_documents = [docs[i] for i in best_indexes]

# Chain with LLM

In [11]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

# Question-answering chain that also reports sources, built on the same LLM.
chain = load_qa_with_sources_chain(llm, chain_type="stuff")

chain_inputs = {"input_documents": best_k_documents, "question": query}
response = chain(chain_inputs, return_only_outputs=True)

print(response)



In [20]:

# Keep only what follows the last "FINAL ANSWER:" marker, then separate the
# answer from its sources. partition() always yields three parts, so a missing
# or repeated "SOURCES:" marker no longer breaks the unpacking: with no marker
# the sources are empty, with several everything after the first one is kept.
answer_section = response["output_text"].split("FINAL ANSWER:")[-1]
response_text, _, response_sources = answer_section.partition("SOURCES:")
response_text = response_text.strip()
response_sources = response_sources.strip()

print(f"Answer: {response_text}")
print(f"Sources: {response_sources}")

Answer: The latest "Fast and Furious" movie is "Fast X," released on May 19, 2023.
Sources: https://en.wikipedia.org/wiki/Fast_%26_Furious
https://www.usatoday.com/story/entertainment/movies/2022/07/29/fast-and-furious-movies-order-of-release/10062943002/
