In [1]:
import html
import os

import imdb
import pandas as pd
import requests
from langchain.chains import LLMChain
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub

In [2]:
movie = "12thFail"
ia = imdb.IMDb()

# Search IMDb for the title and use the first hit. Fail with an explicit
# message when the search returns nothing, instead of an opaque IndexError
# from indexing an empty result list.
search_results = ia.search_movie(movie)
if not search_results:
    raise ValueError(f"No IMDb search results for {movie!r}")

# The reviews page URL is formed by appending 'reviews' to the title URL.
url = ia.get_imdbURL(search_results[0]) + 'reviews'

# requests has no default timeout, so an unresponsive server would block the
# notebook indefinitely; raise_for_status() surfaces HTTP errors here rather
# than letting an error page flow into the review parsing below.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [3]:
def find_lines_with_substring(corpus, substring='<div class="text show-more__control">'):
    """Collect the lines of ``corpus`` that contain ``substring``.

    Args:
        corpus: Text to scan; it is split on newline characters.
        substring: Text a line must contain to be kept. Defaults to the
            opening ``div`` tag this notebook looks for in the downloaded page.

    Returns:
        A list of the matching lines, unmodified and in their original order.
    """
    matches = []
    for candidate in corpus.split('\n'):
        if substring in candidate:
            matches.append(candidate)
    return matches

def preprocess(review):
    """Turn one raw HTML review line into plain text.

    Steps, in order:
      1. Collapse every run of whitespace into a single space (this also trims
         leading and trailing whitespace).
      2. Remove the opening ``<div class="text show-more__control">`` tag and
         any ``</div>`` tag, and turn ``<br/><br/>`` into a newline.
      3. Decode HTML character references (``&#39;``, ``&amp;``, ``&quot;``, ...).

    Args:
        review: A single line of HTML containing a review.

    Returns:
        The cleaned review text.
    """
    collapsed = ' '.join(review.split())
    stripped = (
        collapsed
        .replace('<div class="text show-more__control">', "")
        .replace("<br/><br/>", "\n")
        .replace("</div>", "")
    )
    # Decode entities only after the markup is gone, so an escaped tag such as
    # "&lt;/div&gt;" written inside a review stays as literal text. The earlier
    # code decoded just "&#39;", leaving every other entity in the output.
    return html.unescape(stripped)

# Clean every line of the downloaded page that contains the review marker.
reviews = [preprocess(raw_line) for raw_line in find_lines_with_substring(response.text)]

# An empty list means no line in the response contained the review marker.
if not reviews:
  raise Exception("Incorrect link")

In [4]:
# Split each review into chunks of at most 300 characters with a 50-character
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)

# Wrap every cleaned review in a Document so the splitter can process it.
pages = [
    Document(page_content=review_text, metadata={"source": "local"})
    for review_text in reviews
]
splits = text_splitter.split_documents(pages)

# The HuggingFace token is read from the environment; a missing
# HUGGINGFACEHUB_API_KEY variable raises KeyError here.
embedding = HuggingFaceHubEmbeddings(
    huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_KEY"],
)

# Build a FAISS vector store from the chunks and expose it as a retriever.
vecterstores = FAISS.from_documents(splits, embedding)
ret = vecterstores.as_retriever()

In [5]:
# Chat-style prompt with <|system|>, <|user|> and <|assistant|> sections.
# {movie}, {context} and {question} are filled in when the chain is invoked.
template = """
<|system|>
You are personal assistant whose job is to provide answer to the user's query based
on the context for the movie {movie}. The answer should be in your own words and be in one sentence.
If you don't know the answer please simply say I don't know.
Context:
{context}</s>
<|user|>
{question}</s>
<|assistant|>
"""

prompt = PromptTemplate(
    input_variables=["context", "question", "movie"],
    template=template,
)

# The HuggingFace token is read from the environment; a missing
# HUGGINGFACEHUB_API_KEY variable raises KeyError here.
llm = HuggingFaceHub(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_KEY"],
    model_kwargs={"temperature": 1.0},
)

# Chain that formats the prompt and sends it to the model.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [6]:
question = "Summarize the movie"

# Retrieve the chunks most relevant to the question and concatenate them,
# one chunk per line, to form the prompt context.
rel_doc = ret.get_relevant_documents(question)
context = "".join(doc.page_content + "\n" for doc in rel_doc)

result = llm_chain.invoke({'question': question, 'context': context, 'movie': movie})

# Print only what follows the first "<|assistant|>" marker in the returned text.
# If the marker is absent, print the whole text: the earlier arithmetic on
# find()'s -1 result silently dropped the first 13 characters of the answer.
# A newline directly after the marker is removed only if it is actually there.
marker = "<|assistant|>"
generated = result['text']
marker_pos = generated.find(marker)
if marker_pos == -1:
    answer = generated
else:
    answer = generated[marker_pos + len(marker):]
    if answer.startswith("\n"):
        answer = answer[1:]
print(answer)

The movie 12thFail is a powerful and emotional cinematic experience that explores the themes of bribery, corruption, politics, hardwork, grit, love, tenacity, and being a good Samaritan. The film serves as a reminder that perseverance and resilience can lead to extraordinary achievements, and the level of craftsmanship displayed in the production is remarkable. The plot unfolds with a perfect blend of suspense and emotion, creating a narrative that is both inspiring and thought-provoking. The movie is a powerful reminder that our dreams can be within reach if we are willing to chase them with unwavering determination.
