In [1]:
from config import *
from dotenv import load_dotenv
import os
import openai

load_dotenv()

openai.api_key = os.environ["OPENAI_API_KEY"]

In [2]:
from pymongo import MongoClient

# initialize MongoDB python client
client = MongoClient(MONGODB_CLUSTER)
MONGODB_COLLECTION = client[DATABASE_NAME][VECTORDB_COLLECTION_NAME]
ATLAS_VECTOR_SEARCH_INDEX_NAME = "EmbeddingText"

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd
from langchain.schema import Document


def get_docs(movie_name, if_split=True):
    df = pd.read_csv(f"movie_reviews_link\{movie_name}.csv")
    docs = []
    for i in df.iterrows():
        row = i[1]
        docs.append(
            Document(
                page_content=row["review_title"] + " " + row["review_comment"],
                metadata={
                    "date": row["review_date"],
                    "title": row["review_title"],
                    "rating": row["review_rating"],
                    "helpful": row["review_helpful"],
                    "total_votes": row["review_total_votes"],
                    "if_spoiler": row["reviews_if_spoiler"],
                    "link": row["review_link"],
                    "MovieName": movie_name,
                    "source": "imdb",
                },
            )
        )

    with open("wikipedia_data\Whiplash_2014.txt", "r") as f:
        data = f.read()
        docs.append(Document(page_content=data, metadata={"MovieName": movie_name}))
    if if_split:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=64
        )
        docs = text_splitter.split_documents(docs)
    return docs

In [4]:
review_docs = get_docs("Green_lantern_2011")

In [5]:
len(review_docs)

1499

In [6]:
# from langchain.embeddings import SentenceTransformerEmbeddings
# from langchain.vectorstores import MongoDBAtlasVectorSearch
# from config import *
# vector_search = MongoDBAtlasVectorSearch.from_documents(
#     documents=review_docs,
#     embedding=SentenceTransformerEmbeddings(model_kwargs={"device":"cuda"}),
#     collection=MONGODB_COLLECTION,
#     index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
# )

In [7]:
from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain.embeddings import SentenceTransformerEmbeddings

vector_search = MongoDBAtlasVectorSearch.from_connection_string(
    MONGODB_CLUSTER,
    DATABASE_NAME + "." + VECTORDB_COLLECTION_NAME,
    SentenceTransformerEmbeddings(model_kwargs={"device": "cuda"}),
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
)

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
query = "Explain the plot of green lantern to me like I am 5"
results = vector_search.similarity_search_with_score(
    query=query, k=2, pre_filter={"MovieName": {"$eq": "Green_lantern_2011"}}
)

# Display results
for result in results:
    print(result)

(Document(page_content='Gives all other comic book movies a bad name. When an ancient evil awakens and kills our local intergalactic protector Hal Jordan is chosen to replace him as the new Green Lantern and must stop this evil before it destroys our world THIS MOVIE GIVES ALL OTHER COMIC BOOK MOVIES A BAD NAME Having never read any Green Lantern comics or seen him in any other media before I have no idea if it s true to the comics Either way it sucks OK so there are 3600 odd Green Lanterns who each protect 1 3600 of the universe Each has a magic ring which allows him to create anything he can imagine The ring is charged by a lantern which in turn is charged by the central core which is charged by the will power of everyone in the universe That s stupid Yes lots of comic book characters have stupid origins most to do with the magical properties of radiation but still he gets his abilities from will power Really It gets worse The big bad is charged by fear However fear turns out to be a

In [16]:
{
    "mappings": {
        "dynamic": true,
        "fields": {
            "MovieName": {"type": "token"},
            "source": {"type": "token"},
            "embedding": {
                "type": "knnVector",
                "dimensions": 768,
                "similarity": "euclidean",
            },
        },
    }
}

In [17]:
qa_retriever = vector_search.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 100, "post_filter_pipeline": [{"$limit": 5}]},
)

from langchain.prompts import PromptTemplate

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [18]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=qa_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT},
)

docs = qa({"query": "What are the superpowers of Green lantern"})

print(docs["result"])
print(docs["source_documents"])


The superpowers of Green Lantern are the ability to expel a green energy that can transform into anything he needs, and the power to use his mind and will power instead of sheer physical strength or high tech weaponry.
[Document(page_content='ringed superpowers Before we discuss the film I suggest the mystery out of the superhero Flies but not Superman Light but not Iron Man Green but do not Teenage Mutant Ninja Turtles With a lantern but not under the eye And if you re reading these lines omitting the title you already know the answer Well to get acquainted with new shoots adaptations of comics is not superfluous to learn the fate of another defender of the Earth and not only Green Lantern Especially pay attention to this movie because the green light of previously unknown on the feature film characters cartoons completely about him and his team participated in the animated Justice League does not count our view is a colorful adaptation of the old kind of comics like and distinguishe

In [20]:
docs

{'query': 'What are the superpowers of Green lantern',
 'result': '\nThe superpowers of Green Lantern are the ability to expel a green energy that can transform into anything he needs, and the power to use his mind and will power instead of sheer physical strength or high tech weaponry.',
 'source_documents': [Document(page_content='ringed superpowers Before we discuss the film I suggest the mystery out of the superhero Flies but not Superman Light but not Iron Man Green but do not Teenage Mutant Ninja Turtles With a lantern but not under the eye And if you re reading these lines omitting the title you already know the answer Well to get acquainted with new shoots adaptations of comics is not superfluous to learn the fate of another defender of the Earth and not only Green Lantern Especially pay attention to this movie because the green light of previously unknown on the feature film characters cartoons completely about him and his team participated in the animated Justice League does 