In [None]:
#@title Install Requirements
!pip install -qU "langchain[google-vertexai]"
!pip install -qU langchain-google-vertexai
!pip install langchain-google-genai
!pip install -qU langchain-core
!pip install -qU pypdf langchain_community
!pip install playwright
!playwright install

In [None]:
#@title Get Google AI API Key
# Supply your Google AI API key. The prompt is skipped when GOOGLE_API_KEY is
# already set in the environment; getpass reads the key without echoing it.
# NOTE(review): the model and embedding cells visible in this notebook use the
# Vertex AI provider -- confirm whether this key is still required.

import getpass
import os

if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")

In [None]:
#@title Authenticate with Google Cloud and your project ID
# Enter your Google Cloud project ID in the form field below, then run the cell.
# It authenticates the Colab user and initialises the Vertex AI SDK for that project.

import vertexai
from google.colab import auth

gcp_project_id = 'ENTER PROJECT ID' # @param {type: "string"}

# Fail fast with a clear message if the placeholder was never replaced, instead
# of starting the auth flow with an unusable project ID.
if not gcp_project_id.strip() or gcp_project_id == 'ENTER PROJECT ID':
    raise ValueError(
        "Set gcp_project_id to your Google Cloud project ID before running this cell."
    )

auth.authenticate_user(project_id=gcp_project_id)

vertexai.init(project=gcp_project_id)

In [7]:
#@title Build RAG Pipeline (load data, index, query)
#!pip install -qU "langchain[google-vertexai]"
#!pip install -qU langchain-google-vertexai
#!pip install langchain-google-genai
#!pip install -qU langchain-core
#!pip install -qU pypdf langchain_community
#!pip install playwright
#!playwright install

import requests
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_community.document_transformers import BeautifulSoupTransformer

# embed the document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
import urllib.request

from langchain_community.document_loaders import PyPDFLoader

# Load the movie dataset; the path is relative to the current working directory.
file_path = "./movies_data_1.txt"
# Decode explicitly as UTF-8 so non-ASCII titles are read the same way
# regardless of the platform's default encoding.
loader = TextLoader(file_path, encoding="utf-8")

docs = loader.load()


# Split the text into overlapping chunks (size 1000, overlap 200) so each
# chunk can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Chat model used to generate the answers.
llm = init_chat_model("gemini-2.0-flash-001", model_provider="google_vertexai")

# Embedding model used to vectorise the chunks (and, via the retriever, the queries).
embedding_model = VertexAIEmbeddings(model="text-embedding-004")

# In-memory vector store. The chunks are indexed as Document objects rather
# than bare strings so each one keeps its metadata alongside the text.
vector_store = InMemoryVectorStore(embedding=embedding_model)
vector_store.add_documents(documents=splits)
retriever = vector_store.as_retriever()


# run inference with RAG
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# System instructions for the answering model. Adjacent string literals are
# joined with no separator, so every fragment must carry its own trailing
# space or newline. "{context}" is a template placeholder left for the prompt
# machinery to fill in.
system_prompt = (
    "You are an LLM that should answer my questions with correct answers. "
    "Please use the provided information to answer the question to the best "
    "of your ability. If you are unsure how to answer the question, say that "
    "you are unable to answer the question.\n"
    "{context}"
)

# Two-message chat prompt: the system instructions, then the human turn whose
# text is taken from the "{input}" template variable.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)


# Assemble the RAG chain from the retriever and the document-combining QA
# chain, then ask the first question and print the generated answer.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

results = rag_chain.invoke(
    {
        "input": (
            "Can you give me a recommendation for a Netflix Movie that is a French Sports Movie? "
            "Can you then tell me more about that movie?"
        )
    }
)
print(results["answer"])

I recommend **Les Bleus - Une autre histoire de France, 1996-2016**.

This 2016 French documentary, directed by Pascal Blanchard, Sonia Dauger, and David Dietz, charts 20 years of the French national soccer team, Les Bleus, whose ups and downs have mirrored those of French society. It was added to Netflix on March 15, 2017, and is rated TV-MA.



In [9]:

# Query by actor and ask for ratings; prints only the generated answer text.
results = rag_chain.invoke(
    {
        "input": (
            "I want to watch a movie on Netflix with Hugh Jackman, "
            "can I have a couple of suggestions and their IMDB ratings (if available)?"
        )
    }
)
print(results["answer"])

Here are a couple of suggestions for movies on Netflix that star Hugh Jackman:

1.  **Real Steel:** This is an Action & Adventure, Sci-Fi & Fantasy, and Sports movie.
2.  **Les Misérables:** This is a Dramas, International Movies, and Music & Musicals movie.

Unfortunately, the provided data does not include IMDB ratings.


In [14]:
# Query by genre and theme; prints only the generated answer text.
results = rag_chain.invoke(
    {"input": "Can you give me a recommendation for a Netflix romance movie with time travel?"}
)
print(results["answer"])

I can recommend "The Time Traveler's Wife," "When We First Met," or "Kate & Leopold." "About Time" also involves time travel, but it is categorized as a comedy and drama instead of a romance.



In [17]:
# Query by a numeric rating threshold; prints only the generated answer text.
# NOTE(review): the answer is built from whatever chunks the retriever returns
# for this question, so it is presumably a sample rather than an exhaustive
# filter over the dataset -- confirm before relying on it.
results = rag_chain.invoke(
    {"input": "Can you give me a recommendation for a Netflix movie with an IMDB rating over 8?"}
)
print(results["answer"])

BLACKPINK: Light Up the Sky has an IMDB rating of 8.4.

