In [1]:
%%capture --no-stderr
!pip3 install -q google-cloud-aiplatform
!pip3 install -q langchain
!pip3 install -q langchain-google-genai
!pip3 install -q langchain-google-vertexai
!pip3 install -q wikipedia
!pip3 install -q chromadb
!pip3 install -q langchain-community

In [2]:
# Restart the kernel after the libraries are installed.
# `do_shutdown(True)` asks the running kernel to shut down with restart enabled
# (the recorded cell output reports 'restart': True). All in-memory state is
# lost, so every cell below starts from a fresh namespace.
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

# Initial Setup

In [1]:
import os
import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown

key_name = !gcloud services api-keys list --filter="gemini-api-key" --format="value(name)"
key_name = key_name[0]

api_key = !gcloud services api-keys get-key-string $key_name --location="us-central1" --format="value(keyString)"
api_key = api_key[0]

os.environ["GOOGLE_API_KEY"] = api_key

genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

## Import Required Libraries

In [2]:
from langchain import PromptTemplate
from langchain import hub
from langchain.docstore.document import Document
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import StrOutputParser
from langchain.schema.prompt_template import format_document
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import Chroma

In [3]:
# Project settings: read the active project from the gcloud configuration and
# fix the region used by this notebook.
import sys
import subprocess

PROJECT_ID = subprocess.run(
    ["gcloud", "config", "get-value", "project"],
    stdout=subprocess.PIPE,  # capture stdout only; stderr is left untouched
    text=True,
    check=True,              # raise CalledProcessError on a non-zero exit
).stdout.strip()
LOCATION = "us-central1"  # @param {type:"string"}

print(f"Your project ID is: {PROJECT_ID}")

Your project ID is: qwiklabs-gcp-01-de124baa5096


## Task 1. Load `Documents` from Wikipedia

In [4]:
# Load Wikipedia articles for the search query below, passing `max_docs` as
# the loader's `load_max_docs` setting.
# Loader reference: https://python.langchain.com/docs/integrations/document_loaders/wikipedia
query = "Gemini GPT-4"
max_docs = 10

wikipedia_loader = WikipediaLoader(query=query, load_max_docs=max_docs)
docs = wikipedia_loader.load()

# Show how many documents were actually returned.
len(docs)

10

## Task 2. Use `RecursiveCharacterTextSplitter` to split Documents

In [5]:
# Break the loaded documents into chunks for indexing.
# Splitter reference: https://python.langchain.com/docs/modules/data_connection/document_transformers/recursive_text_splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10_000,
    chunk_overlap=1_000,
)

# NOTE: `docs` is rebound to the splitter output, so re-running this cell on
# its own feeds the already-split chunks back into the splitter.
# NOTE(review): the recorded run shows 10 documents before and after the split,
# i.e. nothing was actually divided with these settings - confirm this is intended.
docs = text_splitter.split_documents(docs)

print(f"# of documents = {len(docs)}")

# of documents = 10


## Task 3. Index Documents in Chroma DB Vector Store

In [6]:
# Embedding model used to vectorize the document chunks; it is passed to the
# Chroma vector store in the cells that follow.
# Model catalogue: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#models

from langchain_google_vertexai import VertexAIEmbeddings
embeddings = VertexAIEmbeddings(model_name="text-embedding-004")


In [7]:
# Index the Wikipedia chunks in Chroma: each document is embedded with
# `embeddings` and the collection is written under ./chroma_db.
# Chroma reference: https://python.langchain.com/docs/integrations/vectorstores/chroma

vectorstore = Chroma.from_documents(
    persist_directory="./chroma_db",  # where the collection is saved
    embedding=embeddings,             # model that turns text into vectors
    documents=docs,                   # chunks to index
)

In [8]:
# Open the collection stored under ./chroma_db, using the same embedding
# model so queries are embedded consistently with the indexed documents.
vectorstore_disk = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db",
)

  warn_deprecated(


## Task 4. Setup a Retriever

In [9]:
# Expose the on-disk Chroma collection as a retriever, with k=10 passed as
# the search setting.
# Retriever options: https://python.langchain.com/docs/integrations/vectorstores/chroma#retriever-options

retriever = vectorstore_disk.as_retriever(
    search_kwargs=dict(k=10),
)


In [10]:
# NOTE(review): this print is unrelated to the retrieval pipeline; it only
# writes the text below to stdout and nothing else in the notebook reads it.
print("subscribe to techcps")

subscribe to techcps


In [11]:
# Smoke-test the retriever with a sample query.
# `invoke` is the Runnable entry point that replaces the deprecated
# `get_relevant_documents`, whose use triggered a deprecation warning here.
doc = retriever.invoke("Google Gemini")
doc

  warn_deprecated(


[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Gemini_(language_model)', 'summary': "Google Gemini is a family of multimodal large language models developed by Google DeepMind, serving as the successor to LaMDA and PaLM 2. Comprising Gemini Ultra, Gemini Pro, Gemini Flash, and Gemini Nano, it was announced on December 6, 2023, positioned as a competitor to OpenAI's GPT-4. It powers the chatbot of the same name.\n\n", 'title': 'Gemini (language model)'}, page_content='Google Gemini is a family of multimodal large language models developed by Google DeepMind, serving as the successor to LaMDA and PaLM 2. Comprising Gemini Ultra, Gemini Pro, Gemini Flash, and Gemini Nano, it was announced on December 6, 2023, positioned as a competitor to OpenAI\'s GPT-4. It powers the chatbot of the same name.\n\n\n== History ==\n\n\n=== Development ===\n\nGoogle announced Gemini, a large language model (LLM) developed by subsidiary Google DeepMind, during the Google I/O keynote on May 10, 

## Task 5. Setup Model and Build LangChain `Chain`

In [12]:
# Chat model that generates the final answer.
# Model catalogue: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
# The task requires the least random output that can be configured, so the
# sampling temperature is set to 0 (it was 0.8, which makes sampling more random).
# NOTE(review): top_p is left at its previous value; confirm whether it should
# also be tightened, and that this model name is still served.
from langchain_google_genai import ChatGoogleGenerativeAI

model = ChatGoogleGenerativeAI(model="gemini-1.0-pro", temperature=0, top_p=0.85)


In [13]:
# Question-answering prompt sent to Gemini. It has two placeholders:
# {question} (the user's query) and {context} (the retrieved document text).
llm_prompt_template = (
    "You are an assistant for question-answering tasks.\n"
    "Use the following context to answer the question.\n"
    "If you don't know the answer, just say that you don't know.\n"
    "Use five sentences maximum and keep the answer concise.\n\n"
    "Question: {question} \n"
    "Context: {context} \n"
    "Answer:"
)

prompt = PromptTemplate.from_template(llm_prompt_template)

# Show the parsed template, including the input variables it detected.
print(prompt)

input_variables=['context', 'question'] template="You are an assistant for question-answering tasks.\nUse the following context to answer the question.\nIf you don't know the answer, just say that you don't know.\nUse five sentences maximum and keep the answer concise.\n\nQuestion: {question} \nContext: {context} \nAnswer:"


In [14]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank line
        (two newline characters). An empty iterable yields an empty string.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)

In [15]:
# Assemble the question-answering chain. The input question is used twice:
# it is piped through `retriever | format_docs` to build the "context" text,
# and passed through unchanged as "question". The resulting mapping fills the
# prompt, the prompt goes to the model, and the model reply is parsed to a string.
chain = (
    dict(context=retriever | format_docs, question=RunnablePassthrough())
    | prompt
    | model
    | StrOutputParser()
)

In [16]:
# Run the full chain on a sample question; as the last expression of the cell,
# the returned answer string is displayed as the cell output.
chain.invoke("What is Gemini?")

'Gemini is a family of multimodal large language models developed by Google DeepMind. It is the successor to LaMDA and PaLM 2, and was announced on December 6, 2023. Gemini comprises Gemini Ultra, Gemini Pro, Gemini Flash, and Gemini Nano.'