In [None]:
############################################################################
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
############################################################################
#  DISCLAIMER
# The code shared by CEs with customers must be limited to non-production
# demo/example code or infrastructure automation scripts.
############################################################################
## Colab: @martincabrera, @scavaletto (en los monitos)

# Vertex AI SDK for Python: LangChain + Google PaLM Q&A Demo
<table align="left">
  <td>
    <a href="https://colab.sandbox.google.com/drive/1PB7cLrCuJ2jAlL1XpNV2os56A3chJZxe?resourcekey=0-fmVW2-doHi7lAPllkN9Jew#scrollTo=cATNnfXpf5Zg">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href=" ">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
<a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/workbench/ads_targetting/training-multi-class-classification-model-for-ads-targeting-usecase.ipynb&_ga=2.216582484.338953940.1687460910-37483724.1687446497" target='_blank'>
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>
</table>

# Vertex LLM SDK samples

# **LangChain + Google PaLM Q&A demo with private data**

- To simplify and standardize the code, we use LangChain+Chroma.
- The input data could be any pdf file or a csv file with a column named 'text'.

# Installing the package

In [1]:
# We have to first authenticate
from google.colab import auth as google_auth
google_auth.authenticate_user()

In [None]:
# Install modules
!pip install google-cloud-aiplatform
!pip install langchain
!pip install chromadb #base de vectores
!pip install PyPDF2 # OCR

#### ! ^^^^ Do not forget to click the "Restart runtime" button

# Authenticating

In [1]:
PROJECT_ID = "cloud-llm-preview1"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

# Library

In [2]:
# Import modules
import os
import requests
import pandas as pd
import vertexai
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from PyPDF2 import PdfReader
from IPython.display import display, Markdown

Carga de dopcumentos

In [3]:
# Upload your data file
from google.colab import files
uploaded = files.upload()
file_name = next(iter(uploaded))

Saving PreguntasFrecuentesChileAtiende.csv to PreguntasFrecuentesChileAtiende.csv


## Pre-process
The pdf or csv file, reading the text data and creating a text splitter with text data chunks

In [5]:
# Pre-process the pdf or csv file, reading the text data
# and creating a text splitter with text data chunks
raw_text = ''
file_extension = file_name[file_name.index(".")+1 : len(file_name)]
if (file_extension.lower() == 'pdf'):
  # Load text data from the pdf file
  reader = PdfReader(file_name)
  for page in reader.pages:
    raw_text = raw_text + page.extract_text()
elif (file_extension.lower() == 'csv'):
  # Load text data from csv file
  df = pd.read_csv(file_name)
  df.head()
  for index, row in df.iterrows():
      raw_text += row['Pregunta'] + '-'+ row['Respuesta'] + '\n'
      print(raw_text)
else:
  print("File extension not supported")
  quit()

# Create text data chunks
text_splitter = CharacterTextSplitter(
    separator = "\n",
    chunk_size = 5000,
    chunk_overlap  = 200,
    length_function = len,)
text_chunks = text_splitter.split_text(raw_text)

¿Quiénes son los nuevos beneficiados con la Ley Corta de PGU?-Ingresan, como nuevas personas beneficiarias de PGU, todos quienes habían quedado fuera por el criterio de focalización del 10% más rico de la población de 65 años o más, que ahora se modificó por el 10% del total del país.

De esta manera, cerca de la mitad de las solicitudes que fueron rechazadas entre agosto de 2022 y marzo de 2023, por el mencionado criterio, serán aprobadas a partir de abril de 2023 gracias a la Ley Corta de la PGU.

De todas formas, por si tiene dudas con respecto a la concesión de su beneficio, puede realizar una nueva solicitud de PGU.

¿Quiénes son los nuevos beneficiados con la Ley Corta de PGU?-Ingresan, como nuevas personas beneficiarias de PGU, todos quienes habían quedado fuera por el criterio de focalización del 10% más rico de la población de 65 años o más, que ahora se modificó por el 10% del total del país.

De esta manera, cerca de la mitad de las solicitudes que fueron rechazadas entre ag

Generate the chroma embeddings

In [6]:
vertexai.init(project=PROJECT_ID, location=LOCATION)

# Generate the chroma embeddings db using the
# Google PaLM embeddings models
palm_embeddings_model = VertexAIEmbeddings()
print('Generating PaLM embeddings...')
persist_directory = 'db'
palm_docsearch = Chroma.from_texts(text_chunks, palm_embeddings_model, persist_directory=persist_directory)
palm_docsearch.persist()
print('PaLM embeddings generated')

# Create the chain to answer question
palm_chain = load_qa_chain(VertexAI(), chain_type="stuff")

Generating PaLM embeddings...
PaLM embeddings generated


In [7]:
# Ruta de cloud storage para guardar los vectores
!gsutil cp -r ./db gs://codefile-examples-bucket/chromadb/

Copying file://./db/chroma-embeddings.parquet [Content-Type=application/octet-stream]...
Copying file://./db/chroma-collections.parquet [Content-Type=application/octet-stream]...
Copying file://./db/index/uuid_to_id_f9f7005d-7fbf-4517-b92b-a9c81e8399d6.pkl [Content-Type=application/octet-stream]...
Copying file://./db/index/index_f9f7005d-7fbf-4517-b92b-a9c81e8399d6.bin [Content-Type=application/octet-stream]...
\
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying file://./db/index/index_metadata_f9f7005d-7fbf-4517-b92b-a9c81e8399d6.pkl [Content-Type=application/octet-stream]...
Copying file://./db/index/id_to_uuid_f9f7005d-7fbf-4517-b92b-a9c81e8399d6.pkl [Content-Type=application/octet-stream]...
\ [6 files][ 24.7 KiB/ 24.7 KiB]                                                
Ope

Configuración de preguntas

In [8]:
# Q&A function
def answer_question(question):
    palm_docs = palm_docsearch.similarity_search(query=question, k=3)
    palm_answer = palm_chain.run(input_documents=palm_docs, question=question)
    display(Markdown('**PaLM:**\n'))
    display(Markdown(palm_answer + '\n'))

In [9]:
answer_question("¿Qué es la Ley Corta de PGU?")



**PaLM:**


La Ley Corta de la Pensión Garantizada Universal (PGU) establece un cambio en el requisito de focalización, permitiendo que más personas puedan acceder al beneficio.
