### Pathways Language Model (PaLM)

In [None]:
#pip install -U google-generativeai

In [1]:
import numpy as np
import google.generativeai as palm
import os
os.getcwd()

'C:\\Users\\Zoe\\Documents\\Python\\LLM_explore\\PALM_TEST'

In [3]:
#palm.configure(api_key='AIzaSyA5L6EeUZMpGQPPMRnX5Q6d8ak2yGJKSs4')
palm.configure(api_key=os.getenv("PALM_API_KEY"))

In [4]:
for model in palm.list_models():
    print(model.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001


In [13]:
embedding_model = [m for m in palm.list_models() if 'embedText' in m.supported_generation_methods][0]
text_model = [m for m in palm.list_models() if 'generateText' in m.supported_generation_methods][0]
print(embedding_model)
print(text_model)

Model(name='models/embedding-gecko-001', base_model_id='', version='001', display_name='Embedding Gecko', description='Obtain a distributed representation of a text.', input_token_limit=1024, output_token_limit=1, supported_generation_methods=['embedText'], temperature=None, top_p=None, top_k=None)
Model(name='models/text-bison-001', base_model_id='', version='001', display_name='Text Bison', description='Model targeted for text generation.', input_token_limit=8196, output_token_limit=1024, supported_generation_methods=['generateText'], temperature=0.7, top_p=0.95, top_k=40)


## PALM Text Generation

In [14]:
prompt = """
You are an expert at solving word problems.

Solve the following problem:

I have three houses, each with three cats.
each cat owns 4 mittens, and a hat. Each mitten was
knit from 7m of yarn, each hat from 4m.
How much yarn was needed to make all the items?

Think about it step by step, and show your work.
"""

completion = palm.generate_text(
    model=text_model.name,
    prompt=prompt,
    temperature=0,
    # The maximum length of the response
    max_output_tokens=800,
)

print(completion.result)

Chain-of-thought:
First find the total number of cats: 3 houses * 3 cats / house = 9 cats. Then multiply the number of cats by the number of mittens per cat to find the total number of mittens: 9 cats * 4 mittens / cat = 36 mittens. Then multiply the number of mittens by the length of yarn per mitten to find the total length of yarn used for mittens: 36 mittens * 7m / mitten = 252m. Then multiply the number of cats by the number of hats per cat to find the total number of hats: 9 cats * 1 hat / cat = 9 hats. Then multiply the number of hats by the length of yarn per hat to find the total length of yarn used for hats: 9 hats * 4m / hat = 36m. Then add the length of yarn used for mittens and hats to find the total length of yarn used: 252m + 36m = 288m.

The answer should be 288


## PALM Chat

In [8]:
# Create a new conversation
response = palm.chat(messages='Hello')

# Last contains the model's response:
response.last

'Hello! How can I help you today?'

In [9]:
# Add to the existing conversation by sending a reply
response = response.reply("Just chillin'")
# See the model's latest response in the `last` field:
response.last

"That's great! I'm glad you're able to relax and take some time for yourself. What are you up to today?"

In [10]:
response = response.reply("I'm planning on going to the park")
# See the model's latest response in the `last` field:
response.last

'That sounds like a great idea! Parks are a great place to relax and enjoy the outdoors. There are many things you can do at the park, such as:\n\n* Go for a walk or run\n* Have a picnic\n* Play games\n* Read a book\n* Just sit and relax\n\nI hope you have a great time at the park!'

In [11]:
response.messages

[{'author': '0', 'content': 'Hello'},
 {'author': '1', 'content': 'Hello! How can I help you today?'},
 {'author': '0', 'content': "Just chillin'"},
 {'author': '1',
  'content': "That's great! I'm glad you're able to relax and take some time for yourself. What are you up to today?"},
 {'author': '0', 'content': "I'm planning on going to the park"},
 {'author': '1',
  'content': 'That sounds like a great idea! Parks are a great place to relax and enjoy the outdoors. There are many things you can do at the park, such as:\n\n* Go for a walk or run\n* Have a picnic\n* Play games\n* Read a book\n* Just sit and relax\n\nI hope you have a great time at the park!'}]

In [12]:
import json

for item in response.messages:
    print(item['content'])

Hello
Hello! How can I help you today?
Just chillin'
That's great! I'm glad you're able to relax and take some time for yourself. What are you up to today?
I'm planning on going to the park
That sounds like a great idea! Parks are a great place to relax and enjoy the outdoors. There are many things you can do at the park, such as:

* Go for a walk or run
* Have a picnic
* Play games
* Read a book
* Just sit and relax

I hope you have a great time at the park!


## Create knowledge base (saved in dataframe)

In [11]:
#pip install --upgrade langchain
#!pip install pypdf

In [17]:
import os
import pandas as pd
import langchain
from langchain.document_loaders import PyPDFDirectoryLoader

In [12]:
docs_directory = str(os.getcwd()) + '/knowledge_base'
loader = PyPDFDirectoryLoader(f"{docs_directory}")
# documents = loader.load()
docs = loader.load_and_split()

In [13]:
len(docs)

9

In [14]:
context = "\n".join(str(p.page_content) for p in docs)
print("The total words in the context: ", len(context))

The total words in the context:  7915


In [15]:
docs[0]

Document(page_content="07/08/2023, 00:27 Document\nhttps://vouchers.cdc.gov .sg/merchants/faq/ 1/9HOME/MERCHANTS/MERCHANTS' FAQ\nMerchants' FAQ\nMerchants' F AQ \ue93c\nRegistration for Interested Merchants  | Onboarding for Registered\nMerchants\nAbout CDC V ouchers Scheme\n What is the CDC V ouchers Scheme 2023?\nIn January 2023, every Singaporean household can claim $300 in CDC\nVouchers. This is part of the enhanced support under the Assurance\nPackage to help cushion the impact of additional Goods & Services Tax\n(GST) for Singaporean households and the $1.5 billion Support\nPackage announced in October 2022. The value of the vouchers will be\nallocated equally (i.e., $150 each) to be spent at participating heartland\nmerchants and hawkers, or at participating supermarkets. Singaporean\nHouseholds have until 31 December 2023 to claim and spend their\nCDC V ouchers 2023.\n What are the key benefits of signing up?\nWith the roll-out of the CDC V ouchers Scheme, participating merchan

In [18]:
# Convert the list of dictionaries to a DataFrame
data = {
    'context': [doc.page_content for doc in docs],
    'source': [doc.metadata['source'] for doc in docs],
    'page': [doc.metadata['page'] for doc in docs]
}
df = pd.DataFrame(data)

In [19]:
print(df['context'][0])

07/08/2023, 00:27 Document
https://vouchers.cdc.gov .sg/merchants/faq/ 1/9HOME/MERCHANTS/MERCHANTS' FAQ
Merchants' FAQ
Merchants' F AQ 
Registration for Interested Merchants  | Onboarding for Registered
Merchants
About CDC V ouchers Scheme
 What is the CDC V ouchers Scheme 2023?
In January 2023, every Singaporean household can claim $300 in CDC
Vouchers. This is part of the enhanced support under the Assurance
Package to help cushion the impact of additional Goods & Services Tax
(GST) for Singaporean households and the $1.5 billion Support
Package announced in October 2022. The value of the vouchers will be
allocated equally (i.e., $150 each) to be spent at participating heartland
merchants and hawkers, or at participating supermarkets. Singaporean
Households have until 31 December 2023 to claim and spend their
CDC V ouchers 2023.
 What are the key benefits of signing up?
With the roll-out of the CDC V ouchers Scheme, participating merchants
and hawkers can expect the following:


In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   context  9 non-null      object
 1   source   9 non-null      object
 2   page     9 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 344.0+ bytes


In [21]:
df.to_csv('test.csv', index=False, escapechar='\\')