In [7]:
import google.generativeai as palm
from qdrant_client import QdrantClient
import os
import chromadb, pickle
from chromadb.api.types import Documents, Embeddings
from google.generativeai.types import HarmCategory
from google.generativeai.types import HarmBlockThreshold

In [8]:
# Read the API key from the environment so the secret never gets saved in the
# notebook file or its history.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
  raise RuntimeError(
    "Set the GOOGLE_API_KEY environment variable before running this notebook."
  )
palm.configure(api_key=api_key)

In [9]:
# Open the Chroma client persisted under ./embeddings/ (path is relative to the
# notebook's working directory).
client = chromadb.PersistentClient(
  path="./embeddings/",
)

In [10]:
# Collect every model that advertises the 'embedText' generation method and use
# the first one for embeddings.
models = [m for m in palm.list_models() if 'embedText' in m.supported_generation_methods]
if not models:
  # Fail with an actionable message instead of a bare IndexError on models[0].
  raise RuntimeError("No model supporting 'embedText' is available for this API key.")
model = models[0]

In [11]:
# Number of embed_function calls made so far; printed on every call.
count = 0

def embed_function(texts: Documents) -> Embeddings:
  """Embed each entry of ``texts`` with the module-level embedding ``model``.

  One ``palm.generate_embeddings`` request is issued per text and the
  'embedding' entry of each response is collected, in input order.

  Side effects: prints the current value of the global ``count`` and then
  increments it by one.
  """
  global count
  vectors = [palm.generate_embeddings(model, text)['embedding'] for text in texts]
  print(count)
  count += 1
  return vectors
# Fetch the "palm_hazwoper_test" collection and have it embed query texts with
# embed_function.
db = client.get_collection(
  "palm_hazwoper_test",
  embedding_function=embed_function,
)

In [12]:
# Retrieval smoke test: query the collection with a sample question, take the
# documents returned for that single query, and join them with blank lines.
sample_hits = db.query(
  query_texts=["what do you know about silica awareness"],
  n_results=3,
)
ret = "\n\n".join(sample_hits['documents'][0])
ret

0


"title\nSilica Exposure Awareness for Occupational Safety\ndescription\nSilica Exposure Awareness for Occupational Safety\nWhat is Silica?\nSilica, also known as silicon dioxide (SiO₂), is a naturally occurring mineral in the earth’s crust; often found as stone and sand. Silica occurs in two forms – crystalline silica and amorphous (non-crystalline) silica.\nCrystalline silica\nis hard, with a high melting point, and is chemically inactive. Quartz is the most common form of crystalline silica found and used in the creation of various products. Cristobalite and Tridymite are the other two known forms of crystalline silica.\nSilica is found in a variety of products, and there are various uses for silica, globally. Crystalline silica is used to make pottery, ceramic, and glass products. Also, when sand, stone, or rocks are used to produce other materials; for example, concrete, mortar, bricks, etc., these transformed products also contain silica. Crystalline silica is also found in granit

In [13]:
def get_relevant_passage(query, db, n_results=3):
  """Return the documents retrieved for ``query``, joined by blank lines.

  Args:
    query: The question text; sent to ``db.query`` as the only query text.
    db: An object exposing ``query(query_texts=..., n_results=...)`` whose
      result has a 'documents' entry holding one list of strings per query.
    n_results: How many documents to request. Defaults to 3.

  Returns:
    A single string: the documents for the query separated by "\\n\\n".
  """
  # Only one query text is sent, so its documents are at index 0.
  documents = db.query(query_texts=[query], n_results=n_results)['documents'][0]
  return "\n\n".join(documents)

In [14]:
def make_prompt(query, relevant):
  """Build the text prompt sent to the generation model.

  Args:
    query: The user's question, inserted verbatim.
    relevant: Retrieved context text. Single and double quote characters are
      removed and newlines are turned into spaces before insertion.

  Returns:
    The filled-in prompt string.
  """
  # Drop both quote characters and flatten newlines in a single pass.
  cleanup = str.maketrans({"'": None, '"': None, "\n": " "})
  context = relevant.translate(cleanup)

  # The backslash continuations and leading spaces are part of the string
  # literal, so this text must stay exactly as written.
  template = """You are a customer support agent for the company "Hazwoper-osha", \
    do not play any other role. Use the following pieces of context to answer the question at the end. \
    If you don't know the answer, just say "I can only answer Hazwoper-OSHA related questions, please contact support \
    if you need further assistance", don't try to make up an answer. Use five sentences maximum.
  
  Context: {relevant}
Question: {query}
Kindly Answer Question:"""

  return template.format(query=query, relevant=context)

In [15]:
# Collect every model that advertises the 'generateText' generation method and
# use the first one for answering.
text_models = [m for m in palm.list_models() if 'generateText' in m.supported_generation_methods]
if not text_models:
  # Fail with an actionable message instead of a bare IndexError on text_models[0].
  raise RuntimeError("No model supporting 'generateText' is available for this API key.")
text_model = text_models[0]
text_model

Model(name='models/text-bison-001', base_model_id='', version='001', display_name='Text Bison', description='Model targeted for text generation.', input_token_limit=8196, output_token_limit=1024, supported_generation_methods=['generateText', 'countTextTokens', 'createTunedTextModel'], temperature=0.7, top_p=0.95, top_k=40)

In [16]:
def answer(model, query, db, temperature=0.01):
  """Answer ``query`` using passages retrieved from ``db``.

  Retrieves the relevant passage, builds the prompt, prints it, and sends it
  to ``palm.generate_text`` asking for three candidates.

  Args:
    model: The text-generation model passed through to ``palm.generate_text``.
    query: The user's question.
    db: The collection handed to ``get_relevant_passage``.
    temperature: Sampling temperature forwarded to the model (default 0.01).

  Returns:
    Whatever ``palm.generate_text`` returns for the request.
  """
  prompt = make_prompt(query, get_relevant_passage(query, db))
  print("Prompt: ", prompt)

  # Every listed harm category gets the BLOCK_NONE threshold.
  harm_categories = (
    HarmCategory.HARM_CATEGORY_DEROGATORY,
    HarmCategory.HARM_CATEGORY_TOXICITY,
    HarmCategory.HARM_CATEGORY_SEXUAL,
    HarmCategory.HARM_CATEGORY_MEDICAL,
    HarmCategory.HARM_CATEGORY_DANGEROUS,
    HarmCategory.HARM_CATEGORY_VIOLENCE,
  )
  unfiltered = [
    {"category": category, "threshold": HarmBlockThreshold.BLOCK_NONE}
    for category in harm_categories
  ]

  completion = palm.generate_text(
    prompt=prompt,
    model=model,
    candidate_count=3,
    temperature=temperature,
    max_output_tokens=1000,
    safety_settings=unfiltered,
  )
  return completion

In [19]:
temperature = 0.65
query = "What do you know about NFPA 70."
# Pass the temperature explicitly; without it answer() silently uses its own
# default of 0.01 and the value set above has no effect.
ans = answer(text_model, query, db, temperature=temperature)

2
Prompt:  You are a customer support agent for the company "Hazwoper-osha",     do not play any other role. Use the following pieces of context to answer the question at the end.     If you don't know the answer, just say "I can only answer Hazwoper-OSHA related questions, please contact support     if you need further assistance", don't try to make up an answer. Use five sentences maximum.
  
  Context: title Glossary Arc Flash Safety Training (NFPA 70E) description A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ACBM Acceptable means that an installation or equipment is acceptable to the Assistant Secretary of Labor and approved within the meaning of 29 CFR 1910 Subpart S: If it is accepted, or certified, or listed, or labeled, or otherwise determined to be safe by a nationally recognized testing laboratory recognized pursuant to § 1910.7 ; or With respect to an installation or equipment of a kind that no nationally recognized testing laboratory accepts, certifies, lists, labels

In [20]:
# Display the generated text of the first returned candidate.
first_candidate = ans.candidates[0]
first_candidate['output']

'NFPA 70 is the National Electrical Code (NEC), which is an internationally accepted American National Standard that defines minimum electrical safety standards for the safe installation of electrical wiring and equipment. The NEC is published by the National Fire Protection Association (NFPA).\n\nThe NEC is used by electrical contractors, engineers, and other professionals to design, install, and maintain electrical systems. It is also used by building inspectors to ensure that electrical systems are installed in accordance with the code.\n\nThe NEC is updated every three years to reflect new technologies and safety standards. The 2023 edition of the NEC is currently in development.\n\nThe NEC is a comprehensive code that covers a wide range of electrical safety topics, including:\n\n* Wiring methods and materials\n* Grounding and bonding\n* Overcurrent protection\n* Arc-flash protection\n* Electrical safety for the construction industry\n* Electrical safety for the general industry\n