<a href="https://colab.research.google.com/github/RRaffay/Playground/blob/main/Gemini_Search_Testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Gemini Tutor Testing

# Base Google Search

## Imports and Config

In [None]:
!pip install -U -q google-generativeai

In [None]:
!pip install requests

In [None]:
import pathlib
import textwrap
import time
import json
import requests

import google.generativeai as genai


from IPython import display
from IPython.display import Markdown

In [None]:
try:
    # Used to securely store your API key
    from google.colab import userdata

    # Or use `os.getenv('API_KEY')` to fetch an environment variable.
    GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
except ImportError:
    import os
    GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
import google.ai.generativelanguage as glm
serper_api_key = userdata.get('SERP_API_KEY')

## Basic Example

In [None]:
def multiply(a:float, b:float):
    """returns a * b."""
    return a*b

model = genai.GenerativeModel(model_name='gemini-1.0-pro',
                              tools=[multiply])

model

genai.GenerativeModel(
    model_name='models/gemini-1.0-pro',
    generation_config={},
    safety_settings={},
    tools=<google.generativeai.types.content_types.FunctionLibrary object at 0x7f35d2359210>,
    system_instruction=None,
)

In [None]:
chat = model.start_chat(enable_automatic_function_calling=True)

In [None]:
response = chat.send_message('I have 57 cats, each owns 44 mittens, how many mittens is that in total?')
response.text

'That makes 2508 mittens in total.'

In [None]:
for content in chat.history:
    part = content.parts[0]
    print(content.role, "->", type(part).to_dict(part))
    print('-'*80)

user -> {'text': 'I have 57 cats, each owns 44 mittens, how many mittens is that in total?'}
--------------------------------------------------------------------------------
model -> {'function_call': {'name': 'multiply', 'args': {'b': 44.0, 'a': 57.0}}}
--------------------------------------------------------------------------------
user -> {'function_response': {'name': 'multiply', 'response': {'result': 2508.0}}}
--------------------------------------------------------------------------------
model -> {'text': 'That makes 2508 mittens in total.'}
--------------------------------------------------------------------------------


## Trying Automated Search with Serper

In [None]:
serper_api_key = userdata.get('SERP_API_KEY')

In [None]:
def google_search(search_keyword: str):
    """Returns the google results for the given search word"""
    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": search_keyword
    })

    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    print(f"Google Search for: {search_keyword}")
    return response.text


In [None]:
model = genai.GenerativeModel(model_name='gemini-1.0-pro',
                              tools=[google_search])

In [None]:
chat = model.start_chat(enable_automatic_function_calling=True)

In [None]:
response = chat.send_message('What is the CS330 Course at Duke')
response.text

## Trying Manual Search with Serper

In [None]:
import google.ai.generativelanguage as glm
serper_api_key = userdata.get('SERP_API_KEY')

In [None]:
def google_search_runner(search_keyword: str):
    """Returns the google results for the given search word"""
    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": search_keyword
    })

    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    print(f"Google Search for: {search_keyword}")
    return response.text

In [None]:
google_search = {'function_declarations': [
    {
    "name": "google_search",
    "description": "Google search to return results of search keywords",
    "parameters": {
        "type_": "OBJECT",
        "properties": {
            "search_keyword": {
                "type_": "STRING",
                "description": "A great search keyword that most likely to return resuls for the information you are looking for"
            }
        },
        "required": [
            "search_keyword"
        ]
    }
}
]}

In [None]:
glm.Tool(google_search)

function_declarations {
  name: "google_search"
  description: "Google search to return results of search keywords"
  parameters {
    type_: OBJECT
    properties {
      key: "search_keyword"
      value {
        type_: STRING
        description: "A great search keyword that most likely to return resuls for the information you are looking for"
      }
    }
    required: "search_keyword"
  }
}

In [None]:
model = genai.GenerativeModel('gemini-pro', tools=google_search)
chat = model.start_chat()

response = chat.send_message(
    f"What is the class CS330 at Duke?",
)

In [None]:
response.candidates

[content {
  parts {
    function_call {
      name: "google_search"
      args {
        fields {
          key: "search_keyword"
          value {
            string_value: "What is the class CS330 at Duke?"
          }
        }
      }
    }
  }
  role: "model"
}
finish_reason: STOP
index: 0
safety_ratings {
  category: HARM_CATEGORY_SEXUALLY_EXPLICIT
  probability: NEGLIGIBLE
}
safety_ratings {
  category: HARM_CATEGORY_HARASSMENT
  probability: NEGLIGIBLE
}
safety_ratings {
  category: HARM_CATEGORY_DANGEROUS_CONTENT
  probability: NEGLIGIBLE
}
safety_ratings {
  category: HARM_CATEGORY_HATE_SPEECH
  probability: NEGLIGIBLE
}
]

In [None]:
fc = response.candidates[0].content.parts[0].function_call
assert fc.name == 'google_search'

key_word = fc.args['search_keyword']

In [None]:
key_word

'What is the class CS330 at Duke?'

In [None]:
result = google_search_runner(key_word)
result

Google Search for: What is the class CS330 at Duke?


'{"searchParameters":{"q":"What is the class CS330 at Duke?","type":"search","engine":"google"},"searchInformation":{"didYouMean":"What is the class CS 330 at Duke?"},"answerBox":{"snippet":"Design and analysis of efficient algorithms at an undergraduate level. Topics include greedy algorithms, divide and conquer, dynamic programming, graph algorithms, linear programming, randomized algorithms, and NP-completeness.","snippetHighlighted":["Design and analysis of efficient algorithms at an undergraduate level"],"title":"Computer Science 330 Spring 2024 - Sites@Duke Express","link":"https://sites.duke.edu/spring24compsci330/"},"organic":[{"title":"CS 330 Spring 2023 | Intro to Design & Analysis of Algorithms","link":"https://courses.cs.duke.edu/spring23/compsci330/","snippet":"Course Synopsis · Design techniques: Divide-and-conquer, greedy algorithms, dynamic programming, local search, and randomization · Data structures: Balanced ...","position":1},{"title":"Duke COMPSCI 330 Fall 2022 - 

In [None]:
response = chat.send_message(
    glm.Content(
    parts=[glm.Part(
        function_response = glm.FunctionResponse(
          name='google_search',
          response={'result': result}))]))

In [None]:
response.text

'CS330 at Duke is an undergraduate-level course that focuses on the design and analysis of efficient algorithms. The class covers a wide range of topics, including greedy algorithms, divide-and-conquer, dynamic programming, graph algorithms, linear programming, randomized algorithms, and NP-completeness. The course is typically offered in the fall and spring semesters.'

## Final Implementation

In [None]:
def google_search_runner(search_keyword: str):
    """Returns the google results for the given search word"""
    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": search_keyword
    })

    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    print(f"Google Search for: {search_keyword}")
    return response.text

In [None]:
def find_class_info(class_name: str, school_name:str):
  """ Given the name of a class and school find details"""
  google_search = {'function_declarations': [
      {
      "name": "google_search",
      "description": "Google search to return results of search keywords",
      "parameters": {
          "type_": "OBJECT",
          "properties": {
              "search_keyword": {
                  "type_": "STRING",
                  "description": "A great search keyword that most likely to return resuls for the information you are looking for"
              }
          },
          "required": [
              "search_keyword"
          ]
      }
  }
  ]}

  glm.Tool(google_search)

  # Implement Retry
  try:

    model = genai.GenerativeModel('gemini-pro', tools=google_search)
    chat = model.start_chat()

    response = chat.send_message(
        f"Find more information about {class_name} taught at {school_name}",
    )


    fc = response.candidates[0].content.parts[0].function_call

    key_word = fc.args['search_keyword']



  except Exception as e:
    print(f"Error {e}")
    return "Error in generating Key Word with Gemini"


  try:
    result = google_search_runner(key_word)

  except Exception as e:
    print(f"Error {e}")
    return "Error in generating search from serper"

  response = chat.send_message(
    glm.Content(
    parts=[glm.Part(
        function_response = glm.FunctionResponse(
          name='google_search',
          response={'result': result}))]))

  return response.text



In [None]:
find_class_info("CS230","Duke University")

Google Search for: CS230 Duke University


'Here is some information about CS230 at Duke University. CS230 is an undergraduate course that introduces students to the mathematical foundations of computer science. The course covers topics such as sets, functions, relations, graphs, trees, and algorithms. CS230 is a required course for all computer science majors at Duke University. The course is typically offered in the spring semester.'

In [None]:
find_class_info("CS224W","Stanford University")

Google Search for: CS224W at Stanford University


'CS224W: Machine Learning with Graphs is a course offered by Stanford University. It focuses on computational, algorithmic, and modeling challenges specific to the analysis of graphs. The course covers topics such as graph algorithms, graph mining, graph clustering, and graph visualization. It also provides hands-on experience with real-world datasets and open-source software tools. The course is designed for students with a strong background in computer science and mathematics.'

# Article Reading

In [None]:
!pip install langchain-google-genai
!pip install langchain
!pip install langchain_community

In [None]:
try:
    # Used to securely store your API key
    from google.colab import userdata

    # Or use `os.getenv('API_KEY')` to fetch an environment variable.
    GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
except ImportError:
    import os
    GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
#Import Modules
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain.chains import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate

#Initialize Model
llm = ChatGoogleGenerativeAI(model="gemini-pro",google_api_key=GOOGLE_API_KEY)

#Load the blog
loader = WebBaseLoader("https://thenewstack.io/the-building-blocks-of-llms-vectors-tokens-and-embeddings/")
docs = loader.load()

#Define the Summarize Chain
template = """Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:"""

prompt = PromptTemplate.from_template(template)

llm_chain = LLMChain(llm=llm, prompt=prompt)
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

#Invoke Chain
response=stuff_chain.invoke(docs)
print(response["output_text"])

This article explains the fundamental building blocks of LLMs: vectors, tokens, and embeddings. Vectors are numerical representations of text or data that are understood by LLMs. Tokens are the basic units of data processed by LLMs, which are generated by tokenizers. Embeddings are vectors that represent the meaning and context of text, allowing LLMs to understand language with nuance and perform tasks like sentiment analysis and text generation.


In [None]:
def article_reader_runner(url:str):
  "Given a url, this returns the summary of the article"
  lm = ChatGoogleGenerativeAI(model="gemini-pro",google_api_key=GOOGLE_API_KEY)

  #Load the blog
  loader = WebBaseLoader(url)
  docs = loader.load()

  #Define the Summarize Chain
  template = """Write a concise summary of the following:
  "{text}"
  CONCISE SUMMARY:"""

  prompt = PromptTemplate.from_template(template)

  llm_chain = LLMChain(llm=llm, prompt=prompt)
  stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

  #Invoke Chain
  response=stuff_chain.invoke(docs)
  return response["output_text"]

In [None]:
url = "https://thenewstack.io/how-to-cure-llm-weaknesses-with-vector-databases/"

In [None]:
article_reader_runner(url)

'Vector databases can resolve the limitations of off-the-shelf LLMs, including outdated data, lack of domain-specific context, and AI hallucinations. By incorporating proprietary data and vector embeddings into LLMs, vector databases enhance accuracy and relevance, leading to more effective and efficient AI applications.'

In [None]:
article_summarizer = {'function_declarations': [
      {
      "name": "article_summarizer",
      "description": "Article summarizer given a url",
      "parameters": {
          "type_": "OBJECT",
          "properties": {
              "url": {
                  "type_": "STRING",
                  "description": "URL of the webpage to be read"
              }
          },
          "required": [
              "url"
          ]
      }
  }
  ]}

In [None]:
glm.Tool(article_summarizer)

function_declarations {
  name: "article_summarizer"
  description: "Article summarizer given a url"
  parameters {
    type_: OBJECT
    properties {
      key: "url"
      value {
        type_: STRING
        description: "URL of the webpage to be read"
      }
    }
    required: "url"
  }
}

In [None]:
model = genai.GenerativeModel('gemini-pro', tools=article_summarizer)
chat = model.start_chat()

response = chat.send_message(
    f"Find more information about what {url} is about",
)


fc = response.candidates[0].content.parts[0].function_call

fc

name: "article_summarizer"
args {
  fields {
    key: "url"
    value {
      string_value: "https://thenewstack.io/how-to-cure-llm-weaknesses-with-vector-databases/"
    }
  }
}

In [None]:
url_arg = fc.args['url']


result = article_reader_runner(url_arg)


response = chat.send_message(
  glm.Content(
  parts=[glm.Part(
      function_response = glm.FunctionResponse(
        name='article_summarizer',
        response={'result': result}))]))

response.text

'The provided URL is about how to address LLM weaknesses using vector databases. LLMs have known limitations such as outdated training data, lack of context specific to an organization, and the tendency towards AI hallucination. Vector databases use vector embeddings to encode data, which increases value and improves the accuracy of search results. When combined with LLMs, businesses can extend off-the-shelf AI solutions with domain-specific knowledge which mitigates risks and unlocks the potential of real-time AI applications.'

#  Search and Summarize

## Imports and Config

In [16]:
#Import Modules
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain.chains import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate

import pathlib
import textwrap
import time
import json
import requests

import google.generativeai as genai
import google.ai.generativelanguage as glm
from google.colab import userdata


from IPython import display
from IPython.display import Markdown

In [17]:
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

In [18]:
serper_api_key = userdata.get('SERP_API_KEY')

## Search

In [19]:
def google_search_runner(search_keyword: str):
    """Returns the google results for the given search word"""
    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": search_keyword
    })

    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    print(f"Google Search for: {search_keyword}")
    return response.text

In [20]:
google_search = {'function_declarations': [
      {
      "name": "google_search",
      "description": "Google search to return results of search keywords",
      "parameters": {
          "type_": "OBJECT",
          "properties": {
              "search_keyword": {
                  "type_": "STRING",
                  "description": "A great search keyword that most likely to return resuls for the information you are looking for"
              }
          },
          "required": [
              "search_keyword"
          ]
      }
  }
]}

glm.Tool(google_search)

function_declarations {
  name: "google_search"
  description: "Google search to return results of search keywords"
  parameters {
    type_: OBJECT
    properties {
      key: "search_keyword"
      value {
        type_: STRING
        description: "A great search keyword that most likely to return resuls for the information you are looking for"
      }
    }
    required: "search_keyword"
  }
}

## Summarizer

In [21]:
def article_reader_runner(url:str):
  "Given a url, this returns the summary of the article"
  print(f"Reading article: {url}")
  llm = ChatGoogleGenerativeAI(model="gemini-pro",google_api_key=GOOGLE_API_KEY)

  #Load the blog
  loader = WebBaseLoader(url)
  docs = loader.load()

  #Define the Summarize Chain
  template = """Write a concise summary of the following:
  "{text}"
  CONCISE SUMMARY:"""

  prompt = PromptTemplate.from_template(template)

  llm_chain = LLMChain(llm=llm, prompt=prompt)
  stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

  #Invoke Chain
  response=stuff_chain.invoke(docs)
  return response["output_text"]

In [22]:
article_summarizer = {'function_declarations': [
      {
      "name": "article_summarizer",
      "description": "Reads article given a url: Returns summary of article",
      "parameters": {
          "type_": "OBJECT",
          "properties": {
              "url": {
                  "type_": "STRING",
                  "description": "URL of the webpage to be read"
              }
          },
          "required": [
              "url"
          ]
      }
  }
]}

## Together

In [23]:
def handle_response(response,chat):
  if response.candidates[0].content.parts[0].function_call.name == "google_search":
    fc = response.candidates[0].content.parts[0].function_call
    key_word = fc.args["search_keyword"]
    result = google_search_runner(key_word)
    response = chat.send_message(
      glm.Content(
      parts=[glm.Part(
          function_response = glm.FunctionResponse(
            name='google_search',
            response={'result': result}))]))
    return response
  elif response.candidates[0].content.parts[0].function_call.name == "article_summarizer":
    fc = response.candidates[0].content.parts[0].function_call
    url_arg = fc.args['url']
    result = article_reader_runner(url_arg)
    response = chat.send_message(
      glm.Content(
      parts=[glm.Part(
          function_response = glm.FunctionResponse(
            name='article_summarizer',
            response={'result': result}))]))
    return response

  return response


In [24]:
def class_details(class_name,school_name):
  model = genai.GenerativeModel('gemini-1.0-pro', tools=[article_summarizer,google_search])
  chat = model.start_chat()
  response = chat.send_message(
    f"Find more information about the {school_name} {class_name} by first searching for articles and then reading from the url. We require a thorough understanding of the course contents and objectives",
  )

  fc = response.candidates[0].content.parts[0].function_call

  while fc:
    response = handle_response(response,chat)
    fc = response.candidates[0].content.parts[0].function_call

  return response.text


In [None]:
class_details("CS330","Duke")

In [26]:
class_details("CS224W","Stanford University")

ERROR:grpc._plugin_wrapping:AuthMetadataPluginCallback "<google.auth.transport.grpc.AuthMetadataPlugin object at 0x789a26f95300>" raised exception!
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/google/auth/compute_engine/credentials.py", line 128, in refresh
    self._retrieve_info(request)
  File "/usr/local/lib/python3.10/dist-packages/google/auth/compute_engine/credentials.py", line 101, in _retrieve_info
    info = _metadata.get_service_account_info(
  File "/usr/local/lib/python3.10/dist-packages/google/auth/compute_engine/_metadata.py", line 323, in get_service_account_info
    return get(request, path, params={"recursive": "true"})
  File "/usr/local/lib/python3.10/dist-packages/google/auth/compute_engine/_metadata.py", line 248, in get
    raise exceptions.TransportError(
google.auth.exceptions.TransportError: ("Failed to retrieve http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/?recursive=true from the Go

KeyboardInterrupt: 