In [23]:
import os
import openai
from openai import OpenAI
from dotenv import load_dotenv

# Read settings from a .env file into the process environment.
load_dotenv()

# Show which SDK version produced the outputs recorded in this notebook.
print(openai.__version__)

# use your own openai api key (read from the OPENAI_API_KEY variable)
openai_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=openai_key)

1.12.0


## Create Chatbot Davis

In [2]:
# Seed the conversation: the system prompt is the first entry of the message
# stack, and every later turn is appended to that same list.
system_msg = dict(role="system", content="You are a helpful assistant whose name is Davis.")
message_records = [system_msg]

# Keyword arguments forwarded to every chat-completion request.
config = dict(model="gpt-3.5-turbo", temperature=0.1, stream=False)

def get_response(user_message, config):
    """Send one user turn to the chat model and record the exchange.

    Args:
        user_message: Text of the user's message.
        config: Keyword arguments forwarded to
            ``client.chat.completions.create`` (model, temperature, ...).

    Returns:
        The assistant's reply text, which is also printed.

    Raises:
        Whatever the API call raises. In that case ``message_records`` is left
        untouched, so re-running the cell does not duplicate the user turn.
    """
    # input message is always from user
    new_msg = {"role": "user", "content": user_message}

    # Send the history plus the new turn without mutating the history yet.
    response = client.chat.completions.create(
        messages=message_records + [new_msg],
        **config,
    )
    response_msg = response.choices[0].message.content
    assistant_msg = {"role": "assistant", "content": response_msg}

    # Record both sides of the exchange only once the request has succeeded.
    message_records.extend([new_msg, assistant_msg])
    print(response_msg)
    return response_msg

In [3]:
# First turn: ask the assistant to introduce itself.
user_msg = "Hi! what is your name?"
_ = get_response(user_message=user_msg, config=config)


Hello! My name is Davis. How can I assist you today?


In [4]:
# Second turn: give the assistant a concrete task on a quoted sentence.
user_msg = "Help me paraphrase this sentence ```OpenAI revives its robotic research team, plans to build dedicated AI```"
_ = get_response(user_message=user_msg, config=config)

OpenAI is bringing back its team focused on robotics and intends to create a specialized artificial intelligence.


In [5]:
# Third turn: check that the stacked history lets the model recall the earlier request.
user_msg = "Hi! what did I ask you?"
_ = get_response(user_message=user_msg, config=config)

You asked me to help you paraphrase the sentence "OpenAI revives its robotic research team, plans to build dedicated AI."


## Give Davis Tools to Use

In [7]:
# Schema describing the weather tool to the model: a function named
# `get_current_weather` taking a required `location` and an optional `unit`.
tool_schema = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                },
            },
            "required": ["location"],
        },
    },
}
# write a python function get_weather for chatbot to call
import python_weather
import asyncio

async def get_weather(location, unit="fahrenheit"):
    """Fetch the current temperature for a location as a one-line summary.

    Args:
        location: Place to look up, e.g. "San Francisco".
        unit: "celsius" for metric output; any other value (including the
            default) yields imperial output. The default exists because the
            tool schema marks only ``location`` as required, so the model may
            omit ``unit`` when calling this function.

    Returns:
        A string such as "Current weather in San Francisco: 56 °F".
    """
    is_celsius = unit == "celsius"
    degree_unit = python_weather.METRIC if is_celsius else python_weather.IMPERIAL
    u = "°C" if is_celsius else "°F"
    # Named weather_client so it is not confused with the OpenAI `client`.
    weather_client = python_weather.Client(unit=degree_unit)
    try:
        weather = await weather_client.get(location)
        # Report the requested location, not an unrelated global variable.
        return f"Current weather in {location}: {weather.temperature} {u}"
    finally:
        # Always close the client, even when the lookup fails.
        await weather_client.close()

# Python callables keyed by the tool name the model will ask for, and the
# list of schemas advertised to the model.
tool_mapper = dict(get_current_weather=get_weather)
tools = [tool_schema]

In [8]:
# validate if the tool works
city = "San Francisco"
c = await get_weather(city, "fahrenheit")
print(c)

Current weather in San Francisco: 56 °F


In [9]:
import json
# Start a fresh conversation for the tool-using chatbot: same system prompt,
# new message stack, same request settings.
system_msg = dict(
    role="system",
    content="You are a helpful assistant whose name is Davis.",
)
message_records = [system_msg]

config = dict(
    model="gpt-3.5-turbo",
    temperature=0.1,
    stream=False,
)

def query(messages):
    """Request a chat completion for ``messages`` without offering any tools.

    The module-level ``config`` supplies the remaining request arguments.
    Returns the completion response object as received from the client.
    """
    return client.chat.completions.create(messages=messages, **config)

def query_with_tools(messages):
    """Request a chat completion for ``messages`` with the tools on offer.

    The module-level ``tools`` list is advertised and the model decides by
    itself whether to call one (``tool_choice='auto'``). The module-level
    ``config`` supplies the remaining request arguments. Returns the completion
    response object as received from the client.
    """
    tool_options = dict(tools=tools, tool_choice='auto')
    return client.chat.completions.create(messages=messages, **tool_options, **config)

async def call_tool(response):
    """Run every tool the model requested and add the results to the history.

    Each requested tool is looked up in ``tool_mapper``, awaited with its
    JSON-decoded arguments, and its result is appended to ``message_records``
    as a system message. All requested calls are handled, not only the first,
    because the model may ask for several tools in a single turn.

    Args:
        response: Chat-completion response whose first choice carries
            ``message.tool_calls``. A missing or empty list is a no-op.

    Raises:
        AssertionError: If the model names a tool missing from ``tool_mapper``.
    """
    for tool_call in response.choices[0].message.tool_calls or []:
        tool_name = tool_call.function.name
        # An empty arguments string means "no arguments"; decode it as {}.
        args = json.loads(tool_call.function.arguments or "{}")
        print(f"Using tool `{tool_name}` with args: {args} ")
        assert tool_name in tool_mapper, f"Unknown tool requested: {tool_name}"
        function = tool_mapper[tool_name]
        result = await function(**args)
        # append new system message to get more context that was generated by using tool
        sys_msg = {"role": "system", "content": result}
        message_records.append(sys_msg)
        print(f"Fetched tool result: {result}")
   

async def act(user_message, config, tools):
    """Handle one user turn, letting the model use a tool when it asks for one.

    The user message is stacked onto ``message_records`` and sent with the
    tools on offer. If the model requests tool calls, they are executed via
    ``call_tool`` (which stacks their results) and the model is queried again
    without tools to produce the final answer.

    Args:
        user_message: Text of the user's message.
        config: Accepted for interface compatibility. NOTE: not forwarded here;
            ``query`` / ``query_with_tools`` are called with the messages only.
        tools: Accepted for interface compatibility; same note as ``config``.

    Returns:
        The assistant's final reply text.

    Raises:
        Whatever the requests or the tool raise. Everything this turn added to
        ``message_records`` is rolled back first, so a re-run starts clean.
    """
    # Remember where this turn starts so a failure can be undone.
    checkpoint = len(message_records)
    try:
        # input message is always from user
        message_records.append({"role": "user", "content": user_message})

        # check if any tools are needed to be used
        response = query_with_tools(message_records)
        if response.choices[0].message.tool_calls:
            await call_tool(response)
            # tool results are now in the history, so ask again without tools
            response = query(message_records)

        # now get final response message
        response_msg = response.choices[0].message.content
    except BaseException:
        # Drop the partial turn (user message and any tool results), including
        # on kernel interrupt, then let the error propagate unchanged.
        del message_records[checkpoint:]
        raise

    # stacking the AI responded message to track the conversation
    message_records.append({"role": "assistant", "content": response_msg})
    return response_msg


In [10]:
# gonna use weather tool
res = await act("how is the weather in San Francisco today?", config, tools)
print(res)

Using tool `get_current_weather` with args: {'location': 'San Francisco', 'unit': 'celsius'} 
Fetched tool result: Current weather in San Francisco: 13 °C
The weather in San Francisco today is 13°C. It seems to be a bit cool there. Do you need any more information about the weather or anything else?


In [11]:
# will not use any tools
res = await act("Suggest me one fun thing to do in San Francisco today?", config, tools)
print(res)

One fun thing to do in San Francisco today is to visit the Golden Gate Park. You can explore the beautiful gardens, visit the California Academy of Sciences, rent a paddleboat on Stow Lake, or simply enjoy a leisurely walk in the park. It's a great way to spend a relaxing day in the city. Enjoy your time in San Francisco!


## Embeddings

In [34]:
# Calculate similarity score of two sentences using openAI embeddings
from typing import List
import numpy as np
"""
Available Embedding Models
MODEL	              	MAX_INPUT  OUTPUT
text-embedding-3-small	8191       1536
text-embedding-3-large	8191       3072
text-embedding-ada-002	8191       1536
"""

# Sample inputs to embed: two short code snippets and one unrelated sentence.
texts = [
    "def add(a, b): return a + b",
    "def min(a, b): return a - b",
    "The quick brown fox jumps over a lazy dog",
]

def get_embeddings(texts: List[str], model="text-embedding-3-small"):
    """Embed each text with the given model and stack the vectors.

    Args:
        texts: Strings to embed.
        model: Name of the embedding model to request.

    Returns:
        A NumPy array built from the ``embedding`` of every item in the
        response, one row per item.
    """
    result = client.embeddings.create(input=texts, model=model)
    return np.array([item.embedding for item in result.data])


# Embed the sample texts and show the dimensionality of one vector.
vs = get_embeddings(texts=texts)
print(vs.shape[1])

1536


In [35]:
# OpenAI embeddings are normalized to unit length, so the dot product is the
# cosine similarity. Higher means more similar; the theoretical range is -1 to 1.
vs[0].dot(vs[1])

0.5161033277308834

In [36]:
# Same score for a code snippet against the unrelated English sentence.
vs[1].dot(vs[2])

0.10385193973303869

In [18]:
# Count number of tokens to prevent context overflow
import tiktoken
"""
Encoders:
GPT-4 uses cl100k_base
GPT-4o uses o200k_base (not publicly available at the time of writing)
"""
# Tokenize the first sample text, then show the token ids and how many there are.
encoding = tiktoken.get_encoding("cl100k_base")
tokens = encoding.encode(texts[0])

print(tokens, len(tokens), sep="\n")

[755, 923, 2948, 11, 293, 1680, 471, 264, 489, 293]
10
