In [None]:
import os
from openai import OpenAI
from dotenv import load_dotenv
# from ngramsapi import get_word_trends, plot_ngrams
import json
from typing import List, Dict, Union
# Load settings via python-dotenv before reading them, then build the shared
# OpenAI client from the OPENAI_API_KEY environment variable.
load_dotenv()
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
)

: 

In [67]:
# from os import path
import requests 
import urllib 
import pandas as pd
import matplotlib.pyplot as plt

# Baseline request settings; get_word_trends copies this mapping and lets
# keyword arguments override individual entries.
default_params = dict(
    year_start=1674,
    year_end=1912,
    corpus="en-GB-2019",
    smoothing=3,
    case_insensitive=False,
)
def get_word_trends(query, **kwargs):
    """Fetch Google Books Ngram frequencies for ``query`` and return them as JSON.

    Args:
        query: Ngram expression to look up; it is URL-encoded before sending.
        **kwargs: Overrides for the entries of ``default_params``
            (``year_start``, ``year_end``, ``corpus``, ``smoothing``,
            ``case_insensitive``). Unknown keys are ignored.

    Returns:
        The ``DataFrame.to_json()`` text of a table with a ``year`` column and
        one column per ngram in the response, or the string
        ``"No data available for this Ngram."`` when the response is empty.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        ValueError: If a returned timeseries does not have one value per year.
    """
    # Imported locally: a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule has been loaded.
    from urllib.parse import quote, urlencode

    params = {**default_params, **kwargs}

    # Callers may pass values decoded from JSON; coerce so range() below works.
    year_start = int(params['year_start'])
    year_end = int(params['year_end'])

    query_args = {
        'content': query,
        'year_start': year_start,
        'year_end': year_end,
        'corpus': params['corpus'],
        'smoothing': params['smoothing'],
    }
    # The flag is only sent when it is truthy, as before.
    if params['case_insensitive']:
        query_args['case_insensitive'] = params['case_insensitive']

    # quote_via=quote keeps spaces as %20 (urlencode's default would emit '+').
    url = 'https://books.google.com/ngrams/json?' + urlencode(query_args, quote_via=quote)

    print(url)

    # Bound the wait and surface HTTP errors instead of failing later on an
    # undecodable error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    output = response.json()

    if not output:
        return "No data available for this Ngram."

    # NOTE: an ngram literally named "year" replaces the year column, exactly
    # as it did when columns were assigned one at a time.
    columns = {'year': list(range(year_start, year_end + 1))}
    for series in output:
        columns[series['ngram']] = series['timeseries']

    # df.to_csv(path.join(path.dirname(__file__), 'ngrams_statistics.csv'), index=False)
    return pd.DataFrame(columns).to_json()
  
def plot_ngrams(df):
    """Draw one line per ngram column of ``df`` against its ``year`` column.

    Every column after the first is plotted as its own labelled series on a
    single 10x6 figure, which is then shown.
    """
    plt.figure(figsize=(10, 6))

    years = df['year']
    series_names = df.columns[1:]  # first column is skipped, the rest are series
    for name in series_names:
        plt.plot(years, df[name], label=name)

    plt.title('Ngram Frequencies Over Time')
    plt.xlabel('Year')
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.legend()
    plt.show()

In [20]:
# (name, JSON-schema type, description) for each argument of get_word_trends.
_word_trends_param_specs = [
  ("query", "string", "The word to get the trends for"),
  ("year_start", "integer", "The start year for the trends"),
  ("year_end", "integer", "The end year for the trends"),
  ("corpus", "string", "The corpus to use for the trends"),
  ("smoothing", "integer", "The smoothing factor for the trends"),
  ("case_insensitive", "boolean", "Flag to indicate if the search should be case insensitive"),
]

# Function-tool definition describing get_word_trends; only `query` is mandatory.
tools_list = [{
  "type": "function",
  "function": {
    "name": "get_word_trends",
    "description": "Get the trends of a word over time using the google ngrams api",
    "parameters": {
      "type": "object",
      "properties": {
        param_name: {"type": param_type, "description": param_description}
        for param_name, param_type, param_description in _word_trends_param_specs
      },
      "required": ["query"],
    },
  },
}]

In [37]:
# Look up the existing assistant whose id is stored in OPENAI_ASSISTANT_ID.
assistant_id = os.getenv("OPENAI_ASSISTANT_ID")
assistant = client.beta.assistants.retrieve(assistant_id=assistant_id)

In [38]:
class thread:
  # Minimal stand-in exposing `.id`, which the cells below pass as `thread_id`.
  # The id is read from the environment (like the API key and assistant id)
  # rather than pasted into the notebook. It stays "" when OPENAI_THREAD_ID is
  # unset, in which case the thread calls below cannot succeed.
  id = os.getenv("OPENAI_THREAD_ID", "")


In [68]:
# Post the user's question to the thread so the next run can answer it.
user_question = "What are the trends of the word old bailey between the year 1850 to 1860"
message = client.beta.threads.messages.create(
    role="user",
    content=user_question,
    thread_id=thread.id,
)


In [69]:
# Start a run of the assistant on the thread and wait for create_and_poll to
# return; the per-run instructions steer the model towards the function tool.
run_instructions = "Use the function tool for this query."
run = client.beta.threads.runs.create_and_poll(
  instructions=run_instructions,
  assistant_id=assistant.id,
  thread_id=thread.id,
)
# run

In [70]:
# Raw JSON argument string of the first pending tool call. `required_action`
# is None when the run is not paused on a tool call, so fall back to None
# instead of raising AttributeError.
tool = (
  run.required_action.submit_tool_outputs.tool_calls[0].function.arguments
  if run.required_action is not None
  else None
)

In [72]:
# Print the thread's messages once the run has completed; for any other
# status just report which state the run stopped in.
if run.status != 'completed':
  print(run.status)
else:
  messages = client.beta.threads.messages.list(thread_id=thread.id)
  print(messages)

requires_action


In [59]:
def args_to_dict(tool: str) -> Dict[str, Union[str, int, bool]]:
  """Decode a tool call's JSON argument string into keyword arguments.

  Args:
    tool: JSON text of a single object, e.g. '{"query": "old bailey"}'.

  Returns:
    The decoded object; values keep their JSON types (strings, integers,
    booleans), so they can be passed on with ``**``.

  Raises:
    ValueError: If ``tool`` is not valid JSON (``json.JSONDecodeError`` is a
      ``ValueError``) or decodes to something other than an object.
  """
  parsed = json.loads(tool)
  # Fail here with a clear message rather than later at the ``**args`` call.
  if not isinstance(parsed, dict):
    raise ValueError(
      f"Tool arguments must decode to a JSON object, got {type(parsed).__name__}"
    )
  return parsed

In [44]:
# tool = run.required_action.submit_tool_outputs.tool_calls[0].function.arguments
# args = args_to_dict(tool)
# print(args)
# trends = get_word_trends(**args)
# plot_ngrams(trends)
# get_word_trends

In [73]:
# Answer every tool call the run is waiting on, submit the answers in one
# request, then report the outcome.
tool_outputs = []

# `required_action` is None when the run is not paused on tool calls
# (for example because it already completed), so there is nothing to answer.
pending_calls = (
  run.required_action.submit_tool_outputs.tool_calls
  if run.required_action is not None
  else []
)

for tool_call in pending_calls:
  if tool_call.function.name != "get_word_trends":
    # Still answer the call: outputs for all pending calls are submitted
    # together, so skipping one would leave the run unable to continue.
    tool_outputs.append({
      "tool_call_id": tool_call.id,
      "output": f"Unknown tool: {tool_call.function.name}"
    })
    continue

  # Decode and execute the arguments only once the call is known to target
  # get_word_trends; other tools' arguments would not match its signature.
  args = args_to_dict(tool_call.function.arguments)
  res = get_word_trends(**args)
  print(res)
  tool_outputs.append({
    "tool_call_id": tool_call.id,
    "output": res
  })

if tool_outputs:
  try:
    run = client.beta.threads.runs.submit_tool_outputs_and_poll(
      thread_id=thread.id,
      run_id=run.id,
      tool_outputs=tool_outputs
    )
    print("Tool outputs submitted successfully.")
  except Exception as e:
    # Best effort: report the failure and fall through to the status check.
    print("Failed to submit tool outputs:", e)
else:
  print("No tool outputs to submit.")

if run.status == 'completed':
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
else:
  print(run.status)

https://books.google.com/ngrams/json?content=old%20bailey&year_start=1850&year_end=1860&corpus=en-GB-2019&smoothing=3
{"year":{"0":1850,"1":1851,"2":1852,"3":1853,"4":1854,"5":1855,"6":1856,"7":1857,"8":1858,"9":1859,"10":1860},"old bailey":{"0":0.0000000002,"1":0.0000000002,"2":0.0000000001,"3":0.0000000001,"4":0.0000000001,"5":0.0000000001,"6":0.0000000001,"7":0.0000000001,"8":0.0000000001,"9":0.0000000001,"10":0.0000000002}}
Tool outputs submitted successfully.
SyncCursorPage[Message](data=[Message(id='msg_KYV0vyXpjnHs7LcTqIxZpif0', assistant_id='asst_NPJ7zjXnm6ShHHw5RuCldZDF', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='The trends for the word "Old Bailey" between the years 1850 to 1860 are as follows:\n\n- 1850: 0.0000000002\n- 1851: 0.0000000002\n- 1852: 0.0000000001\n- 1853: 0.0000000001\n- 1854: 0.0000000001\n- 1855: 0.0000000001\n- 1856: 0.0000000001\n- 1857: 0.0000000001\n- 1858: 0.0000000001\n- 1859: 0.0000000001\n- 1860: 0.0