# Overview

- Call OpenAI to get a chat completion
- Call OpenAI to get a JSON completion
- Wrapper function
- Function that takes a question -> builds a query -> searches -> summarizes and shows results
- One-cell Python server


In [1]:
%pip install python-dotenv openai wikipedia
from openai import OpenAI
import os
from dotenv import load_dotenv
load_dotenv()
# Shared OpenAI client used by the helpers below. The key is looked up in the
# environment; passing it explicitly matches the client's default behaviour
# and could be omitted.
oai = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


## Basics


In [2]:
import json
# Default system prompt used by chat() and chat_json() when the caller does not
# supply one.
SYSTEM = "Answer concicsely and follow all directions provided"


def chat(msg: str, json: bool = False, system: str = SYSTEM, model: str = "gpt-4o") -> str:
    """Send one user message to the chat completions API and return the reply text.

    Args:
        msg: The user message to send.
        json: When true, request the ``json_object`` response format instead
            of ``text``. The name shadows the ``json`` module inside this
            function; it is kept because callers pass it by keyword.
        system: System prompt placed before the user message. A falsy value
            (e.g. ``""``) sends the user message alone.
        model: Name of the model to query.

    Returns:
        The message content of the first choice in the completion.

    Raises:
        RuntimeError: If the first choice carries no content.
    """
    fmt = "json_object" if json else "text"

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": msg})

    chat_completion = oai.chat.completions.create(
        messages=messages,
        model=model,
        response_format={"type": fmt},
    )
    content = chat_completion.choices[0].message.content
    if content is None:
        # Attach the whole completion so the caller can inspect what came back.
        raise RuntimeError('invalid response', chat_completion)
    # Echo the exchange so notebook users can follow what was sent and received.
    print(f"Request: {system} - {msg} (JSON = {json})\nResponse: {content}")
    return content

def chat_json(msg: str, system: str = SYSTEM) -> dict:
    """Ask the model for a JSON-mode reply and return it decoded.

    Delegates to ``chat`` with JSON mode switched on, then parses the returned
    text with ``json.loads``.
    """
    raw_reply = chat(msg, True, system)
    decoded = json.loads(raw_reply)
    return decoded



In [3]:
# Smoke-test both helpers: first a plain-text completion, then a JSON-mode
# completion that chat_json() parses before it is printed.
print(chat("how are you"))
print(chat_json("how are you? Respond in json like {'response': '<text here>'}"))

Request: Answer concicsely and follow all directions provided - how are you (JSON = False)
Response: I'm an AI, so I don't have feelings, but thanks for asking! How can I assist you today?
I'm an AI, so I don't have feelings, but thanks for asking! How can I assist you today?
Request: Answer concicsely and follow all directions provided - how are you? Respond in json like {'response': '<text here>'} (JSON = True)
Response: {
  "response": "I'm an AI, so I don't have feelings, but I'm here and ready to help you!"
}
{'response': "I'm an AI, so I don't have feelings, but I'm here and ready to help you!"}


## Wikipedia Search

In [7]:
import wikipedia
def get_query(user_input: str) -> str:
    """Turn a natural-language question into a short search query.

    Builds a few-shot prompt around ``user_input``, sends it through
    ``chat_json`` and returns the value stored under the ``'query'`` key of
    the parsed reply.
    """
    few_shot_prompt = f"""
    Examples:
    input: Who was the first president of the united states?
    query: {{ 'query': 'united states presidents' }}

    input: What is the most popular dog breed in the united states?
    query: {{ 'query': 'popular dog breeds' }}

    input: What is MIT LL?
    query: {{ 'query': 'MIT Lincoln Laboratory' }}
    
    Response in JSON in the form of {{ 'query': '<user query here>'}}

    input: {user_input}
    query: 
    """

    return chat_json(few_shot_prompt)['query']

    
def get_keyword_query(user_input: str) -> list[dict]:
    """Extract search keywords, each with related phrases, from a question.

    Sends a one-shot prompt through ``chat_json`` and returns whatever the
    parsed reply holds under ``'keywords'``. The prompt asks for a list of
    ``{'keyword': ..., 'related': [...]}`` objects, so the result is a list
    rather than a string.

    Args:
        user_input: The question to analyse.

    Returns:
        The value of the ``'keywords'`` entry of the model's JSON reply.

    Raises:
        KeyError: If the reply has no ``'keywords'`` entry.
    """
    prompt = f"""
    Pull out the keywords that the user should search for to 
    find results for this query.
    Also include the related phrases for each keyword.
    
    Examples:
    input: Who was the first president of the united states?
    query: {{ 'keywords': [{{ 'keyword': 'president', 'related': ['elections'] }}] }}

    Response in JSON in the form of {{ 'keywords': [ {{ 'keyword': '<word>', 'related': ['list'] }} ]}}

    input: {user_input}
    query: 
    """

    response = chat_json(prompt)
    return response['keywords']
    
    


def summarize(q: str, summaries: list[tuple[str, str]]) -> str:
    """Answer ``q`` with the model, using the given page summaries as context.

    Args:
        q: The question to answer.
        summaries: ``(title, text)`` pairs; each is rendered as
            ``"title: text"`` and the entries are separated by ``---`` lines.

    Returns:
        The model's reply text, requested as concise markdown.
    """
    system = "respond in concise well formatted markdown."

    # Join the context outside the f-string: a backslash inside an f-string
    # replacement field is a syntax error before Python 3.12.
    context = "\n---\n".join(f"{entry[0]}: {entry[1]}" for entry in summaries)

    prompt = f"""
    Answer the following: {q}
    
    context:
    {context}

    Now answer the question: {q}
    Answer:
    """

    return chat(prompt, json=False, system=system)

def answer_question(q: str, max_summaries: int = 3) -> tuple[str, list[list[str]]]:
    """Answer a question from Wikipedia search results.

    Pipeline: derive a search query with ``get_query``, run
    ``wikipedia.search``, collect short summaries for the first usable hits,
    then ask ``summarize`` to answer the question from those summaries.

    Args:
        q: The user's question.
        max_summaries: Upper bound on how many page summaries are collected
            and passed to the model.

    Returns:
        ``(answer, summaries)`` where ``summaries`` is a list of
        ``[title, summary]`` pairs that were used as context.
    """
    query = get_query(q)
    titles = wikipedia.search(query)

    def get_summary(title):
        """Return a three-sentence summary for ``title``, or None if unusable."""
        try:
            return wikipedia.summary(title, sentences=3)
        except wikipedia.DisambiguationError:
            # Ambiguous titles are skipped rather than guessing an option.
            print(f"Disambiguation page found for {title}, skipping.")
        except wikipedia.PageError:
            # The search hit does not resolve to an existing page.
            print(f"Page not found for {title}, skipping.")
        return None

    print(titles)
    summaries = []
    for title in titles:
        # Stop before fetching another page once enough context is gathered.
        if len(summaries) >= max_summaries:
            break
        page_summary = get_summary(title)
        if page_summary is not None:
            summaries.append([title, page_summary])

    answer = summarize(q, summaries)
    return (answer, summaries)


In [8]:
# Try the keyword extractor on a sample question; as the last expression in
# the cell, its return value is displayed below the logged request/response.
get_keyword_query('When was university of florida founded?')

Request: Answer concicsely and follow all directions provided - 
    Pull out the keywords that the user should search for to 
    find results for this query.
    Also include the related phrases for each keyword.
    
    Examples:
    input: Who was the first president of the united states?
    query: { 'keywords': [{ 'keyword': 'president', 'related': ['elections'] }] }

    Response in JSON in the form of { 'keywords': [ { 'keyword': '<word>', 'related': ['list'] } ]}

    input: When was university of florida founded?
    query: 
     (JSON = True)
Response: {
    "keywords": [
        {
            "keyword": "university of florida",
            "related": ["UF", "Florida university"]
        },
        {
            "keyword": "founded",
            "related": ["established", "started", "created"]
        }
    ]
}


[{'keyword': 'university of florida', 'related': ['UF', 'Florida university']},
 {'keyword': 'founded', 'related': ['established', 'started', 'created']}]

## Simple Webserver

In [6]:
!pip install FastAPI mistune


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [10]:
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
import mistune

# Application object that the route handlers below register themselves on.
app = FastAPI()

# Page template shared by both routes. The literal "{{results}}" marker is
# swapped for the rendered output with str.replace; this is a plain string,
# not an f-string or template-engine syntax.
html = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Search Input</title>
  <link rel="stylesheet" href="https://unpkg.com/@picocss/pico@latest/css/pico.min.css">
  <style>
    body {
      display: flex;
      flex-direction: column;
      justify-content: center;
      align-items: center;
      height: 100vh;
    }
  </style>
</head>
<body>
  <form action="/search">
    <input type="search" name="search" placeholder="Search..." aria-label="Search">
  </form>
  <div>
    {{results}}
  </div>
</body>
</html>
"""

@app.get("/")
async def root():
    """Serve the search page with a placeholder where results will appear."""
    placeholder = "<p>perform a search to see results</p>"
    page = html.replace("{{results}}", placeholder)
    return HTMLResponse(page, status_code=200)

@app.get("/search")
async def handle_search(search: str):
    """Answer the submitted search and render it into the page template.

    Args:
        search: The question taken from the ``search`` query parameter.

    Returns:
        An ``HTMLResponse`` containing the markdown answer, followed by a
        "Citations" list of the page titles and summaries used, rendered to
        HTML and substituted into the page template.
    """
    import asyncio  # local import keeps this block self-contained

    q = search
    # answer_question() makes blocking network calls; run it in a worker
    # thread so the event loop stays free to serve other requests.
    md, wiki = await asyncio.to_thread(answer_question, q)

    citations = "\n".join(f"- {w[0]}\n    {w[1]}\n" for w in wiki)
    md += "\n\nCitations:\n\n" + citations

    # NOTE(review): mistune.html is believed to pass raw HTML through without
    # escaping; since this markdown comes from model and Wikipedia text,
    # confirm and consider a renderer created with escape=True.
    htm = str(mistune.html(md))
    print(md)
    return HTMLResponse(html.replace("{{results}}", htm), status_code=200)

import uvicorn

# Start uvicorn in-process when this cell is run directly. The bare top-level
# `await` only works where the host supports it (such as a notebook cell with
# a running event loop); a plain Python script would need something like
# asyncio.run(server.serve()) instead.
if __name__ == "__main__":
    config = uvicorn.Config(app)
    server = uvicorn.Server(config)
    await server.serve()

INFO:     Started server process [1425558]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:41856 - "GET / HTTP/1.1" 200 OK
Request: Answer concicsely and follow all directions provided - 
    Examples:
    input: Who was the first president of the united states?
    query: { 'query': 'united states presidents' }

    input: What is the most popular dog breed in the united states?
    query: { 'query': 'popular dog breeds' }

    input: What is MIT LL?
    query: { 'query': 'MIT Lincoln Laboratory' }
    
    Response in JSON in the form of { 'query': '<user query here>'}

    input: Where did Obama go to undergraduate?
    query: 
     (JSON = True)
Response: { "query": "Obama undergraduate education" }
['List of presidents of the United States by education', 'Barack Obama', 'Family of Barack Obama', 'Ann Dunham', 'Early life and career of Barack Obama', 'Education in the United States', 'Title IX', 'Maya Soetoro-Ng', 'Barack Obama citizenship conspiracy theories', 'University of Hawaiʻi at Mānoa']
Request: respond in concise well formatted markdown. - 
 

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [1425558]
