# Example Routers you'll see in week 4

In [None]:
import openai
import instructor

from typing import Iterable, Literal
from pydantic import BaseModel


# Schema for a weather-lookup request; one of the two types the router call
# below accepts in its response_model union.
# NOTE(review): documented with # comments rather than a docstring, since a
# class docstring would presumably end up in the schema sent to the model.
class Weather(BaseModel):
    location: str  # place to look up, as a plain string
    units: Literal["imperial", "metric"]  # only these two values validate


# Schema for a web-search request; the second type accepted in the router
# call's response_model union.
class GoogleSearch(BaseModel):
    query: str  # search text, as a plain string


# Wrap the OpenAI client with instructor in PARALLEL_TOOLS mode
# (presumably so one response can carry several tool calls - confirm in the
# instructor docs).
client = instructor.from_openai(openai.OpenAI(), mode=instructor.Mode.PARALLEL_TOOLS)

# A single request whose response_model is an Iterable over the union of both
# schemas, so each yielded item is either a Weather or a GoogleSearch.
function_calls = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You must always use tools"},
        {
            "role": "user",
            "content": "What is the weather in toronto and dallas and who won the super bowl?",
        },
    ],
    response_model=Iterable[Weather | GoogleSearch],
)

# Print every item the call produced, one per line.
for fc in function_calls:
    print(fc)

# Extracting lists of items 

Imagine extracting these items and writing them straight to a database.

In [8]:
from typing import List
from pydantic import BaseModel

# One extracted fact: who it is about and what is stated about them.
class Fact(BaseModel):
    person: str  # name of the person the statement refers to
    statement: str  # the fact itself, as free text

# Rebind `client` to an instructor-wrapped OpenAI client with no explicit mode.
client = instructor.from_openai(openai.OpenAI())

# create_iterable is given the single-item model (Fact); the result is consumed
# by the loop below, one Fact per iteration.
facts = client.chat.completions.create_iterable(
    model="gpt-4-0613",
    messages=[
        {"role": "system", "content": "Extract facts about people."},
        {
            "role": "user",
            "content": "Once upon a time, there was a curious boy named Alex who loved to explore the woods near his home. One day, he discovered a hidden cave that seemed to glow with an otherworldly light. As he ventured deeper into the cave, he found a magical artifact that granted him the ability to talk to animals. From that day on, Alex's life was filled with incredible adventures and newfound friendships with the creatures of the forest.",
        },
    ],
    response_model=Fact,
    stream=True,
)

# Print each Fact as it is yielded.
for fact in facts:
    print(fact)

person='Alex' statement='Alex was a curious boy who loved to explore the woods near his home.'
person='Alex' statement='He discovered a hidden cave that seemed to glow with an otherworldly light.'
person='Alex' statement='He found a magical artifact that granted him the ability to talk to animals.'
person='Alex' statement="Alex's life was filled with incredible adventures and newfound friendships with the creatures of the forest."


In [9]:
from typing import Optional
from pydantic import BaseModel

# Headline figures pulled from an earnings report.
# The first three fields are required; the last two default to None.
class FinancialStatement(BaseModel):
    revenue: float
    net_income: float
    earnings_per_share: float
    operating_expenses: Optional[float] = None  # optional, None when not supplied
    cash_flow: Optional[float] = None  # optional, None when not supplied

# Rebind `client` to an instructor-wrapped OpenAI client with no explicit mode.
client = instructor.from_openai(openai.OpenAI())

# Single (non-iterable) extraction: the whole report goes in as the user
# message and the response is requested as one FinancialStatement.
financial_data = client.chat.completions.create(
    model="gpt-4-0613",
    messages=[
        {"role": "system", "content": "Extract financial data from the earnings report."},
        {
            "role": "user",
            "content": """
            Q2 2023 Earnings Report for TechCorp Inc.

            We are pleased to report strong financial results for the second quarter of 2023. Our total revenue reached an impressive $1.25 billion, demonstrating robust growth in our business operations. Our net income for the quarter stood at $320 million, reflecting our commitment to profitability and efficient resource management. Additionally, we achieved earnings per share of $2.15, which we believe will be well-received by our shareholders and the investment community.

            Our focus on cost management and operational efficiency continues to drive profitability.
            """,
        },
    ],
    response_model=FinancialStatement,
)

# Show the parsed model using its default string form.
print(financial_data)


revenue=1250000000.0 net_income=320000000.0 earnings_per_share=2.15 operating_expenses=None cash_flow=None


Here we also do some classification; perhaps that's something we will want to filter on later in the router.

In [14]:
from typing import Literal
from pydantic import BaseModel

# Structured summary of a piece of text: a title, a coarse category label,
# the summary itself and the named entities it mentions.
# NOTE(review): documented with # comments rather than a docstring, since a
# class docstring would presumably end up in the schema sent to the model.
class SummarizedContent(BaseModel):
    title: str
    category: Literal["news", "research", "blog", "other"]  # closed label set
    summary: str
    # Built-in generic instead of typing.List: this cell imports only Literal
    # and BaseModel, so `List` resolved only when an earlier cell had already
    # imported it and raised NameError when the cell was run on its own.
    entities: list[str]

# Rebind `client` to an instructor-wrapped OpenAI client with no explicit mode.
client = instructor.from_openai(openai.OpenAI())

# One call produces the title, category, summary and entity list together,
# because all four are fields of the requested SummarizedContent model.
summarized_content = client.chat.completions.create(
    model="gpt-4-0613",
    messages=[
        {
            "role": "system",
            "content": "Summarize the given text with a title and summary. The summary should contain more abstract ideas while preserving numbers and named entities."
        },
        {
            "role": "user",
            "content": """
            In a groundbreaking study published in Nature, researchers at Stanford University have discovered a new species of deep-sea creature living in the Mariana Trench. The organism, named Mariana abyssalis, was found at a depth of 10,984 meters, making it one of the deepest-living creatures ever discovered. Dr. Emily Chen, the lead researcher, stated that this discovery could revolutionize our understanding of life in extreme environments. The creature's unique adaptations to high pressure and low temperature conditions may have implications for biotechnology and medical research. The team used advanced robotic submersibles to collect samples and conduct in-situ observations over a period of 18 months. This discovery is part of the Ocean Frontier Project, a $50 million initiative aimed at exploring the least known areas of our planet's oceans.
            """
        }
    ],
    response_model=SummarizedContent
)

# Pretty-print the validated model as indented JSON.
print(summarized_content.model_dump_json(indent=2))

{
  "title": "New Deep-Sea Creature Discovered in Mariana Trench",
  "category": "research",
  "summary": "Scientists from Stanford University have identified a new species, Mariana abyssalis, in the Mariana Trench at a depth of 10,984 meters, marking it as one of the deepest dwellers of the sea. This discovery, a part of the Ocean Frontier Project, challenges our understanding of survival in harsh conditions and may have applications in biotechnology and medical research. The discovery utilized robotic submersibles over 18 months to gather data.",
  "entities": [
    "Stanford University",
    "Mariana abyssalis",
    "Mariana Trench",
    "Dr. Emily Chen",
    "Ocean Frontier Project"
  ]
}


In [None]:
# Bare dict literal with the same content as the JSON printed by the previous
# cell; it is not assigned to anything, so running the cell only displays it.
{
    "title": "New Deep-Sea Creature Discovered in Mariana Trench",
    "category": "research",
    "summary": "Scientists from Stanford University have identified a new species, Mariana abyssalis, in the Mariana Trench at a depth of 10,984 meters, marking it as one of the deepest dwellers of the sea. This discovery, a part of the Ocean Frontier Project, challenges our understanding of survival in harsh conditions and may have applications in biotechnology and medical research. The discovery utilized robotic submersibles over 18 months to gather data.",
    "entities": [
        "Stanford University",
        "Mariana abyssalis",
        "Mariana Trench",
        "Dr. Emily Chen",
        "Ocean Frontier Project",
    ],
}

In [1]:
from openai import OpenAI
from io import StringIO
from typing import Annotated, Any, List
from pydantic import (
    BaseModel,
    BeforeValidator,
    PlainSerializer,
    InstanceOf,
    WithJsonSchema,
)
import instructor
import pandas as pd
from rich.console import Console

# Rich console, used further down to print extracted captions and tables.
console = Console()
# Instructor-wrapped OpenAI client in TOOLS mode; shared by the calls below.
client = instructor.from_openai(
    client=OpenAI(),
    mode=instructor.Mode.TOOLS,
)


def md_to_df(data: Any) -> Any:
    if isinstance(data, str):
        return (
            pd.read_csv(
                StringIO(data),  # Get rid of whitespaces
                sep="|",
                index_col=1,
            )
            .dropna(axis=1, how="all")
            .iloc[1:]
            .map(lambda x: x.strip())
        )  # type: ignore
    return data


# Field type that holds a pandas DataFrame but is described to the outside as
# a markdown string. Each piece of metadata covers one direction:
MarkdownDataFrame = Annotated[
    # The validated value must be an actual DataFrame instance.
    InstanceOf[pd.DataFrame],
    # Raw input goes through md_to_df first, so markdown text becomes a frame.
    BeforeValidator(md_to_df),
    # On serialization the frame is rendered back with DataFrame.to_markdown().
    PlainSerializer(lambda x: x.to_markdown()),
    # The JSON schema declares the field as a plain string with the
    # description below (the description text is part of that schema).
    WithJsonSchema(
        {
            "type": "string",
            "description": """
                The markdown representation of the table, 
                each one should be tidy, do not try to join tables
                that should be seperate""",
        }
    ),
]


# One extracted table: a caption plus its data.
class Table(BaseModel):
    caption: str  # text printed alongside the table further down
    dataframe: MarkdownDataFrame  # markdown text in, pandas DataFrame once validated


# Container model so a single response can carry any number of tables.
class MultipleTables(BaseModel):
    tables: List[Table]  # iterated table by table by the extraction loop


# Hand-built instance with a single table of three columns and two rows. The
# DataFrame is passed in directly rather than as markdown text.
# Not referenced again in the visible part of the notebook.
example = MultipleTables(
    tables=[
        Table(
            caption="This is a caption",
            dataframe=pd.DataFrame(
                {
                    "Chart A": [10, 40],
                    "Chart B": [20, 50],
                    "Chart C": [30, 60],
                }
            ),
        )
    ]
)


def extract(url: str) -> MultipleTables:
    """Ask the model for the tables shown in one image.

    Args:
        url: Address of the image; forwarded unchanged as the ``image_url``
            part of the user message.

    Returns:
        The result of ``client.chat.completions.create`` called with
        ``response_model=MultipleTables``.
    """
    # The instruction text is reproduced exactly, indentation included,
    # because the whole literal is what gets sent to the model.
    instructions = """
                            First, analyze the image to determine the most appropriate headers for the tables.
                            Generate a descriptive h1 for the overall image, followed by a brief summary of the data it contains. 
                            For each identified table, create an informative h2 title and a concise description of its contents.
                            Finally, output the markdown representation of each table.


                            Make sure to escape the markdown table properly, and make sure to include the caption and the dataframe.
                            including escaping all the newlines and quotes. Only return a markdown table in dataframe, nothing else.
                        """

    # The user message has two parts: the image first, then the instructions.
    image_part = {"type": "image_url", "image_url": {"url": url}}
    text_part = {"type": "text", "text": instructions}
    user_message = {"role": "user", "content": [image_part, text_part]}

    return client.chat.completions.create(
        model="gpt-4-turbo",
        max_tokens=4000,
        response_model=MultipleTables,
        messages=[user_message],
    )


# Chart images to run through extract().
urls = [
    "https://a.storyblok.com/f/47007/2400x1260/f816b031cb/uk-ireland-in-three-charts_chart_a.png/m/2880x0",
    "https://a.storyblok.com/f/47007/2400x2000/bf383abc3c/231031_uk-ireland-in-three-charts_table_v01_b.png/m/2880x0",
]

# One extract() call per image; every returned table is printed as its
# caption, a newline, then the DataFrame, through the rich console.
for url in urls:
    for table in extract(url).tables:
        console.print(table.caption, "\n", table.dataframe)

In [3]:
# Baseline for comparison: send an image with a deliberately unspecific
# question and request the answer as a plain string (response_model=str).
# The image is selected explicitly as the last entry of `urls`. Previously the
# request read `url`, the loop variable left behind by the extraction loop, so
# which image was sent depended on how far that loop had last run.
resp = client.chat.completions.create(
        model="gpt-4o-mini",
        max_tokens=4000,
        response_model=str,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": urls[-1]},
                    },
                    {
                        "type": "text",
                        "text": """
                            whats in this image
                        """,
                    },
                ],
            }
        ],
    )
print(resp)

This image displays the top 10 grossing apps in Ireland for October 2023, categorized by Android and iOS. 

**For Android:**
1. Google One (Productivity)
2. Disney+ (Entertainment)
3. TikTok - Videos, Music & LIVE (Entertainment)
4. Candy Crush Saga (Games)
5. Tinder: Dating, Chat & Friends (Social networking)
6. Coin Master (Games)
7. Roblox (Games)
8. Bumble - Dating & Make Friends (Dating)
9. Royal Match (Games)
10. Spotify: Music and Podcasts (Music & Audio)

**For iOS:**
1. Tinder: Dating, Chat & Friends (Social networking)
2. Disney+ (Entertainment)
3. YouTube: Watch, Listen, Stream (Entertainment)
4. Audible: Audio Entertainment (Entertainment)
5. Candy Crush Saga (Games)
6. TikTok - Videos, Music & LIVE (Entertainment)
7. Bumble - Dating & Make Friends (Dating)
8. Roblox (Games)
9. LinkedIn: Job Search & News (Business)
10. Duolingo - Language Lessons (Education) 

The source of the data is from SensorTower.


In [None]:
# Note: let's find some examples of scenes and diagrams where "whats in this image" performs very differently than a specific prompt.