In [1]:
from datetime import datetime
from typing import Tuple

from pydantic import BaseModel

In [2]:
# Minimal pydantic model used to demonstrate input coercion.
class Delivery(BaseModel):
    # Declared as datetime; the example below passes an ISO-8601 string.
    timestamp: datetime
    # Declared as a 2-tuple of ints; the example below passes a list.
    dimensions: Tuple[int, int]


# Build the model from loosely-typed inputs; the recorded output below shows
# the string parsed into a datetime and the list turned into a tuple.
m = Delivery(
    dimensions=[10, 20],
    timestamp='2020-01-02T03:04:05Z',
)

# Show the parsed values: repr() of the timestamp, then the dimensions.
print(f"{m.timestamp!r}")
print(m.dimensions)

datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
(10, 20)


In [5]:
# !pip install graphviz
# !pip install instructor

In [3]:
import instructor
from openai import OpenAI
from pydantic import BaseModel

# Patch the OpenAI client with instructor so that
# client.chat.completions.create accepts the response_model keyword.
client = instructor.patch(OpenAI())

# Structured result requested from the model via response_model.
class UserDetail(BaseModel):
    name: str  # the example below expects "Jason"
    age: int  # the example below expects 25

# Ask the patched client for a UserDetail instance instead of raw text.
user = client.chat.completions.create(
    messages=[{"role": "user", "content": "Extract Jason is 25 years old"}],
    response_model=UserDetail,
    model="gpt-3.5-turbo",
)

# The response must already be a parsed model carrying the extracted values.
assert isinstance(user, UserDetail)
assert (user.name, user.age) == ("Jason", 25)

In [4]:
import instructor
from pydantic import BaseModel

# Re-create the instructor-patched client. OpenAI is not imported in this
# cell, so it must already be in scope from an earlier cell.
client = instructor.patch(OpenAI())

# Same two fields as UserDetail above; used as this cell's response_model.
class UserExtract(BaseModel):
    name: str
    age: int

# Same extraction as the previous cell, with a lower-case name in the prompt.
model = client.chat.completions.create(
    messages=[{"role": "user", "content": "Extract jason is 25 years old"}],
    response_model=UserExtract,
    model="gpt-3.5-turbo",
)

# Only the type is checked here; the field values are not asserted.
assert isinstance(model, UserExtract)

# Query Decomposition

In [5]:
import enum
from typing import List

from pydantic import Field


class QueryType(str, enum.Enum):
    """Enumeration representing the types of queries that can be asked to a question answer system."""

    # A single stand-alone question (the default node type of Query).
    SINGLE_QUESTION = "SINGLE"
    # A node that merges the responses of several questions.
    MERGE_MULTIPLE_RESPONSES = "MERGE_MULTIPLE_RESPONSES"


class Query(BaseModel):
    """Class representing a single question in a query plan."""

    # Identifier that other queries list in their dependencies.
    id: int = Field(..., description="Unique id of the query")
    question: str = Field(
        ...,
        description="Question asked using a question answering system",
    )
    # NOTE(review): "dependancies" is misspelled, but the field name is part of
    # the model's schema and appears in the recorded output, so it is kept.
    # Holds ids of queries to answer first; defaults to an empty list.
    dependancies: List[int] = Field(
        default_factory=list,
        description="List of sub questions that need to be answered before asking this question",
    )
    # Defaults to a single-question node.
    node_type: QueryType = Field(
        default=QueryType.SINGLE_QUESTION,
        description="Type of question, either a single question or a multi-question merge",
    )


class QueryPlan(BaseModel):
    """Container class representing a tree of questions to ask a question answering system."""

    query_graph: List[Query] = Field(
        ..., description="The query graph representing the plan"
    )

    def _dependencies(self, ids: List[int]) -> List[Query]:
        """Return the queries in ``query_graph`` whose ``id`` is in ``ids``.

        Args:
            ids: Query ids to look up, e.g. a query's ``dependancies`` list.

        Returns:
            The matching queries, in ``query_graph`` order. Ids that match no
            query are ignored.
        """
        # Build the lookup set once instead of scanning the id list per query.
        wanted = set(ids)
        return [q for q in self.query_graph if q.id in wanted]

In [8]:
import instructor
from openai import OpenAI

# Patch the OpenAI client with instructor; this is what lets
# client.chat.completions.create accept the response_model keyword.
client = instructor.patch(OpenAI())

def query_planner(question: str, model: str = "gpt-4-0125-preview") -> QueryPlan:
    """Ask the LLM to decompose ``question`` into a plan of dependent queries.

    Args:
        question: Natural-language question to decompose.
        model: Chat model used for planning. The default is the model this
            function previously hard-coded.

    Returns:
        The ``QueryPlan`` produced by the patched client for the request.
    """
    # Adjacent literals are concatenated; the original prompt was missing the
    # space between "of" and "breaking".
    system_prompt = (
        "You are a world class query planning algorithm capable of "
        "breaking apart questions into its dependency queries such that "
        "the answers can be used to inform the parent question. "
        "Do not answer the questions, simply provide a correct compute graph "
        "with good specific questions to ask and relevant dependencies. "
        "Before you call the function, think step-by-step to get a better "
        "understanding of the problem."
    )

    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            "content": f"Consider: {question}\nGenerate the correct query plan.",
        },
    ]

    response = client.chat.completions.create(
        model=model,
        temperature=0,
        response_model=QueryPlan,
        messages=messages,
        max_tokens=1000,
    )
    return response

In [9]:
# Decompose a question whose answer depends on an intermediate lookup.
plan = query_planner(
    "What is the difference in populations of Canada and the Jason's home country?"
)
# model_dump() is the pydantic v2 replacement for the deprecated .dict().
plan.model_dump()

{'query_graph': [{'id': 1,
   'question': 'What is the population of Canada?',
   'dependancies': [],
   'node_type': <QueryType.SINGLE_QUESTION: 'SINGLE'>},
  {'id': 2,
   'question': "What is Jason's home country?",
   'dependancies': [],
   'node_type': <QueryType.SINGLE_QUESTION: 'SINGLE'>},
  {'id': 3,
   'question': 'What is the population of {country}?',
   'dependancies': [2],
   'node_type': <QueryType.SINGLE_QUESTION: 'SINGLE'>}]}

# Multi-file creation

In [10]:
from typing import List
from pydantic import Field
from pydantic import BaseModel


class File(BaseModel):
    """
    Correctly named file with contents.
    """

    file_name: str = Field(
        ..., description="The name of the file including the extension"
    )
    body: str = Field(..., description="Correct contents of a file")

    def save(self):
        """Write ``body`` to ``file_name``, creating missing parent directories.

        The file is written as UTF-8 so the result does not depend on the
        platform's default encoding. An existing file is overwritten.

        Raises:
            OSError: If the directories or the file cannot be created.
        """
        from pathlib import Path

        # NOTE(review): file_name comes from the model's response and is used
        # as-is; absolute paths or ".." components are not rejected here.
        # Restrict the target to a known directory before saving output that
        # is not trusted.
        target = Path(self.file_name)
        # Names such as "pkg/__init__.py" need their directory to exist first.
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, "w", encoding="utf-8") as f:
            f.write(self.body)


class Program(BaseModel):
    """
    Set of files that represent a complete and correct program
    """

    # Each entry is a File (file name plus body).
    files: List[File] = Field(..., description="List of files")
    

import instructor
from openai import OpenAI

# Patch the OpenAI client with instructor; this is what lets
# client.chat.completions.create accept the response_model keyword.
client = instructor.patch(OpenAI())


def develop(data: str) -> Program:
    """Generate a ``Program`` from the free-text request in ``data``.

    The request is sent as the user message alongside a fixed system prompt,
    with ``Program`` as the response model.
    """
    system_message = {
        "role": "system",
        "content": "You are a world class programming AI capable of writing correct python scripts and modules. You will name files correct, include __init__.py files and write correct python code with correct imports.",
    }
    user_message = {"role": "user", "content": data}
    completion_kwargs = dict(
        model="gpt-3.5-turbo-0613",
        temperature=0.1,
        max_tokens=1000,
        response_model=Program,
        messages=[system_message, user_message],
    )
    return client.chat.completions.create(**completion_kwargs)

# Free-text specification handed to develop(); the literal is passed verbatim.
program = develop(
    """
        Create a fastapi app with a readme.md file and a main.py file with
        some basic math functions. the datamodels should use pydantic and
        the main.py should use fastapi. the readme.md should have a title
        and a description. The readme should contain some helpful infromation
        and a curl example"""
)

# Dump every generated file: name, a "-" separator, the body, then blank lines.
for file in program.files:
    print(file.file_name, "-", file.body, "\n\n\n", sep="\n")

readme.md
-
# FastAPI App

This is a FastAPI app that provides some basic math functions.

## Usage

To use this app, follow the instructions below:

1. Install the required dependencies by running `pip install -r requirements.txt`.
2. Start the app by running `uvicorn main:app --reload`.
3. Open your browser and navigate to `http://localhost:8000/docs` to access the Swagger UI documentation.

## Example

To perform a basic math operation, you can use the following curl command:

```bash
curl -X POST -H "Content-Type: application/json" -d '{"operation": "add", "a": 2, "b": 3}' http://localhost:8000/calculate
```





main.py
-
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class Calculation(BaseModel):
    operation: str
    a: float
    b: float


@app.post('/calculate')
async def calculate(calculation: Calculation):
    if calculation.operation == 'add':
        result = calculation.a + calculation.b
    elif calculation.operation == 'subtract':
       

In [1]:
# !pip install generate

In [2]:
# adding refactoring
from pydantic import Field, BaseModel

# Response model for refactor(): wraps a single diff string.
# NOTE(review): pydantic presumably exposes the class docstring as the schema
# description shown to the LLM, so confirm before rewording it.
class Diff(BaseModel):
    """
    Changes that must be correctly made in a program's code repository defined as a
    complete diff (Unified Format) file which will be used to `patch` the repository.

    Example:
      --- /path/to/original timestamp
      +++ /path/to/new  timestamp
      @@ -1,3 +1,9 @@
      +This is an important
      +notice! It should
      +therefore be located at
      +the beginning of this
      +document!
      +
       This part of the
       document has stayed the
       same from version to
      @@ -8,13 +14,8 @@
       compress the size of the
       changes.
      -This paragraph contains
      -text that is outdated.
      -It will be deleted in the
      -near future.
      -
       It is important to spell
      -check this dokument. On
      +check this document. On
       the other hand, a
       misspelled word isn't
       the end of the world.
      @@ -22,3 +23,7 @@
       this paragraph needs to
       be changed. Things can
       be added after it.
      +
      +This paragraph contains
      +important new additions
      +to this document.
    """

    # Required field holding the diff text; the description is built from two
    # adjacent string literals.
    diff: str = Field(
        ...,
        description=(
            "Changes in a code repository correctly represented in 'diff' format, "
            "correctly escaped so it could be used in a JSON"
        ),
    )



def refactor(new_requirements: str, program: Program) -> Diff:
    """Request a ``Diff`` that applies ``new_requirements`` to ``program``.

    Every file in ``program`` is rendered as its name followed by its body
    wrapped in ``[[[`` / ``]]]`` markers. The requirements and the rendered
    files are sent as two separate user messages after the system prompt.
    """
    rendered_files = (
        f"{code.file_name}\n[[[\n{code.body}\n]]]\n" for code in program.files
    )
    program_description = "\n".join(rendered_files)

    system_prompt = (
        "You are a world class programming AI capable of refactor "
        "existing python repositories. You will name files correct, include "
        "__init__.py files and write correct python code, with correct imports. "
        "You'll deliver your changes in valid 'diff' format so that they could "
        "be applied using the 'patch' command. "
        "Make sure you put the correct line numbers, "
        "and that all lines that must be changed are correctly marked."
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": new_requirements},
        {"role": "user", "content": program_description},
    ]
    return client.chat.completions.create(
        model="gpt-4",
        temperature=0,
        response_model=Diff,
        messages=conversation,
        max_tokens=1000,
    )

# Request a diff that ports the generated program to flask, then print it.
# NOTE: this cell needs `Program`, `program` and `client` from the multi-file
# cell; the NameError recorded below is what a kernel without them produces.
changes = refactor(
    program=program,
    new_requirements="Refactor this code to use flask instead.",
)
print(changes.diff)

NameError: name 'Program' is not defined

# Answering Questions with Validated Citations

This example shows how to use Instructor with validators to not only add citations to generated answers but also prevent hallucinations, by ensuring that every statement made by the LLM is backed by a direct quote from the provided context and that those quotes actually exist in it. Two Python classes, `Fact` and `QuestionAnswer`, are defined to encapsulate the information of individual facts and the entire answer, respectively.



In [41]:
from pydantic import Field, BaseModel, model_validator, FieldValidationInfo
from typing import List
import re

# One statement of the answer plus the quotes that support it.
# NOTE(review): no class docstring is added because pydantic would presumably
# include it in the schema sent to the LLM — confirm before adding one.
class Fact(BaseModel):
    fact: str = Field(...)
    substring_quote: List[str] = Field(...)

    @model_validator(mode="after")
    def validate_sources(self, info: FieldValidationInfo) -> "Fact":
        """Replace ``substring_quote`` with the quotes actually found in context.

        The source text is read from the validation context under the
        ``"text_chunk"`` key. Every occurrence of every quote is located in
        it, and ``substring_quote`` is rewritten to the matched slices, so
        quotes that do not occur in the text disappear.

        Raises:
            ValueError: If no validation context or no ``"text_chunk"`` entry
                was supplied. Previously this surfaced as an AttributeError
                or TypeError from inside the validator.
        """
        context = info.context or {}
        text_chunks = context.get("text_chunk")
        if text_chunks is None:
            raise ValueError(
                "Fact validation requires a validation context with a 'text_chunk' entry"
            )
        spans = list(self.get_spans(text_chunks))
        self.substring_quote = [text_chunks[start:end] for start, end in spans]
        return self

    def get_spans(self, context):
        """Yield ``(start, end)`` spans in ``context`` for every quote, in order."""
        for quote in self.substring_quote:
            yield from self._get_span(quote, context)

    def _get_span(self, quote, context):
        """Yield the span of each literal occurrence of ``quote`` in ``context``."""
        # An empty pattern matches at every position, which would make an
        # uncited fact look supported; blank quotes therefore yield nothing.
        if not quote.strip():
            return
        for match in re.finditer(re.escape(quote), context):
            yield match.span()

# The question together with the facts that make up its answer.
class QuestionAnswer(BaseModel):
    question: str = Field(...)
    answer: List[Fact] = Field(...)

    @model_validator(mode="after")
    def validate_sources(self) -> "QuestionAnswer":
        """Keep only the facts that still have at least one quote."""
        supported = []
        for fact in self.answer:
            if fact.substring_quote:
                supported.append(fact)
        self.answer = supported
        return self

from openai import OpenAI
import instructor

# Instructor-patched client; ask_ai below calls it with response_model and
# validation_context.
client = instructor.patch(OpenAI())

def ask_ai(question: str, context: str) -> QuestionAnswer:
    """Answer ``question`` from ``context`` as a ``QuestionAnswer``.

    Args:
        question: The question to answer.
        context: Source text. It is sent to the model as a user message and
            also passed as the ``"text_chunk"`` validation context, which is
            where ``Fact.validate_sources`` reads the text to check quotes.

    Returns:
        The ``QuestionAnswer`` produced by the patched client.
    """
    return client.chat.completions.create(
        model="gpt-3.5-turbo-0613",
        temperature=0,
        response_model=QuestionAnswer,
        messages=[
            {
                "role": "system",
                "content": "You are a world class algorithm to answer questions with correct and exact citations.",
            },
            {"role": "user", "content": context},
            {"role": "user", "content": f"Question: {question}"},
        ],
        validation_context={"text_chunk": context},
    )


# `question` must be defined at cell level: it used to be indented into
# ask_ai after the return statement, so it was never assigned.
question = "What did the author do during college?"
context = """
My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.
I went to an arts high school but in university I studied Computational Mathematics and physics.
As part of coop I worked at many companies including Stitchfix, Facebook.
I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.
"""
ask_ai(question, context)

QuestionAnswer(question='What did the author do during college?', answer=[Fact(fact='The author studied Computational Mathematics and physics in university.', substring_quote=['in university I studied Computational Mathematics and physics.']), Fact(fact='The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.', substring_quote=['started the Data Science club at the University of Waterloo', 'president of the club for 2 years.'])])