In [20]:
import wandb
import weave
from openai import OpenAI
import os
import json

from dotenv import load_dotenv

In [13]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmikhailry[0m ([33mmikhailry-paylocity[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [14]:
def configure():
    load_dotenv()


In [15]:
# load configuration
configure()

In [16]:
client = OpenAI(api_key= os.getenv("OPENAI_API_KEY"))

In [17]:
# Weave will track the inputs, outputs and code of this function
@weave.op()
def extract_dinos(sentence: str) -> dict:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": """In JSON format extract a list of `dinosaurs`, with their `name`, 
their `common_name`, and whether its `diet` is a herbivore or carnivore"""
            },
            {
                "role": "user",
                "content": sentence
            }
            ],
            response_format={ "type": "json_object" }
        )
    return response.choices[0].message.content

In [18]:
# Initialise the weave project
weave.init('jurassic-park')

sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \
Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""

result = extract_dinos(sentence)
print(result)

Logged in as Weights & Biases user: mikhailry.
View Weave data at https://wandb.ai/mikhailry-paylocity/jurassic-park/weave
🍩 https://wandb.ai/mikhailry-paylocity/jurassic-park/r/call/0192caad-171a-7f22-9dda-454190f5b102
{
  "dinosaurs": [
    {
      "name": "Tyrannosaurus rex",
      "common_name": "T. rex",
      "diet": "carnivore"
    },
    {
      "name": "Triceratops",
      "common_name": "Trike",
      "diet": "herbivore"
    },
    {
      "name": "Brachiosaurus",
      "common_name": "Brachi",
      "diet": "herbivore"
    }
  ]
}


In [21]:
@weave.op()
def extract_dinos(sentence: str) -> dict:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": """Extract any dinorsaur `name`, their `common_name`, \
  names and whether its `diet` is a herbivore or carnivore, in JSON format."""
            },
            {
                "role": "user",
                "content": sentence
            }
            ],
            response_format={ "type": "json_object" }
        )
    return response.choices[0].message.content

@weave.op()
def count_dinos(dino_data: dict) -> int:
    # count the number of items in the returned list
    k = list(dino_data.keys())[0]
    return len(dino_data[k])

@weave.op()
def dino_tracker(sentence: str) -> dict:
    # extract dinosaurs using a LLM
    dino_data = extract_dinos(sentence)
    
    # count the number of dinosaurs returned
    dino_data = json.loads(dino_data)
    n_dinos = count_dinos(dino_data)
    return {"n_dinosaurs": n_dinos, "dinosaurs": dino_data}

weave.init('jurassic-park')

sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \
Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""

result = dino_tracker(sentence)
print(result)

🍩 https://wandb.ai/mikhailry-paylocity/jurassic-park/r/call/0192cb4b-387d-75b0-be62-06c10c39da07
{'n_dinosaurs': 3, 'dinosaurs': {'dinosaurs': [{'name': 'Tyrannosaurus rex', 'common_name': 'T. rex', 'diet': 'carnivore'}, {'name': 'Triceratops', 'common_name': 'Trike', 'diet': 'herbivore'}, {'name': 'Brachiosaurus', 'common_name': 'Brachi', 'diet': 'herbivore'}]}}


In [22]:
import weave 

weave.init('jurassic-park')

sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \
Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""

# track metadata alongside our previously defined function
with weave.attributes({'user_id': 'lukas', 'env': 'production'}):
    result = dino_tracker(sentence)

🍩 https://wandb.ai/mikhailry-paylocity/jurassic-park/r/call/0192cb58-a2d8-7ac0-89bd-7e078602f048


### Models

Models
A Model is a combination of data (which can include configuration, trained model weights, or other information) and code that defines how the model operates. By structuring your code to be compatible with this API, you benefit from a structured way to version your application so you can more systematically keep track of your experiments.

In [24]:
from weave import Model
import weave

class RainyLLM(Model):
    attribute1: str
    attribute2: int

    @weave.op()
    def predict(self, input_data: str) -> dict:
        # Model logic goes here
        prediction = self.attribute1 + ' ' + input_data
        return {'pred': prediction}

In [25]:
import weave
weave.init('intro-example')

model = RainyLLM(attribute1='hello', attribute2=5)
model.predict('world')

Logged in as Weights & Biases user: mikhailry.
View Weave data at https://wandb.ai/mikhailry-paylocity/intro-example/weave
🍩 https://wandb.ai/mikhailry-paylocity/intro-example/r/call/0192cb61-c4f4-73f0-8159-fee1f0cd746e


{'pred': 'hello world'}

In [27]:
weave.init('intro-example')

model = RainyLLM(attribute1='howdy', attribute2=10)
model.predict('world')

🍩 https://wandb.ai/mikhailry-paylocity/intro-example/r/call/0192cb7e-72d9-79d0-92d3-803362449a2c


{'pred': 'howdy world'}

Task failed: SyntaxError: invalid syntax (<unknown>, line 1)
Task failed: SyntaxError: invalid syntax (<unknown>, line 1)
Task failed: SyntaxError: invalid syntax (<unknown>, line 1)
Task failed: SyntaxError: invalid syntax (<unknown>, line 1)
Task failed: SyntaxError: invalid syntax (<unknown>, line 1)
Task failed: SyntaxError: invalid syntax (<unknown>, line 1)


### DATASETS 
enable you to collect examples for evaluation and automatically track versions for accurate comparisons. Use this to download the latest version locally with a simple API.

In [28]:
import weave
from weave import Dataset

# Initialize Weave
weave.init('intro-example')

# Create a dataset
dataset = Dataset(name='grammar', rows=[
    {'id': '0', 'sentence': "He no likes ice cream.", 'correction': "He doesn't like ice cream."},
    {'id': '1', 'sentence': "She goed to the store.", 'correction': "She went to the store."},
    {'id': '2', 'sentence': "They plays video games all day.", 'correction': "They play video games all day."}
])

# Publish the dataset
weave.publish(dataset)

# Retrieve the dataset
dataset_ref = weave.ref('grammar').get()

# Access a specific example
example_label = dataset_ref.rows[2]['sentence']

📦 Published to https://wandb.ai/mikhailry-paylocity/intro-example/weave/objects/grammar/versions/iaXZ0zJvJ6t4rIcqqavobD2sXIfwP4RcRNWIkYUyMyw


### Evaluation
The Evaluation class is designed to assess the performance of a Model on a given Dataset or set of examples using scoring functions.
To systematically improve your application, it's helpful to test your changes against a consistent dataset of potential inputs so that you catch regressions and can inspect your apps behaviour under different conditions.

In [37]:
import weave
from weave import Evaluation

# Collect your examples
examples = [
    {"question": "What is the capital of France?", "expected": "Paris"},
    {"question": "Who wrote 'To Kill a Mockingbird'?", "expected": "Harper Lee"},
    {"question": "What is the square root of 64?", "expected": "8"},
]

# Define any custom scoring function
@weave.op()
def exact_match(expected: str, model_output: dict) -> dict:
    # Here is where you'd define the logic to score the model output
    return {"match": expected == model_output}


# Score your examples using scoring functions
evaluation = Evaluation(
    dataset=examples,  # can be a list of dictionaries or a weave.Dataset object
    scorers=[exact_match],  # can be a list of scoring functions
    preprocess_model_input=lambda example: {"expected": example["expected"], "model_output": example["question"]}
)

# Start tracking the evaluation
weave.init('intro-example')
# Run the evaluation
summary = await evaluation.evaluate(exact_match)  # can be a model or simple function

🍩 https://wandb.ai/mikhailry-paylocity/intro-example/r/call/0192ce39-faf1-7851-9bf4-972ff3eac5ad


### Define a Model to evaluate

To evaluate a Model, call evaluate on it using an Evaluation. Models are used when you have attributes that you want to experiment with and capture in weave.

In [34]:
from weave import Model, Evaluation
import asyncio

class MyModel(Model):
    prompt: str

    @weave.op()
    def predict(self, question: str):
        # here's where you would add your LLM call and return the output
        return {'generated_text': 'Hello, ' + self.prompt}

model = MyModel(prompt='World')

evaluation = Evaluation(
    dataset=examples, scorers=[match_score1]
)
weave.init('intro-example') # begin tracking results with weave
asyncio.run(evaluation.evaluate(model))

RuntimeError: asyncio.run() cannot be called from a running event loop

### Tracing
allows you to track the inputs and outputs of functions seamlessly. 

In [39]:
import weave
import json
from openai import OpenAI

@weave.op()
def extract_fruit(sentence: str) -> dict:
    client = OpenAI()

    response = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",
    messages=[
        {
            "role": "system",
            "content": "You will be provided with unstructured data, and your task is to parse it one JSON dictionary with fruit, color and flavor as keys."
        },
        {
            "role": "user",
            "content": sentence
        }
        ],
        temperature=0.7,
        response_format={ "type": "json_object" }
    )
    extracted = response.choices[0].message.content
    return json.loads(extracted)

weave.init('intro-example')
sentence = "There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy."

with weave.attributes({'user_id': 'lukas', 'env': 'production'}):
    extract_fruit(sentence)

🍩 https://wandb.ai/mikhailry-paylocity/intro-example/r/call/0192ce76-d748-7012-9907-5897a92f9871
