In [1]:
import weave

In [2]:
# Initialise weave's SQL client. As the last expression in the cell, the
# call's return value is what the notebook echoes as this cell's output.
weave.init_sql_client()

GraphClientSql()

In [3]:
from weave.weaveflow import Dataset

In [4]:
# A minimal two-row Dataset; each row is a plain dict with keys 'a' and 'b'.
d = Dataset([
    dict(a=5, b=6),
    dict(a=7, b=10),
])

In [5]:
import weave
import asyncio
from weave.weaveflow import Model, Evaluation, Dataset
import json

# We create a model class with one predict function. 
# All inputs, predictions and parameters are automatically captured for easy inspection.
@weave.type()
class ExtractFruitsModel(Model):
    """Model that asks an OpenAI chat model to extract structured data from a sentence.

    The system prompt and the model name are fields of the model; ``predict``
    sends one sentence and returns the parsed JSON object from the reply.
    """

    # System prompt sent as the first message of every request.
    system_message: str
    # Name of the OpenAI chat model to query.
    model_name: str = "gpt-3.5-turbo-1106"

    @weave.op()
    async def predict(self, sentence: str) -> dict:
        """Return the JSON object the chat model extracts from ``sentence``.

        Args:
            sentence: Free-form text sent as the user message.

        Returns:
            The reply content decoded with ``json.loads``. The request asks
            for ``response_format={"type": "json_object"}``, so this is
            expected to be a dict.

        Raises:
            json.JSONDecodeError: If the reply content is not valid JSON.
        """
        # Use the async client so awaiting the request does not block the
        # event loop the way the synchronous client would inside a coroutine.
        from openai import AsyncOpenAI

        client = AsyncOpenAI()
        response = await client.chat.completions.create(
            model=self.model_name,
            messages=[
                {
                    "role": "system",
                    "content": self.system_message
                },
                {
                    "role": "user",
                    "content": sentence
                }
            ],
            temperature=0.7,
            response_format={"type": "json_object"}
        )
        extracted = response.choices[0].message.content
        return json.loads(extracted)

# Initialise the weave SQL client before creating and publishing objects below.
weave.init_sql_client()

# Build the model; the system prompt is passed as the first positional field.
model = ExtractFruitsModel(
    "You will be provided with unstructured data, and your task is to parse it one JSON dictionary with fruit, color and flavor as keys."
)

# Input sentences and, in the same order, the labels we expect to be extracted.
sentences = [
    "There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy.",
    "Pounits are a bright green color and are more savory than sweet.",
    "Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them.",
]
labels = [
    {'fruit': 'neoskizzles', 'color': 'purple', 'flavor': 'candy'},
    {'fruit': 'pounits', 'color': 'bright green', 'flavor': 'savory'},
    {'fruit': 'glowls', 'color': 'pale orange', 'flavor': 'sour and bitter'},
]

# Track a Dataset in weave: one row per sentence, with a string id ('0', '1',
# ...) and its expected extraction. This makes it easy to score a given model
# and compare outputs from different configurations.
dataset = Dataset([
    {'id': str(index), 'sentence': sentence, 'extracted': label}
    for index, (sentence, label) in enumerate(zip(sentences, labels))
])
dataset_ref = weave.publish(dataset, 'example_labels')
# If the Dataset has already been published, it can be loaded instead with:
# dataset = weave.ref('example_labels').get()

# We define two scoring functions to compare our model predictions with a ground truth label.
@weave.op()
def color_score(example: dict, prediction: dict) -> dict:
    """Score whether the predicted color matches the labelled color.

    Args:
        example: A row from the Dataset; its label is read from
            ``example['extracted']['color']``.
        prediction: The output of the model's predict function.

    Returns:
        ``{'correct': bool}``, True only when the two colors are equal.
    """
    # The prediction comes from an LLM and may omit the key; .get() scores
    # that row as incorrect instead of raising KeyError mid-evaluation.
    return {'correct': example['extracted']['color'] == prediction.get('color')}

@weave.op()
def fruit_name_score(example: dict, prediction: dict) -> dict:
    """Score whether the predicted fruit name matches the labelled fruit name.

    Args:
        example: A row from the Dataset; its label is read from
            ``example['extracted']['fruit']``.
        prediction: The output of the model's predict function.

    Returns:
        ``{'correct': bool}``, True only when the two names are equal
        (exact, case-sensitive string comparison).
    """
    # The prediction comes from an LLM and may omit the key; .get() scores
    # that row as incorrect instead of raising KeyError mid-evaluation.
    return {'correct': example['extracted']['fruit'] == prediction.get('fruit')}

@weave.op()
def example_to_model_input(example: dict) -> str:
    """Turn a Dataset row into the value that is passed to ``model.predict``.

    Args:
        example: A row from the Dataset.

    Returns:
        The row's ``"sentence"`` field.
    """
    model_input = example["sentence"]
    return model_input

# Finally, we run an evaluation of this model. 
# This will generate a prediction for each input example, and then score it with each scoring function.
evaluation = Evaluation(
    dataset, scores=[color_score, fruit_name_score], example_to_model_input=example_to_model_input
)
#print(asyncio.run(evaluation.evaluate(model)))
# if you're in a Jupyter Notebook, run:
await evaluation.evaluate(model)

Published Dataset to [no url for obj]


Output()

🍩 View call: <UI URL NOT IMPLEMENTED>


{'color_score': {'correct': {'true_count': 3, 'true_fraction': 1.0}},
 'fruit_name_score': {'correct': {'true_count': 2,
   'true_fraction': 0.6666666666666666}}}