In [44]:
from openai import OpenAI
import weave
from weave import Model, Evaluation
from pydantic import field_validator
from configparser import ConfigParser
import json
from pathlib import Path

In [2]:
config_parser = ConfigParser()
config_parser.read("config.cfg")
LLAMA_KEY = config_parser.get("DEFAULT", "LLAMA_KEY")

In [15]:
with Path("data/centerEmbed/ce1.json").open(encoding="UTF-8") as source:
     objects = json.load(source)

In [133]:
class Llama(Model):

    model_name : str
    api_key : str
    prompt_template : str

    @property
    def api(self):
        return OpenAI(
            api_key=self.api_key, 
            base_url="https://api.llama-api.com"
        )
    

    def format(self, context : str, question : str, params : dict, **kwargs) -> dict:

        prompt = self.prompt_template.format(context=context, question=question)
        return {
            "messages": [
                {"role": "user", "content": prompt},
            ],
            **params,
            **kwargs
        }

    @weave.op()
    async def predict(self, context : str, question : str, params : dict = {}, **kwargs):

        payload = self.format(context, question, params, **kwargs)

        response = await self.api.chat.completions.create(
            model=self.model_name, 
            **payload
        )
        if response is None:
            raise ValueError("No response from model")

        result = response.choices[0].message.content
        return result

In [134]:
PROMPT_TEMPLATE = """You will be given an example consisting of a context and a question to answer. The answer should always be of this form "The N V the N", where N stands for a single word that is a noun, and V stands for a single word that is a verb. 
Here are two samples:

        "Context": "The student the man noticed seemed happy",
        "Question": "Who saw who?",
        "Answer": "The man saw the student.",


        "Context": "The teacher the student saw hit is dead",
        "Question": "Who saw who?",
        "Answer": "The student saw the teacher.",


Context: {context}
Question: {question}

Now answer the question:
"""

In [135]:
model = Llama(
    name="llama-7b-chat",
    description="Weave model for Llama",
    model_name="llama-7b-chat",
    api_key=LLAMA_KEY,
    prompt_template=PROMPT_TEMPLATE
)

In [136]:
examples_sample = objects[:10]

In [137]:
examples = []
for i, ex in enumerate(examples_sample):

    examples.append(
        {
            "id": i,
            "context": ex["Context"],
            "question": ex["Q"],
            "target": ex["A"]
        }
    )


In [138]:
# Define any custom scoring function
@weave.op()
def evaluator(target: dict, model_output: dict) -> dict:
    # Here is where you'd define the logic to score the model output
    return {'correct': target == model_output}


In [139]:
import random
import asyncio

In [140]:
objects[:2]

[{'Context': 'The teacher the student saw is happy',
  'Q': 'Who saw who?',
  'A': 'the student saw the teacher.',
  'level': '1'},
 {'Context': 'The teacher the student saw left',
  'Q': 'Who saw who?',
  'A': 'the student saw the teacher.',
  'level': '1'}]

In [141]:
weave.init("first_eval4")


################
# SAMPLE EXAMPLES
################

sample: list = random.sample(examples, 10)

################
# RUN EVALUATION
################

evaluation = weave.Evaluation(
    dataset=sample,
    scorers=[evaluator]
)

await evaluation.evaluate(model)

Logged in as Weights & Biases user: nthomsen.
View Weave data at https://wandb.ai/cbs-nlp/first_eval4/weave


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


Traceback (most recent call last):
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/flow/eval.py", line 110, in predict_and_score
    model_output = await async_call(model_predict, **model_predict_args)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/nicolai/Desktop/cbs/research/CenterEmbedding/centerembed/lib/python3.12/site-packages/weave/trace/op.py", line 113, in _run_async
    output = await awaited_res
             ^^^^^^^^^^^^^^^^^
  File "/var/folders/zm/ngjbkxbs3zv0psd22_khcy2w0000gn/T/ipykernel_11420/2386259436.py", line 31, in predict
    response = await self.api.chat.completions.create(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object ChatCompletion can't be used in 'await' expression


🍩 https://wandb.ai/cbs-nlp/first_eval4/r/call/32d4ad73-59cc-4953-8c9a-9f93939d9d55


{'evaluator': {'correct': {'true_count': 0, 'true_fraction': 0.0}},
 'model_latency': {'mean': 2.595898175239563}}