In [44]:
import asyncio
import json
import random
from configparser import ConfigParser
from pathlib import Path
from typing import Optional

import pandas as pd
import weave
from openai import AsyncOpenAI
from pydantic import field_validator
from weave import Model, Evaluation

In [80]:
# Read the Llama API key from config.cfg so the secret is not written in the
# notebook itself.
config_parser = ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed. Check that list so a missing file is reported as such
# instead of surfacing later as a NoOptionError for LLAMA_KEY.
if not config_parser.read("config.cfg"):
    raise FileNotFoundError(
        "config.cfg not found or unreadable; it must define LLAMA_KEY under [DEFAULT]"
    )
LLAMA_KEY = config_parser.get("DEFAULT", "LLAMA_KEY")

In [46]:
# Parse the raw items from the ce1.json data file.
with Path("data/centerEmbed/ce1.json").open(encoding="UTF-8") as source:
    objects = json.load(source)

# Re-key every raw record to lowercase field names and tag it with its
# position in the file as "id". "context"/"question" are the argument names of
# Llama.predict and "target" is the argument name of the scorer.
examples = [
    dict(
        id=position,
        context=record["Context"],
        question=record["Q"],
        target=record["A"],
        level=record["level"],
    )
    for position, record in enumerate(objects)
]


In [47]:
class Llama(Model):
    """Weave model that sends a templated prompt to a Llama chat endpoint.

    Fields:
        model_name: value passed as ``model`` to the chat-completions call.
        prompt_template: ``str.format`` template filled with ``context`` and
            ``question`` keyword arguments.
    """

    model_name: str
    prompt_template: str

    @property
    def api_key(self):
        """Return the API key held in the notebook-level ``LLAMA_KEY``."""
        return LLAMA_KEY

    @property
    def api(self):
        """Return an ``AsyncOpenAI`` client aimed at the Llama API base URL.

        A new client object is constructed on every access.
        """
        return AsyncOpenAI(
            api_key=self.api_key,
            base_url="https://api.llama-api.com",
        )

    def format(self, context: str, question: str, params: dict, **kwargs) -> dict:
        """Build the keyword arguments for a chat-completions request.

        Args:
            context: text substituted for ``{context}`` in the template.
            question: text substituted for ``{question}`` in the template.
            params: extra request parameters merged into the payload; ``None``
                is treated as "no extra parameters".
            **kwargs: further request parameters; on a key clash these win
                over ``params``.

        Returns:
            A dict with a single-user-message ``"messages"`` list plus the
            merged extra parameters.
        """
        prompt = self.prompt_template.format(context=context, question=question)
        return {
            "messages": [
                {"role": "user", "content": prompt},
            ],
            **(params or {}),
            **kwargs,
        }

    @weave.op()
    async def predict(
        self,
        context: str,
        question: str,
        params: Optional[dict] = None,
        **kwargs
    ):
        """Query the model for one example and return the reply text.

        Args:
            context: context sentence inserted into the prompt.
            question: question inserted into the prompt.
            params: optional extra parameters for the completion request.
                Defaults to ``None`` rather than a shared mutable ``{}``.
            **kwargs: passed to ``weave.attributes`` for the duration of the
                call; they are not forwarded to the API request.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            ValueError: if the API returns nothing or an empty choice list.
        """
        with weave.attributes(kwargs):
            payload = self.format(context, question, params)

            response = await self.api.chat.completions.create(
                model=self.model_name,
                **payload
            )
            if response is None:
                raise ValueError("No response from model")
            # Guard the [0] index below: an empty choice list would otherwise
            # raise an IndexError.
            if not response.choices:
                raise ValueError("Model response contained no choices")

            return response.choices[0].message.content

In [48]:
# Few-shot prompt filled in through str.format by Llama.format: {context} and
# {question} are the only placeholders, so any literal braces added to this
# text must be doubled ({{ }}).
# NOTE(review): the first sample's context uses the verb "noticed" while its
# question and answer use "saw" — confirm this is intended.
PROMPT_TEMPLATE = """You will be given an example consisting of a context and a question to answer. The answer should always be of this form "The N V the N", where N stands for a single word that is a noun, and V stands for a single word that is a verb. 
Here are two samples:

        "Context": "The student the man noticed seemed happy",
        "Question": "Who saw who?",
        "Answer": "The man saw the student.",


        "Context": "The teacher the student saw hit is dead",
        "Question": "Who saw who?",
        "Answer": "The student saw the teacher.",

Context: {context}
Question: {question}

Now answer the question:
"""

In [49]:
# The single model instance under evaluation. `model_name` is the id sent to
# the API; `name` and `description` label the weave object.
model = Llama(
    prompt_template=PROMPT_TEMPLATE,
    model_name="llama-7b-chat",
    name="llama-7b-chat",
    description="Weave model for Llama",
)

In [50]:
def postprocess(string):
    """Normalize text for comparison: trim outer whitespace, then lowercase."""
    trimmed = string.strip()
    return trimmed.lower()

@weave.op()
def evaluator(target: str, model_output: str) -> dict:
    """Score one example by exact match after normalization.

    Both strings go through ``postprocess`` (strip + lowercase) before being
    compared. Normalizing only the model output would make any target that
    contains capitals or surrounding whitespace impossible to match.

    Args:
        target: the reference answer for the example.
        model_output: the text returned by the model, or ``None``.

    Returns:
        ``{'correct': bool}``.
    """
    # NOTE(review): presumably weave passes None here when predict failed for
    # the row — confirm. A missing prediction is scored as incorrect rather
    # than crashing on None.strip().
    if model_output is None:
        return {'correct': False}
    return {'correct': postprocess(target) == postprocess(model_output)}

In [51]:
# Sample size used to label the weave project.
# NOTE(review): keep in sync with the size passed to random.sample in the
# sampling cell further down.
N_SAMPLES = 10

# The name was previously built from len(sample), but `sample` is only created
# in a later cell, so this line raised NameError on a fresh kernel.
weave.init(f"llama-7b-chat-CE1-n{N_SAMPLES}-test3")

Logged in as Weights & Biases user: nthomsen.
View Weave data at https://wandb.ai/cbs-nlp/llama-7b-chat-CE1-n10-test3/weave




In [75]:
################
# SAMPLE EXAMPLES
################

sample: list = random.sample(examples, 10)

################
# RUN EVALUATION
################



evaluation = weave.Evaluation(
    dataset=sample,
    scorers=[evaluator],
    trials=1,

)

output = await evaluation.evaluate(model)

🍩 https://wandb.ai/cbs-nlp/llama-7b-chat-CE1-n10-test3/r/call/8bc34030-4f1d-45f9-89ef-2b6b46ffb739


In [78]:
# Show the Evaluation object's repr to inspect how it was configured.
evaluation

Evaluation(name=None, description=None, dataset=Dataset(name=None, description=None, rows=<weave.table.Table object at 0x16c41b0b0>), scorers=[Op(evaluator)], preprocess_model_input=None, trials=1)

In [74]:
import wandb

api = wandb.Api()
# NOTE(review): when this cell was last executed the lookup failed with
# CommError ("Could not find run"). The argument is a weave:/// URI, whereas
# the placeholder in a later cell of this notebook gives the expected form as
# "<entity>/<project>/<run_id>" — confirm the correct identifier or remove
# this cell.
run = api.run("weave:///cbs-nlp/llama-7b-chat-ce1-n10-test3/YcUMmEPIGcjJ7St4kEsTBqhjoq2yEwiVWYFBMKNNw0")

CommError: Could not find run <Run weave%3A//YcUMmEPIGcjJ7St4kEsTBqhjoq2yEwiVWYFBMKNNw0 (not found)>

In [66]:
import wandb

api = wandb.Api()

# NOTE(review): the argument below is a documentation URL rather than a run
# path — replace it with the real run identifier before relying on this cell.
run = api.run("https://wandb.github.io/weave/guides/core-types/evaluations")

# Print timestamp and accuracy for every history row, but only when the run
# has finished; otherwise iterate over nothing.
history_rows = run.history().iterrows() if run.state == "finished" else ()
for row_index, record in history_rows:
    print(record["_timestamp"], record["accuracy"])

In [67]:
# Read project.csv into a DataFrame. `pd` was used here without ever being
# imported; pandas is now imported as `pd` in the notebook's import cell.
data = pd.read_csv("project.csv")

In [68]:
# Preview only the first rows instead of rendering the whole frame.
data.head()

Unnamed: 0.1,Unnamed: 0,summary,config,name


In [28]:
import wandb

# Client for the W&B public API, used by the run lookup in the next cell.
api = wandb.Api()
# Template for a run lookup, left commented out:
# run = api.run("<entity>/<project>/<run_id>")

In [29]:
# NOTE(review): when last executed this raised CommError; the recorded message
# shows the path resolved to run "llama-7b-chat-CE1-n10-test2-1" in project
# "weave" under entity "cbs-nlp", which was not found. Confirm the intended
# "<entity>/<project>/<run_id>" or remove this cell.
api.run("weave/llama-7b-chat-CE1-n10-test2-1")

CommError: Could not find run <Run cbs-nlp/weave/llama-7b-chat-CE1-n10-test2-1 (not found)>