In [None]:
# !pip install arize-phoenix

In [None]:
# from llama_index.llms.ollama import Ollama
#
# llm = Ollama(model="llama3", request_timeout=60.0)
#
# response = llm.complete("What is the capital of France?")
# print(response)

In [None]:
from datetime import datetime, timezone

import phoenix as px

In [None]:
from datasets import load_dataset

# Draw a small, seeded sample from one configuration of the dataset; the
# resulting frame is what later cells upload as experiment examples.
sample_size = 5  # number of rows kept by .sample() below
path = "nvidia/ChatQA-Training-Data"  # first positional argument to load_dataset
name = "synthetic_convqa"  # second positional argument to load_dataset
# Load the "train" split, convert it to pandas, then sample `sample_size` rows
# with a fixed random_state so the selection is seeded rather than arbitrary.
df = load_dataset(path, name, split="train").to_pandas().sample(sample_size, random_state=42)
df  # last expression: display the sampled rows as the cell output

In [None]:
# Upload the sampled frame through a Phoenix client: the "messages" and
# "document" columns are passed as input keys and "answers" as the output key.
px.Client().upload_dataset_examples(
    df,
    input_keys=("messages", "document"),
    output_keys=("answers",),
    # The current UTC timestamp (ISO 8601) is used as the dataset name, so every
    # run of this cell uploads under a different name.
    name=datetime.now(timezone.utc).isoformat(),
)

In [None]:
# Fetch a dataset by a hard-coded ID ("RGF0YXNldDox" is base64 for "Dataset:1").
# NOTE(review): the ID is not derived from the upload call above; presumably it
# matches the first dataset created on a fresh Phoenix instance — confirm, since
# it may point at a different dataset (or none) on an instance with prior data.
ds = px.Client().get_dataset("RGF0YXNldDox")
type(ds)  # last expression: show the type of the returned object

In [None]:
from contextlib import contextmanager
from threading import Thread
from time import sleep, time
from typing import Awaitable, Callable, Iterator

from portpicker import pick_unused_port
from starlette.applications import Starlette
from starlette.responses import JSONResponse, Response
from starlette.routing import Request, Route
from uvicorn import Config, Server


async def hello(_: Request) -> Response:
    """Answer any request with a fixed chat-completion style JSON body.

    The incoming request is ignored; the same payload is returned every time.
    """
    assistant_message = {
        "role": "assistant",
        "content": "\n\nHello there, how may I assist you today?",
    }
    only_choice = {
        "index": 0,
        "message": assistant_message,
        "finish_reason": "stop",
    }
    token_usage = {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21}
    # Key order is kept as-is so the serialized body is unchanged.
    payload = {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": 1677652288,
        "model": "gpt-3.5-turbo-0125",
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [only_choice],
        "usage": token_usage,
    }
    return JSONResponse(content=payload)


class Receiver:
    """Starlette app exposing a single POST ``/v1/chat/completions`` route.

    The app can be served from a background thread for the duration of a
    ``with`` block via :meth:`run_in_thread`.
    """

    def __init__(self, chat_completion: Callable[[Request], Awaitable[Response]]) -> None:
        """Build the app with ``chat_completion`` as the handler for the route.

        Args:
            chat_completion: async callable taking a request and returning a
                response; registered for POST ``/v1/chat/completions``.
        """
        self.app = Starlette(
            routes=[
                Route("/v1/chat/completions", chat_completion, methods=["POST"]),
            ]
        )

    def install_signal_handlers(self) -> None:
        """Do nothing.

        NOTE(review): nothing in the visible code calls this; presumably it was
        meant to override the same-named hook on uvicorn's ``Server`` — confirm.
        """
        pass

    @contextmanager
    def run_in_thread(self, port: int) -> Iterator[None]:
        """Serve the app on ``port`` from a background thread while the ``with`` body runs.

        On exit (normal or exceptional) the server is asked to stop and the
        thread is joined for up to 5 seconds.

        Args:
            port: TCP port handed to the uvicorn ``Config``.

        Raises:
            RuntimeError: if the server has not reported itself as started
                within 5 seconds, or if the server thread exits before startup
                completes (for example because the port could not be bound).
        """
        config = Config(app=self.app, port=port, loop="asyncio", log_level="critical")
        server = Server(config=config)
        # Daemon thread: a server that fails to stop within the join timeout
        # below must not keep the interpreter/kernel from exiting.
        thread = Thread(target=server.run, daemon=True)
        thread.start()
        time_limit = time() + 5  # 5 seconds
        try:
            while not server.started and thread.is_alive() and time() < time_limit:
                sleep(1e-3)
            # Check the actual outcome rather than only the clock: the loop also
            # ends when the thread dies early, which must not be treated as success.
            if not server.started:
                if thread.is_alive():
                    raise RuntimeError("server took too long to start")
                raise RuntimeError("server thread exited before startup completed")
            yield
        finally:
            server.should_exit = True
            thread.join(timeout=5)

In [None]:
from typing import Any, Mapping

import openai

# Pick a free local port and build an OpenAI client whose base URL points at
# that port on localhost instead of the default OpenAI endpoint.
port = pick_unused_port()
# "sk-" is a placeholder key.
# NOTE(review): presumably the local server never validates it — confirm.
client = openai.OpenAI(api_key="sk-", base_url=f"http://localhost:{port}/v1/")


def task(record: Mapping[str, Any]):
    """Send ``record["messages"]`` to the chat endpoint and return the reply text.

    Uses the module-level ``client``. The completion is limited to 20 tokens
    and only the first choice's message content is returned.
    """
    request_kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": record["messages"],
        "max_tokens": 20,
    }
    completion = client.chat.completions.create(**request_kwargs)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
from phoenix.datasets.decorators import execution_sequence

# Display the type of whatever `execution_sequence.get()` currently returns.
# NOTE(review): looks like an exploratory/debugging probe with no effect on
# later cells — confirm whether it should stay in the notebook.
type(execution_sequence.get())

In [None]:
from phoenix.datasets.experiments import run_experiment

# Serve the canned `hello` handler on `port` only for the duration of this
# block, so the requests `task` sends through `client` (which targets the same
# port) are answered locally while the experiment runs.
with Receiver(hello).run_in_thread(port):
    run_experiment(ds, task)

In [None]:
import openai
import wikipedia as wp
from phoenix.datasets.decorators import io_capture

# NOTE: this rebinds the name `openai` from the module to a client instance.
# After this line, module attributes such as `openai.OpenAI` are not reachable
# through this name until `import openai` runs again; the functions below rely
# on `openai` being the client.
# The client targets an OpenAI-compatible endpoint on localhost:11434.
# NOTE(review): presumably a local Ollama server, given the placeholder key — confirm.
openai = openai.OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")


@io_capture
def generate_wiki_search(question):
    """Ask the chat model to turn ``question`` into a Wikipedia search query.

    Sends a system instruction plus the user's question through the
    module-level ``openai`` client (temperature 0) and returns the message
    content of the first choice.
    """
    system_prompt = "Generate a search query to pass into wikipedia to answer the user's question. Return only the search query and nothing more. This will passed in directly to the wikipedia search engine."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]
    completion = openai.chat.completions.create(
        messages=conversation,
        model="gpt-3.5-turbo",
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


@io_capture
def retrieve(query):
    """Search Wikipedia for ``query`` and return up to two page summaries.

    Up to 10 search hits are tried in order. Hits whose title resolves to a
    disambiguation page, or to no page at all, are skipped. Collection stops
    as soon as two documents have been gathered.

    Args:
        query: search string passed to ``wp.search``.

    Returns:
        A list (possibly empty, never ``None``) of dicts with keys
        ``"page_content"`` (the page summary), ``"type"`` (``"Document"``) and
        ``"metadata"`` (``{"url": <page url>}``).
    """
    results = []
    for term in wp.search(query, results=10):
        try:
            page = wp.page(term, auto_suggest=False)
            results.append(
                {"page_content": page.summary, "type": "Document", "metadata": {"url": page.url}}
            )
        except (wp.DisambiguationError, wp.PageError):
            # Best-effort retrieval: an ambiguous or missing title just means
            # this hit is unusable, so move on to the next one.
            continue
        if len(results) >= 2:
            break
    # Always hand back the list, including when fewer than two pages resolved,
    # so callers can iterate over the result unconditionally.
    return results


@io_capture
def generate_answer(question, context):
    """Ask the chat model to answer ``question`` using only ``context``.

    ``context`` is embedded in the system message and ``question`` is sent as
    the user message through the module-level ``openai`` client (temperature
    0). Returns the message content of the first choice.
    """
    system_prompt = f"Answer the user's question based ONLY on the content below:\n\n{context}"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]
    completion = openai.chat.completions.create(
        messages=conversation,
        model="gpt-3.5-turbo",
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def rag_pipeline(question):
    """Answer ``question`` via search-query generation, retrieval, and generation.

    The question is turned into a search query, the retrieved documents'
    ``"page_content"`` values are joined with blank lines into one context
    string, and that context is passed with the question to the answer step.
    """
    search_query = generate_wiki_search(question)
    documents = retrieve(search_query)
    summaries = [document["page_content"] for document in documents]
    return generate_answer(question, "\n\n".join(summaries))

In [None]:
# Run the experiment once more with the same dataset and task.
# NOTE(review): unlike the earlier run, this call is not wrapped in
# `Receiver(hello).run_in_thread(port)`, so `task`'s client may have no server
# to reach on `port`. Also, `rag_pipeline` is defined just above but not used —
# confirm which task (and which server) this cell is meant to exercise.
run_experiment(ds, task)