[ENH] Add support for Ollama assistants #376

Merged
Commits: 32 total; the diff below shows changes from 20 of them.
868febb
Added almost empty `OllamaApiAssistant`
smokestacklightnin Mar 22, 2024
a0c8499
Add `_make_system_content` method
smokestacklightnin Mar 22, 2024
ae0720a
Add preliminary (untested) `_call_api` method
smokestacklightnin Mar 22, 2024
eeba8a1
Using JSONL for responses
smokestacklightnin Apr 2, 2024
420c9e8
Add kwargs for compatibility and TODO messages to remove in a future …
smokestacklightnin Mar 30, 2024
20fb764
Add Ollama gemma:2b model
smokestacklightnin Mar 30, 2024
906f2a1
Fix `OllamaApiAssistant._call_api` signature by adding types
smokestacklightnin Mar 31, 2024
50f19a1
Add temperature option
smokestacklightnin Mar 31, 2024
0ce77d8
Add `_assert_api_call_is_success()`
smokestacklightnin Apr 7, 2024
7bbbafb
Add `answer()`
smokestacklightnin Apr 7, 2024
301c815
Add `__init__()`
smokestacklightnin Apr 7, 2024
a4a2608
Set url through initializer or environment variable
smokestacklightnin Apr 7, 2024
14e14c5
Add `is_available()`
smokestacklightnin Apr 7, 2024
0cae498
Rename Gemma2B to OllamaGemma2B
smokestacklightnin Apr 7, 2024
d02b501
Remove unnecessary `else` clause
smokestacklightnin Apr 10, 2024
1ce1982
Handle error in http response
smokestacklightnin Apr 10, 2024
fd5c34b
Remove unnecessary `_call_api()` abstraction
smokestacklightnin Apr 10, 2024
6f2055c
Fix typing errors
smokestacklightnin Apr 10, 2024
e5e8e30
Add docstring
smokestacklightnin Apr 10, 2024
72161a0
Add `OllamaPhi2`
smokestacklightnin Apr 10, 2024
c5e79e0
Remove unnecessary exclusion from test
smokestacklightnin Apr 10, 2024
f6edb19
Simplify check for availability of Ollama model
smokestacklightnin Apr 10, 2024
6460d1e
Simplify call to superclass `is_available()`
smokestacklightnin Apr 10, 2024
c9b2e01
Correct incorrect grammar on system instruction
smokestacklightnin Apr 10, 2024
086ce23
Add several Ollama models
smokestacklightnin Apr 11, 2024
9724dd6
Order alphabetically
smokestacklightnin Apr 11, 2024
9bebbb0
Add Ollama to listings in docs
smokestacklightnin Apr 12, 2024
bc211d3
Merge branch 'main' into assistants/ollama/basic-functionality
pmeier May 28, 2024
4a737e0
refactor streaming again
pmeier May 28, 2024
3e2a682
more
pmeier May 28, 2024
5a4d89d
fix
pmeier May 28, 2024
9de0920
cleanup
pmeier May 30, 2024
3 changes: 3 additions & 0 deletions ragna/assistants/__init__.py
@@ -5,6 +5,8 @@
"CommandLight",
"GeminiPro",
"GeminiUltra",
"OllamaGemma2b",
"OllamaPhi2",
"Gpt35Turbo16k",
"Gpt4",
"Jurassic2Ultra",
@@ -19,6 +21,7 @@
from ._demo import RagnaDemoAssistant
from ._google import GeminiPro, GeminiUltra
from ._mosaicml import Mpt7bInstruct, Mpt30bInstruct
from ._ollama import OllamaGemma2B, OllamaPhi2
from ._openai import Gpt4, Gpt35Turbo16k

# isort: split
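With these exports in place, the new assistants are importable from the public ragna.assistants namespace. A minimal sketch of checking them from Python (not part of the diff), assuming this branch is installed and an Ollama server may or may not be running locally:

from ragna.assistants import OllamaGemma2B, OllamaPhi2

for assistant_cls in (OllamaGemma2B, OllamaPhi2):
    # display_name() and is_available() come from OllamaApiAssistant below;
    # is_available() returns False unless an Ollama server answers on localhost:11434.
    print(assistant_cls.display_name(), assistant_cls.is_available())

The chat endpoint itself defaults to http://localhost:11434/api/chat and can be overridden through the url argument of the initializer or the RAGNA_ASSISTANTS_OLLAMA_URL environment variable, as implemented in _ollama.py below.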
112 changes: 112 additions & 0 deletions ragna/assistants/_ollama.py
@@ -0,0 +1,112 @@
import contextlib
import json
import os
from typing import AsyncIterator, cast

import httpx
from httpx import Response

import ragna
from ragna.core import Assistant, RagnaException, Source


class OllamaApiAssistant(Assistant):
_MODEL: str

@classmethod
def display_name(cls) -> str:
return f"Ollama/{cls._MODEL}"

def __init__(self, url: str = "http://localhost:11434/api/chat") -> None:
self._client = httpx.AsyncClient(
headers={"User-Agent": f"{ragna.__version__}/{self}"},
timeout=60,
)
self._url = os.environ.get("RAGNA_ASSISTANTS_OLLAMA_URL", url)

@classmethod
def is_available(cls) -> bool:
requirements_available = super().is_available()
if not requirements_available:
return False

try:
response = httpx.get("http://localhost:11434/")
response.raise_for_status()
except httpx.HTTPError:
return False

return response.is_success

def _make_system_content(self, sources: list[Source]) -> str:
instruction = (
"You are an helpful assistants that answers user questions given the context below. "
"If you don't know the answer, just say so. Don't try to make up an answer. "
"Only use the following sources to generate the answer."
)
return instruction + "\n\n".join(source.content for source in sources)

async def _assert_api_call_is_success(self, response: Response) -> None:
if response.is_success:
return

content = await response.aread()
with contextlib.suppress(Exception):
content = json.loads(content)

raise RagnaException(
"API call failed",
request_method=response.request.method,
request_url=str(response.request.url),
response_status_code=response.status_code,
response_content=content,
)

async def answer(
self, prompt: str, sources: list[Source], *, max_new_tokens: int = 256
) -> AsyncIterator[str]:
async with self._client.stream(
"POST",
self._url,
headers={
"Content-Type": "application/json",
},
json={
"messages": [
{
"role": "system",
"content": self._make_system_content(sources),
},
{
"role": "user",
"content": prompt,
},
],
"model": self._MODEL,
"stream": True,
"temperature": 0.0,
},
) as response:
await self._assert_api_call_is_success(response)

async for chunk in response.aiter_lines():
# This part modeled after https://github.com/ollama/ollama/blob/06a1508bfe456e82ba053ea554264e140c5057b5/examples/python-loganalysis/readme.md?plain=1#L57-L62
if chunk:
json_data = json.loads(chunk)

if "error" in json_data:
raise RagnaException(json_data["error"])
if not json_data["done"]:
yield cast(str, json_data["message"]["content"])
Review comment from pmeier (Member), May 28, 2024:
@nenb This violates "same response schema" part of #425 (comment) 😖

class OllamaGemma2B(OllamaApiAssistant):
"""[Gemma:2B](https://ollama.com/library/gemma)"""

_MODEL = "gemma:2b"


class OllamaPhi2(OllamaApiAssistant):
"""[Phi-2](https://ollama.com/library/phi)"""

_MODEL = "phi"
5 changes: 5 additions & 0 deletions tests/assistants/test_api.py
@@ -8,12 +8,17 @@
from ragna.core import RagnaException
from tests.utils import skip_on_windows

EXCLUDE_ASSISTANTS = [
assistants._ollama.OllamaApiAssistant,
]

API_ASSISTANTS = [
assistant
for assistant in assistants.__dict__.values()
if isinstance(assistant, type)
and issubclass(assistant, ApiAssistant)
and assistant is not ApiAssistant
and not any(issubclass(assistant, to_skip) for to_skip in EXCLUDE_ASSISTANTS)
]

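Because the comprehension uses issubclass against EXCLUDE_ASSISTANTS, excluding the OllamaApiAssistant base class also keeps its concrete models out of the generic API-assistant tests. A quick check of that (not part of the diff), assuming this branch is importable:

from ragna import assistants

# Both concrete Ollama assistants are subclasses of the excluded base class,
# so the filter above drops them from API_ASSISTANTS as well.
print(issubclass(assistants.OllamaGemma2B, assistants._ollama.OllamaApiAssistant))  # True
print(issubclass(assistants.OllamaPhi2, assistants._ollama.OllamaApiAssistant))  # True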
