In [1]:
from pathlib import Path

from dotenv import load_dotenv

# Load the environment variables defined in the .env file located in the
# current working directory. The call is made outside of an `assert` so that it
# still runs when Python strips assertions (-O / PYTHONOPTIMIZE).
dotenv_path = Path.cwd() / ".env"
if not load_dotenv(dotenv_path):
    raise RuntimeError(f"Could not load any environment variables from {dotenv_path}")

In [2]:
from ragna.core import PackageRequirement, EnvVarRequirement

# Requirements that are verified before the rest of the notebook runs: the
# boto3 package and the AWS-related environment variables.
requirements = [
    PackageRequirement("boto3"),
    EnvVarRequirement("AWS_ACCESS_KEY_ID"),
    EnvVarRequirement("AWS_SECRET_ACCESS_KEY"),
    EnvVarRequirement("AWS_REGION"),
    EnvVarRequirement("AWS_S3_BUCKET"),
]

# Check every requirement and report all unavailable ones at once, so a failure
# tells the reader exactly what has to be installed or configured.
missing = [
    requirement for requirement in requirements if not requirement.is_available()
]
if missing:
    raise RuntimeError(f"Unavailable requirements: {missing}")

In [3]:
from IPython.display import Code

# Display the config module with syntax highlighting. `Code` reads the file
# itself, which avoids shelling out to `cat` (not available on every platform),
# and the lexer is selected explicitly instead of being guessed.
Code(filename="s3_document_config.py", language="python")

In [4]:
from s3_document_config import config

# User name sent along with the API requests in this notebook.
USER = "Ragna"
# Base URL of the REST API as defined by the imported config.
URL = config.ragna_api_url

# Show the URL as the result of this cell.
URL

'http://127.0.0.1:31476'

In [5]:
import subprocess

# Start the REST API as a background process using the config object from
# s3_document_config.py. No pipes are attached to the process.
proc = subprocess.Popen(["ragna", "api", "--config", "s3_document_config.py::config"])

try:
    # FIXME: there needs to be a better way to check this.
    # Heuristic: a process that is still alive after 5 seconds is assumed to
    # have started successfully.
    returncode = proc.wait(timeout=5)
except subprocess.TimeoutExpired:
    # The process did not shut down and thus seems to be running.
    pass
else:
    # Since no pipes are attached, there is no captured stdout / stderr that
    # could be reported; the exit code is the only detail available here.
    raise RuntimeError(
        f"REST API terminated unexpectedly with exit code {returncode}"
    )

{"event": "Started redis server", "timestamp": "2023-09-19T10:48:27.185626Z", "level": "info", "pathname": "/home/philip/git/ora/ragna/core/_rag.py", "lineno": 44}
{"event": "Started ragna worker", "timestamp": "2023-09-19T10:48:29.217200Z", "level": "info", "pathname": "/home/philip/git/ora/ragna/core/_rag.py", "lineno": 48}


INFO:     Started server process [22736]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:31476 (Press CTRL+C to quit)


In [6]:
# Create three small text documents in the current working directory and keep
# their paths for the cells that follow.
paths = [Path.cwd() / f"document{i}.txt" for i in range(3)]
for i, path in enumerate(paths):
    path.write_text(f"This is content of document {i} located on S3 \n")

In [7]:
from pprint import pprint

import httpx

# Asynchronous HTTP client shared by the request cells that follow; its
# request methods are awaited there.
client = httpx.AsyncClient()

In [8]:
path = paths[0]

response = await client.get(
    f"{URL}/document/new", params={"user": USER, "name": path.name}
)
document_info = response.json()
pprint(document_info)

INFO:     127.0.0.1:54146 - "GET /document/new?user=Ragna&name=document0.txt HTTP/1.1" 200 OK
{'data': {'key': '8867917c-0b9c-43eb-8a16-2ed4555ce23b',
          'policy': 'eyJleHBpcmF0aW9uIjogIjIwMjMtMDktMTlUMTA6NTM6MzFaIiwgImNvbmRpdGlvbnMiOiBbeyJidWNrZXQiOiAicG1laWVyLXByZXNpZ25lZC11cmxzLXRlc3QifSwgeyJrZXkiOiAiODg2NzkxN2MtMGI5Yy00M2ViLThhMTYtMmVkNDU1NWNlMjNiIn0sIHsieC1hbXotYWxnb3JpdGhtIjogIkFXUzQtSE1BQy1TSEEyNTYifSwgeyJ4LWFtei1jcmVkZW50aWFsIjogIkFLSUEzN1lSWk4zVlZLNlhGUjM2LzIwMjMwOTE5L2V1LWNlbnRyYWwtMS9zMy9hd3M0X3JlcXVlc3QifSwgeyJ4LWFtei1kYXRlIjogIjIwMjMwOTE5VDEwNDgzMVoifV19',
          'x-amz-algorithm': 'AWS4-HMAC-SHA256',
          'x-amz-credential': 'AKIA37YRZN3VVK6XFR36/20230919/eu-central-1/s3/aws4_request',
          'x-amz-date': '20230919T104831Z',
          'x-amz-signature': '3e7ffa8f4e705e28d0ff9cdba742f50d11450b78dc237e861b6bb627a85f5df3'},
 'document': {'id': '8867917c-0b9c-43eb-8a16-2ed4555ce23b',
              'name': 'document0.txt'},
 'url': 'https://pmeier-presigned-

In [9]:
response = await client.post(
    document_info["url"],
    data=document_info["data"],
    files={"file": open(path, "rb")},
)
assert response.is_success