In [None]:
import os
import re
import sys
from pathlib import Path
from uuid import uuid4

import weaviate
import weaviate.classes.config as wvcc
from weaviate.classes.config import Configure
from weaviate.util import get_valid_uuid

from ai_journal.storage import example_data_location, read_user_data
from ai_journal.text import read_and_chunk_files, chunk_list, split_into_sentences

# Connect to the Weaviate instance reachable on this machine.
# The resulting `client` is shared by every cell below.
client = weaviate.connect_to_local(
    host="localhost",
)

In [None]:
# Reset only the collection this notebook owns so the create cell below can be
# re-run after "Restart & Run All". Deleting every collection in the instance
# (delete_all) would also destroy data belonging to unrelated work.
client.collections.delete("WeaviateJournalChunk")

In [None]:
# Create the journal-chunk collection with one named vector (embedded via a
# local Ollama instance) and an Ollama-backed generative module.
collection = client.collections.create(
    name="WeaviateJournalChunk",
    vectorizer_config=[
        Configure.NamedVectors.text2vec_ollama(
            # The vector name is kept as-is so that any existing queries that
            # target "title_vector" keep working.
            name="title_vector",
            # Embed the "content" property: it is the only text the insert cell
            # writes, and the schema below declares no "title" property, so
            # vectorizing "title" would leave nothing to embed.
            source_properties=["content"],
            # If using Docker, use this to contact your local Ollama instance
            api_endpoint="http://host.docker.internal:11434",
            # NOTE(review): "llama3:8b" is a chat model; a dedicated embedding
            # model such as "nomic-embed-text" may be a better fit — confirm.
            model="llama3:8b",
        )
    ],
    generative_config=Configure.Generative.ollama(
        api_endpoint="http://host.docker.internal:11434"
    ),
    properties=[
        wvcc.Property(name="content", data_type=wvcc.DataType.TEXT),
        wvcc.Property(name="author", data_type=wvcc.DataType.TEXT),
    ],
)

In [None]:
# Example usage
journal_chunks = read_and_chunk_files(example_data_location)

In [None]:
# Only the last expression of a cell is rendered, so the chunk count is
# printed explicitly; the first chunk is then shown as the cell's output.
print(len(journal_chunks))

journal_chunks[0]

In [None]:
# Insert every chunk as its own object; only the "content" property is set here.
journal_collection = client.collections.get("WeaviateJournalChunk")

# Count successful inserts explicitly: the loop index would be off by one
# (it is zero-based) and undefined when there are no chunks at all.
uploaded_count = 0
for journal_chunk in journal_chunks:
    journal_collection.data.insert(properties={"content": journal_chunk})
    uploaded_count += 1

print(f"Uploaded {uploaded_count} journal chunks.")