In [37]:
from datetime import datetime

date_str = datetime.now().strftime("%Y%m%d")

In [38]:
from qdrant_client import QdrantClient, models
import os

from dotenv import load_dotenv

load_dotenv()

client = QdrantClient(url=os.getenv("QDRANT_URL"), api_key=os.getenv("QDRANT_API_KEY"))
client.create_collection(
    collection_name=f"{date_str}_scraped",
    vectors_config={
        "content": models.VectorParams(size=1024, distance=models.Distance.COSINE),
        "paragraph": models.VectorParams(
            size=1024,
            distance=models.Distance.COSINE,
            multivector_config={"comparator": "max_sim"},
        ),
    },
)

True

In [39]:
import glob

embed_files = glob.glob(f"{date_str}_*.1.4.json")

In [63]:
import json

all_data = []
index = 1
for file in embed_files:
    with open(file, "r") as f:
        print("Uploading", file)
        data = json.load(f)
        points = []
        for payload in data:
            embedding = payload.pop("embedding")
            paragraph_embeddings = payload.pop("paragraph_embeddings")
            if len(paragraph_embeddings) == 0:
                paragraph_embeddings = [[0] * 1024]

            result = client.upsert(
                collection_name=f"{date_str}_scraped",
                points=[
                    models.PointStruct(
                        id=index,
                        vector={
                            "content": embedding,
                            "paragraph": paragraph_embeddings,
                        },
                        payload=payload,
                    )
                ],
            )
            index += 1
print("Done")

Uploading 20250207_DeepBook.1.4.json
Uploading 20250207_NAVI.1.4.json
Uploading 20250207_DoubleUp.1.4.json
Uploading 20250207_Walrus.1.4.json
Uploading 20250207_Metastable.1.4.json
Uploading 20250207_OmniBTC.1.4.json
Uploading 20250207_Suilend.1.4.json
Uploading 20250207_Bucket Protocol.1.4.json
Uploading 20250207_Aftermath.1.4.json
Uploading 20250207_Bluefin.1.4.json
Uploading 20250207_SuiNS.1.4.json
Uploading 20250207_Scallop.1.4.json
Uploading 20250207_Mole.1.4.json
Uploading 20250207_Strater.1.4.json
Uploading 20250207_Kai Finance.1.4.json
Uploading 20250207_Kriya.1.4.json
Uploading 20250207_Cetus Protocol.1.4.json
Uploading 20250207_Sudo Finance.1.4.json
Uploading 20250207_Turbos.1.4.json
Uploading 20250207_Haedal Protocol.1.4.json
Uploading 20250207_Typus.1.4.json
Uploading 20250207_FlowX.1.4.json
Uploading 20250207_AlphaFi.1.4.json
Done
