In [39]:
import random
import pandas as pd
from pprint import pprint

from qdrant_client import QdrantClient, models

In [40]:
client = QdrantClient(url="http://localhost:6333")

In [41]:
COLLECTION_NAME = "OCR-TEST"

In [None]:
exist = client.collection_exists(collection_name=COLLECTION_NAME)
if exist:
    print("Collection already exists. Deleting...")
    client.delete_collection(collection_name=COLLECTION_NAME)

client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=models.VectorParams(size=100, distance=models.Distance.COSINE),
)

True

In [43]:
sample_data = [
    {"Type": "Product", "Name": "Fernseher Smart TV QLED 4K", "SKU": "123456", "Price": 1000},
    {"Type": "Product", "Name": "Aufbauservice", "SKU": "123457", "Price": 120},
    {"Type": "Product", "Name": "Farbe Weiß - 20L Eimer", "SKU": "1123", "Price": 25.49},
    {"Type": "Product", "Name": "Streich-Service", "SKU": "1124", "Price": 600},
    {"Type": "Order", "OrderID": "1234", "Products": ["123456", "123457"], "Total": 1120},
    {"Type": "Order", "OrderID": "1235", "Products": ["123458", "123459"], "Total": 625.49},
]

In [44]:
df = pd.DataFrame(sample_data)

df.head()

Unnamed: 0,Type,Name,SKU,Price,OrderID,Products,Total
0,Product,Fernseher Smart TV QLED 4K,123456.0,1000.0,,,
1,Product,Aufbauservice,123457.0,120.0,,,
2,Product,Farbe Weiß - 20L Eimer,1123.0,25.49,,,
3,Product,Streich-Service,1124.0,600.0,,,
4,Order,,,,1234.0,"[123456, 123457]",1120.0


In [45]:
# Inserting data into the collection
points = []
for i, row in df.iterrows():
    point = models.PointStruct(
        id=i,
        payload=row.to_dict(),
        vector=[random.random() for _ in range(100)],
    )
    points.append(point)

In [46]:
client.upsert(
    collection_name=COLLECTION_NAME,
    points=points,
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [47]:
x = client.scroll(
    collection_name=COLLECTION_NAME,
    limit=10,
    with_payload=True,
    with_vectors=False,
)

pprint(x[0][0].payload)

{'Name': 'Fernseher Smart TV QLED 4K',
 'OrderID': None,
 'Price': 1000.0,
 'Products': None,
 'SKU': '123456',
 'Total': None,
 'Type': 'Product'}


In [48]:
results = []
for point in x[0]:
    results.append(point.payload)

pprint(results)

[{'Name': 'Fernseher Smart TV QLED 4K',
  'OrderID': None,
  'Price': 1000.0,
  'Products': None,
  'SKU': '123456',
  'Total': None,
  'Type': 'Product'},
 {'Name': 'Aufbauservice',
  'OrderID': None,
  'Price': 120.0,
  'Products': None,
  'SKU': '123457',
  'Total': None,
  'Type': 'Product'},
 {'Name': 'Farbe Weiß - 20L Eimer',
  'OrderID': None,
  'Price': 25.49,
  'Products': None,
  'SKU': '1123',
  'Total': None,
  'Type': 'Product'},
 {'Name': 'Streich-Service',
  'OrderID': None,
  'Price': 600.0,
  'Products': None,
  'SKU': '1124',
  'Total': None,
  'Type': 'Product'},
 {'Name': None,
  'OrderID': '1234',
  'Price': None,
  'Products': ['123456', '123457'],
  'SKU': None,
  'Total': 1120.0,
  'Type': 'Order'},
 {'Name': None,
  'OrderID': '1235',
  'Price': None,
  'Products': ['123458', '123459'],
  'SKU': None,
  'Total': 625.49,
  'Type': 'Order'}]


In [49]:
products = list(filter(lambda item: item['Type'] == 'Product', results))
pprint(products)

[{'Name': 'Fernseher Smart TV QLED 4K',
  'OrderID': None,
  'Price': 1000.0,
  'Products': None,
  'SKU': '123456',
  'Total': None,
  'Type': 'Product'},
 {'Name': 'Aufbauservice',
  'OrderID': None,
  'Price': 120.0,
  'Products': None,
  'SKU': '123457',
  'Total': None,
  'Type': 'Product'},
 {'Name': 'Farbe Weiß - 20L Eimer',
  'OrderID': None,
  'Price': 25.49,
  'Products': None,
  'SKU': '1123',
  'Total': None,
  'Type': 'Product'},
 {'Name': 'Streich-Service',
  'OrderID': None,
  'Price': 600.0,
  'Products': None,
  'SKU': '1124',
  'Total': None,
  'Type': 'Product'}]


In [50]:
orders = list(filter(lambda item: item["Type"] == "Order", results))
pprint(orders)

[{'Name': None,
  'OrderID': '1234',
  'Price': None,
  'Products': ['123456', '123457'],
  'SKU': None,
  'Total': 1120.0,
  'Type': 'Order'},
 {'Name': None,
  'OrderID': '1235',
  'Price': None,
  'Products': ['123458', '123459'],
  'SKU': None,
  'Total': 625.49,
  'Type': 'Order'}]
