# Load to qdrant

In [1]:
from pymilvus import MilvusClient, DataType, Collection, connections
import pandas as pd
import numpy as np
import os

In [2]:
client = MilvusClient(
    uri="http://localhost:19530"
)

client

<pymilvus.milvus_client.milvus_client.MilvusClient at 0x2b6405c32f0>

In [3]:
basePath = os.path.abspath('')
basePath

'C:\\Users\\gorku\\Documents\\bmstu\\RAG24\\late-chunking-ru'

In [7]:
schema = MilvusClient.create_schema(
    auto_id=False,
    enable_dynamic_field=True,
)

schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="chunk", datatype=DataType.VARCHAR, max_length=4096)
schema.add_field(field_name="trad_chunk_embedding", datatype=DataType.FLOAT_VECTOR, dim=1024)
schema.add_field(field_name="new_chunk_embedding", datatype=DataType.FLOAT_VECTOR, dim=1024)
schema.add_field(field_name="doc_id", datatype=DataType.INT64)

index_params = client.prepare_index_params()

index_params.add_index(
    field_name="id",
    index_type="",
    sync=True,
    extra_params={"mmap.enabled": True}
)

index_params.add_index(
    field_name="trad_chunk_embedding", 
    index_type="DISKANN",
    metric_type="COSINE",
    params={ "nlist": 1024 },
    sync=True,
    extra_params={"mmap.enabled": True}
)

index_params.add_index(
    field_name="new_chunk_embedding", 
    index_type="DISKANN",
    metric_type="COSINE",
    params={ "nlist": 1024 },
    sync=True,
    extra_params={"mmap.enabled": True}
)


In [8]:
client.create_collection(
    collection_name="chunks_embedded",
    schema=schema,
    index_params=index_params
)

connections.connect("default", host="localhost", port="19530")

collection = Collection("chunks_embedded")

collection.release()

collection.set_properties({'mmap.enabled': True})


In [9]:
collection.load()


In [10]:
client.get_load_state(
    collection_name="chunks_embedded"
)


{'state': <LoadState: Loaded>}

### DANGER ZONE

In [5]:
client.drop_collection(
    collection_name="chunks_embedded"
)

In [11]:
def stick_it(payloads_, last_idx_):
    added = 0
    for payload in payloads_:
        res = client.insert(
            collection_name="chunks_embedded",
            data=[
                {
                    "id" : id + last_idx_, "chunk" : row['chunk'], "trad_chunk_embedding" : row['trad_chunk_embedding'], "new_chunk_embedding" : row['new_chunk_embedding'], "doc_id" : row['doc_id']
                } for id, row in payload.iterrows()
            ]
        )
        add = res['insert_count']
        if add != payload.shape[0]:
            print("Error!")
            return None
        added += add
    return added


In [12]:
%%time
last_idx = 0
for i in range(22):
    chunks_embedded_n = pd.read_pickle(basePath + "\\ai-forever-ria-news-retrieval\\chunks_embedded_"+str(i)+".pkl")
    payloads = np.array_split(chunks_embedded_n, chunks_embedded_n.shape[0] / 7000)
    delta = chunks_embedded_n.shape[0]
    print(stick_it(payloads, last_idx), delta)
    last_idx += delta
    del payloads
    del chunks_embedded_n

  return bound(*args, **kwds)


234628 234628


  return bound(*args, **kwds)


231833 231833


  return bound(*args, **kwds)


240764 240764


  return bound(*args, **kwds)


241110 241110


  return bound(*args, **kwds)


247331 247331


  return bound(*args, **kwds)


246433 246433


  return bound(*args, **kwds)


245563 245563


  return bound(*args, **kwds)


250901 250901


  return bound(*args, **kwds)


250559 250559


  return bound(*args, **kwds)


253142 253142


  return bound(*args, **kwds)


253865 253865


  return bound(*args, **kwds)


252677 252677


  return bound(*args, **kwds)


252887 252887


  return bound(*args, **kwds)


252142 252142


  return bound(*args, **kwds)


255681 255681


  return bound(*args, **kwds)


259470 259470


  return bound(*args, **kwds)


257050 257050


  return bound(*args, **kwds)


255660 255660


  return bound(*args, **kwds)


258399 258399


  return bound(*args, **kwds)


271333 271333


  return bound(*args, **kwds)


258252 258252


  return bound(*args, **kwds)


249231 249231
CPU times: total: 28min 51s
Wall time: 1h 30min 33s


In [13]:
last_idx

5518911

In [17]:
connections.disconnect("default")

### Trash

In [8]:
chunks_embedded_n = pd.read_pickle(basePath + "\\ai-forever-ria-news-retrieval\\chunks_embedded_0.pkl")
chunks_embedded_n.shape

(234628, 4)

In [9]:
chunks_embedded_n.iloc[[-1]]

Unnamed: 0,chunk,trad_chunk_embedding,new_chunk_embedding,doc_id
234627,"dum spiro, spero.соболезнования родным и близ...","[0.0529799, -0.10728489, -0.020307904, 0.04234...","[1.1796875, -1.875, -0.013183594, 0.609375, 0....",32015


In [5]:
chunks_embedded_n = chunks_embedded_n.reset_index(drop=True)

In [7]:
chunks_embedded_n.to_pickle(basePath + "\\ai-forever-ria-news-retrieval\\chunks_embedded_0.pkl")


In [45]:
payloads = np.array_split(chunks_embedded_n, chunks_embedded_n.shape[0] / 7000)
payloads[0].shape

  return bound(*args, **kwds)


(7110, 4)

In [None]:
last_idx = 0
for payload in payloads:
    points = [
        models.PointStruct(
            id=id, vector=row['trad_chunk_embedding'], payload={"chunk": row['chunk'], "doc_id": row['doc_id']}
        ) for id, row in payload.iterrows()
    ]
    operation_info = client.upsert(
        collection_name="{chunks_embedded_trad}",
        wait=True,
        points=points,
    )
    last_idx += payload.shape[0]
    if operation_info.status != "completed":
        print("Error!")
        break
last_idx

In [42]:
for id, row in chunks_embedded_n.iterrows():
    if id == 28343:
        print("A ", chunks_embedded_n.index[id])
    if id == 28345:
        print("B ", chunks_embedded_n.index[id])

A  28343
B  28346


In [47]:
payloads[0].shape

(7110, 4)

In [61]:
%%time
res = client.insert(
    collection_name="chunks_embedded",
    data=[
        {
            "id" : id, "chunk" : row['chunk'], "trad_chunk_embedding" : row['trad_chunk_embedding'], "new_chunk_embedding" : row['new_chunk_embedding'], "doc_id" : row['doc_id']
        } for id, row in payloads[0].iterrows()
    ]
)

CPU times: total: 2.33 s
Wall time: 7.06 s


In [66]:
res['insert_count'] == payloads[0].shape[0]

True

In [16]:
client.get(
    collection_name="chunks_embedded",
    ids=[5518910]
)

data: ["{'trad_chunk_embedding': [0.05621778, 0.03213727, -0.007372917, 0.058613904, -0.046933513, -0.1865095, -0.018278578, 0.001619006, -0.17829084, 0.0020009445, -0.0041661807, 0.05058364, -0.04056935, -0.010017232, -0.046516713, -0.11094019, -0.08805039, -0.015233375, 0.029342277, 0.08198647, -0.005583823, -0.05279726, -0.09186535, 0.018865835, -0.0155954445, 0.12568197, 0.00519241, 0.10719586, -0.020464605, 0.10290991, 0.0974465, 0.12388046, -0.022666454, 0.018115206, -0.022159042, 0.04709247, -0.008114532, 0.017028265, -0.058449432, 0.0076271747, 0.09132372, 0.0026088075, 0.019679755, 0.034698244, 0.064418785, 0.026714336, 0.008320679, -0.030110938, -0.08263407, -0.0003988642, 0.0029031723, -0.068763606, -0.06566689, 0.03806504, 0.08708487, 0.03248315, -0.0055252262, -0.0039549745, -0.072549134, -0.10192085, -0.027798336, -0.032606784, -0.0057161953, -0.010541202, 0.009073793, -0.06033889, 0.030195937, -0.07652306, 0.0023630129, 0.050454117, -0.04372494, 0.0071159, -0.016502088, 

In [56]:
client.delete(
    collection_name="chunks_embedded",
    filter="id in "+str(list(range(7110)))
)

{'delete_count': 7110, 'cost': 0}

In [40]:
client.create_collection(
    collection_name="{chunks_embedded_new}",
    vectors_config=models.VectorParams(size=1024, distance=models.Distance.COSINE, on_disk=True),
    on_disk_payload=True,
    optimizers_config=models.OptimizersConfigDiff(
        indexing_threshold=0,
    ),
)

True

In [38]:
client.delete_collection(collection_name="{chunks_embedded_new}")

True

In [8]:
chunks_embedded_n = pd.read_pickle(basePath + "\\ai-forever-ria-news-retrieval\\chunks_embedded_0.pkl")

In [30]:
#chunks_embedded_n['chunk'][28345]
chunks_embedded_n.iloc[[3839]]

Unnamed: 0,chunk,trad_chunk_embedding,new_chunk_embedding,doc_id
3839,"по его словам, на помощь сейнеру вышло погра...","[-0.041260462, -0.14105958, 0.039183214, -0.01...","[0.12402344, -0.38867188, 0.2734375, -0.746093...",529


In [5]:
client.get_collection(collection_name="{chunks_embedded_new}")

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=0, points_count=0, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=True, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=0, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantiza

In [15]:
payloads = np.array_split(chunks_embedded_n, chunks_embedded_n.shape[0] / 1000)
payloads[0].shape

  return bound(*args, **kwds)


(1003, 4)

In [24]:
payloads[28].iloc[250:300]

Unnamed: 0,chunk,trad_chunk_embedding,new_chunk_embedding,doc_id
28334,"сильнейший ливень, вызвавший мощное наводн...","[0.022568297, -0.087302975, 0.023843262, -0.07...","[-0.118652344, -1.828125, 1.1875, -0.43554688,...",3838
28335,"ранее стало известно, что в результате сильне...","[0.0012870327, -0.09821144, 0.08754822, -0.014...","[-0.11425781, -1.8984375, 1.1796875, -0.164062...",3838
28336,от наводнения частично пострадал аэропорт шар...,"[0.025234822, -0.094522074, 0.05937534, -0.030...","[0.12158203, -1.828125, 1.2421875, -0.2890625,...",3838
28337,из-за плохой погоды закрыты пять морских пор...,"[0.010627331, -0.070719995, -0.031499058, -0.0...","[0.027954102, -1.5078125, 1.203125, -0.3125, 0...",3838
28338,"по данным спасательных служб, во многих курор...","[-0.03727261, -0.045487158, -0.06894926, 0.014...","[-0.14355469, -1.6328125, 0.94921875, -0.01397...",3838
28339,селевыми потоками разрушены порядка 45 жилых ...,"[0.05095162, -0.098087795, -0.00092488277, -0....","[0.0033111572, -1.609375, 1.1640625, -0.3125, ...",3838
28340,"завоевание золотых медалей, выигрыш кубка рос...","[0.033281542, -0.12408914, -0.07292332, -0.004...","[0.45703125, -2.03125, -1.609375, 0.49414062, ...",3839
28341,"- если говорить о конкретных задачах, то в гр...","[0.022755839, -0.08163082, -0.099895306, 0.032...","[0.49804688, -1.8203125, -1.5546875, 0.7226562...",3839
28342,"мы надеемся, что с новым наставником сможем в...","[0.08612651, -0.13075887, -0.056889694, -0.036...","[0.51171875, -1.84375, -1.546875, 0.62890625, ...",3839
28343,"предложение ""зенита"" было самым интересным - ...","[-0.0015919321, -0.13995178, -0.07896757, 0.02...","[0.25, -2.21875, -1.3828125, 0.91015625, -1.96...",3839


In [9]:
chunks_embedded_n.shape

(234628, 4)

In [11]:
'''
%%time
for payload in payloads:
    operation_info = client.upsert(
        collection_name="{chunks_embedded_trad}",
        wait=True,
        points=[
            models.PointStruct(
                id=0, vector=row['trad_chunk_embedding'], payload={"chunk": row['chunk'], "doc_id": row['doc_id']}
            ) for id, row in payload.iterrows()
        ],
    )
    
    if operation_info.status != "completed":
        print("Error!")
        break
    

CPU times: total: 6min 52s
Wall time: 14min 7s


In [10]:
alah = []

In [11]:
%%time
last_idx = 0
for payload in payloads:
    points = [
        models.PointStruct(
            id=id, vector=row['trad_chunk_embedding'], payload={"chunk": row['chunk'], "doc_id": row['doc_id']}
        ) for id, row in payload.iterrows()
    ]
    if last_idx < 30000:
        alah += points
    operation_info = client.upsert(
        collection_name="{chunks_embedded_trad}",
        wait=True,
        points=points,
    )
    last_idx += payload.shape[0]
    if operation_info.status != "completed":
        print("Error!")
        break
last_idx

CPU times: total: 5min 23s
Wall time: 14min 6s


234628

In [41]:
point_id = 0
vec = client.retrieve(
    collection_name="{chunks_embedded_trad}",
    ids=[point_id],
    with_vectors=True,
)[0].vector


IndexError: list index out of range

In [32]:
client.query_points(
    collection_name="{chunks_embedded_trad}",
    query=vec,
    #with_payload=False,
    limit=3
).points

[ScoredPoint(id=0, version=0, score=1.0000001, payload={'chunk': 'премьер-министр украины, кандидат в президенты юлия тимошенко в воскресенье в прямом эфире украинского телеканала 1+1 заявила, что в случае ее победы на выборах президента юрий луценко будет работать в ее команде.', 'doc_id': 0}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=8, version=0, score=0.8719462, payload={'chunk': ' потому я считаю, что нужно сначала выиграть выборы, обсудить все вопросы создания новой команды, и я убеждена, что юрий луценко в команде будет работать", - сказала тимошенко.', 'doc_id': 0}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=5401, version=5, score=0.7870641, payload={'chunk': ' премьер-министр украины юлия тимошенко намерена назначить юрия луценко первым замглавы мвд, чтобы он продолжил руководить ведомством.', 'doc_id': 754}, vector=None, shard_key=None, order_value=None)]

In [6]:
client.get_collection(collection_name="{chunks_embedded_trad}")

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=0, points_count=0, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=True, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=0, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantiza

In [7]:
client.get_collection(collection_name="{chunks_embedded_new}")

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=0, points_count=0, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=True, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=0, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantiza

In [8]:
def stick_it(payloads_, suffix):
    column_name = suffix + '_chunk_embedding'
    collection_name = "{chunks_embedded_"+suffix+"}"
    for payload in payloads_:
        points = [
            models.PointStruct(
                id=id, vector=row[column_name], payload={"chunk": row['chunk'], "doc_id": row['doc_id']}
            ) for id, row in payload.iterrows()
        ]
        operation_info = client.upsert(
            collection_name=collection_name,
            wait=True,
            points=points,
        )
        if operation_info.status != "completed":
            print("Error!")
            break


In [9]:
for i in range(22):
    chunks_embedded_n = pd.read_pickle(basePath + "\\ai-forever-ria-news-retrieval\\chunks_embedded_"+str(i)+".pkl")
    payloads = np.array_split(chunks_embedded_n, chunks_embedded_n.shape[0] / 1000)
    stick_it(payloads, "trad")
    stick_it(payloads, "new")
    del payloads
    del chunks_embedded_n

  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)


In [10]:
client.get_collection(collection_name="{chunks_embedded_trad}")

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=0, points_count=271333, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=True, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=0, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), qua

In [11]:
client.get_collection(collection_name="{chunks_embedded_new}")

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=0, points_count=271333, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=True, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=0, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), qua

In [12]:
client.close()

In [13]:
client = QdrantClient(url="http://localhost:6333")
client

<qdrant_client.qdrant_client.QdrantClient at 0x167e80c91f0>

In [14]:
point_id = 0
vec = client.retrieve(
    collection_name="{chunks_embedded_trad}",
    ids=[point_id],
    with_vectors=True,
)[0].vector


In [15]:
client.query_points(
    collection_name="{chunks_embedded_trad}",
    query=vec,
    #with_payload=False,
    limit=3
).points

[ScoredPoint(id=0, version=5259, score=0.9999998, payload={'chunk': 'власти архангельской области приветствуют план развития соловецкого архипелага, утвержденный правительством рф, говорится в сообщении пресс-службы губернатора и правительства области.', 'doc_id': 672329}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=5, version=5259, score=0.8570045, payload={'chunk': ' на самом высоком уровне определен перечень мероприятий и источники финансирование", — заявил заместитель губернатора архангельской области по развитию соловецкого архипелага роман балашов, слова которого цитирует пресс-служба губернатора и правительства.', 'doc_id': 672329}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=4, version=5259, score=0.75022787, payload={'chunk': ' правительство рф приняло решение по комплексу мер по развитию соловецкого архипелага.', 'doc_id': 672329}, vector=None, shard_key=None, order_value=None)]

In [16]:
client.update_collection(
    collection_name="{chunks_embedded_trad}",
    optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000),
)

True

In [17]:
client.update_collection(
    collection_name="{chunks_embedded_new}",
    optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000),
)

True

In [70]:
client.get_collection(collection_name="{chunks_embedded_trad}")

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=271333, points_count=271333, segments_count=7, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=True, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahea

In [71]:
client.get_collection(collection_name="{chunks_embedded_new}")

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=271333, points_count=271333, segments_count=7, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=True, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahea

In [24]:
node_url = "http://localhost:6333"

In [43]:
snapshot_info_trad = client.create_snapshot(collection_name="{chunks_embedded_trad}", wait=False)


In [44]:
snapshot_info_new = client.create_snapshot(collection_name="{chunks_embedded_new}", wait=False)


In [74]:
client.list_snapshots(collection_name="{chunks_embedded_trad}")

[]

In [31]:
snapshot_url_trad = f"{node_url}/collections/test_collection/snapshots/{snapshot_info_trad.name}"
snapshot_url_new = f"{node_url}/collections/test_collection/snapshots/{snapshot_info_new.name}"

AttributeError: 'NoneType' object has no attribute 'name'

In [None]:
snapshot_urls = [snapshot_url_trad, snapshot_url_new]

In [None]:
import requests
import os

In [None]:
# Create a directory to store snapshots
os.makedirs("snapshots", exist_ok=True)

local_snapshot_paths = []
for snapshot_url in snapshot_urls:
    snapshot_name = os.path.basename(snapshot_url)
    local_snapshot_path = os.path.join("snapshots", snapshot_name)

    response = requests.get(
        snapshot_url, headers={"api-key": QDRANT_API_KEY}
    )
    with open(local_snapshot_path, "wb") as f:
        response.raise_for_status()
        f.write(response.content)

    local_snapshot_paths.append(local_snapshot_path)

local_snapshot_paths