# Ingesting Car Model / Brand / Manufacturing Year Data into Qdrant

In [21]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
import os
import pandas as pd
import numpy as np
import io

# Populate os.environ from a local .env file (if any) before reading settings
load_dotenv()

# Connection settings come from the environment, never from the notebook itself
QDRANT_URL = os.getenv('QDRANT_URL')
QDRANT_API_KEY = os.getenv('QDRANT_API_KEY')

# Single client instance shared by every cell below
qdrant_client = QdrantClient(api_key=QDRANT_API_KEY, url=QDRANT_URL)

# Connectivity check: list the collections currently on the server
print(qdrant_client.get_collections())

# Placeholder; reassigned from the server response before the collection is (re)created
collections = []


In [22]:
# Load the car catalogue from CSV and preview the first rows.
# The preview below shows the columns used later in this notebook:
# id, car_brand, car_model, year_start, year_end.
df = pd.read_csv('car_dataset.csv')
df.head()

Unnamed: 0,id,car_brand,car_model,year_start,year_end
0,1,Proton,S70,2023,2025
1,2,Proton,Persona,1993,2025
2,3,Proton,Perdana,2013,2025
3,4,Proton,Saga,1985,2025
4,5,Proton,X90,2023,2025


In [23]:
# (Re)create the collection and add a keyword payload index on `vector_type`
from qdrant_client.models import PayloadSchemaType

# Target collection for the car embeddings
collection_name = "car_data"

# Look up which collections already exist on the server
collections = qdrant_client.get_collections().collections
collection_names = [existing.name for existing in collections]

# Start from a clean slate: drop any previous version so the vector
# configuration and payload index below are applied to a fresh collection
if collection_name in collection_names:
    qdrant_client.delete_collection(collection_name=collection_name)
    print(f"Deleted existing collection '{collection_name}'")

# The vector size must match the dimensionality of the embeddings upserted later
qdrant_client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(
        size=384,
        distance=Distance.COSINE,
    ),
)
print(f"Created collection '{collection_name}'")

# Index `vector_type` as a keyword field so it can be used in payload filters
qdrant_client.create_payload_index(
    collection_name=collection_name,
    field_name="vector_type",
    field_schema=PayloadSchemaType.KEYWORD,
)
print(f"Added payload index on vector_type to collection '{collection_name}'")

Created collection 'car_data'
Added payload index on vector_type to collection 'car_data'


In [24]:
# Load the sentence-transformer model used to embed car brand and model names.
# Its vectors are stored in the collection created above, which is configured
# for 384-dimensional vectors, so the model's output size must match that.
# NOTE(review): the E5 model card recommends "query: " / "passage: " prefixes
# on input text; this notebook embeds raw strings — confirm that is intended.
model = SentenceTransformer("intfloat/multilingual-e5-small")

In [25]:
# Function to create Qdrant points (one brand vector + one model vector per row)
def create_points_from_df(df, model_id_offset=10000):
    """Build the Qdrant points for every row of ``df``.

    Each row produces two points whose payloads differ only in ``vector_type``:

    * a ``"brand"`` point: vector is the embedding of ``car_brand`` and the
      point id is the row's ``id``;
    * a ``"model"`` point: vector is the embedding of ``car_model`` and the
      point id is ``id + model_id_offset``.

    The column text is embedded exactly as stored; no extra context words or
    prefixes are added to it. Embeddings are computed with the notebook-level
    ``model`` and cached per distinct text, so a brand shared by many rows is
    encoded only once.

    Args:
        df: DataFrame with the columns ``id``, ``car_brand``, ``car_model``,
            ``year_start`` and ``year_end``.
        model_id_offset: Amount added to a row's ``id`` to form the id of its
            model point. Defaults to 10000.

    Returns:
        list: all brand points in row order, followed by all model points in
        row order.

    Raises:
        ValueError: if any shifted model id equals a brand id, because both
            kinds of point share one id space and one would replace the other
            when upserted.
    """
    row_ids = [int(value) for value in df['id']]

    # Brand and model points live in the same id space; refuse to build a
    # batch in which one point would silently replace another.
    clashing_ids = set(row_ids).intersection(
        row_id + model_id_offset for row_id in row_ids
    )
    if clashing_ids:
        raise ValueError(
            f"model_id_offset={model_id_offset} makes model point ids collide "
            f"with brand point ids: {sorted(clashing_ids)}"
        )

    embedding_cache = {}

    def embed(text):
        # Encode each distinct string once; .tolist() hands every point its
        # own fresh list so points never share a mutable vector.
        if text not in embedding_cache:
            embedding_cache[text] = model.encode(text)
        return embedding_cache[text].tolist()

    brand_points = []
    model_points = []

    columns = zip(
        row_ids,
        df['car_brand'],
        df['car_model'],
        df['year_start'],
        df['year_end'],
    )
    for car_id, car_brand, car_model, year_start, year_end in columns:
        # Payload fields common to both points of this row
        shared_payload = {
            "id": car_id,
            "car_brand": car_brand,
            "car_model": car_model,
            "year_start": int(year_start),
            "year_end": int(year_end),
        }

        # Point searchable by brand name
        brand_points.append(
            PointStruct(
                id=car_id,
                vector=embed(car_brand),
                payload={**shared_payload, "vector_type": "brand"},
            )
        )

        # Point searchable by model name; id is shifted to stay clear of brand ids
        model_points.append(
            PointStruct(
                id=car_id + model_id_offset,
                vector=embed(car_model),
                payload={**shared_payload, "vector_type": "model"},
            )
        )

    return brand_points + model_points

# Turn every catalogue row into its brand point and model point
points = create_points_from_df(df)
print(f"Created {len(points)} points for ingestion")

# Send the whole batch to Qdrant in one upsert call
qdrant_client.upsert(points=points, collection_name=collection_name)

print(f"Successfully ingested {len(points)} points into Qdrant collection '{collection_name}'")

Created 124 points for ingestion
Successfully ingested 124 points into Qdrant collection 'car_data'
