In [2]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the parent of the current working directory on the import path so that
# packages living one level above the notebook can be imported
# (presumably the project root that contains `inmueblesapp` — confirm).
# The membership check keeps repeated runs of this cell from adding duplicates.
proyecto_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(proyecto_path) == 0:
    sys.path.append(proyecto_path)


In [3]:
from inmueblesapp.database import MongoSingleton

# Get the client from the project's singleton and select the working database.
# `local=True` is forwarded as-is to MongoSingleton.get_client
# (presumably selects a locally running MongoDB — confirm in inmueblesapp.database).
client = MongoSingleton.get_client(local=True)
db = client["inmuebles_db"]

# Show which databases the server exposes, as a quick connectivity check.
database_names = client.list_database_names()
print("Databases:", database_names)


Databases: ['admin', 'config', 'inmuebles_db', 'local']


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# List all collections in the selected database (`db`); the returned list of
# names is the last expression of the cell, so it is displayed as its output.
db.list_collection_names()


['Arriendo', 'Venta']

In [5]:
# Insert a single document into "properties" (the collection is created
# automatically on first insert) and report the generated _id.
sample_listing = {
    "PROPERTY_TYPE": "Apartamento",
    "PRICE": 1_500_000,
    "ROOMS": 3,
    "BATHROOMS": 2,
    "BUILT_AREA": 85.0,
    "CITY": "Bogotá",
    "SOURCE": "test",
}
result = db["properties"].insert_one(sample_listing)
print(f"Inserted ID: {result.inserted_id}")


Inserted ID: 6992740cfab6880b47f9fd1a


In [6]:
# Insert several documents in one round trip.
# Each tuple is (PROPERTY_TYPE, PRICE, ROOMS, CITY); the comprehension expands
# it into the document shape stored in the "properties" collection.
docs = [
    {"PROPERTY_TYPE": kind, "PRICE": price, "ROOMS": rooms, "CITY": city}
    for kind, price, rooms, city in (
        ("Casa", 3_200_000, 4, "Medellín"),
        ("Apartaestudio", 800_000, 1, "Bogotá"),
        ("Apartamento", 2_100_000, 2, "Cali"),
    )
]
result = db["properties"].insert_many(docs)
print(f"Inserted {len(result.inserted_ids)} documents")


Inserted 3 documents


In [7]:
# Query with an equality filter and print every matching document.
bogota_cursor = db["properties"].find({"CITY": "Bogotá"})
for doc in bogota_cursor:
    print(doc)


{'_id': ObjectId('6992740cfab6880b47f9fd1a'), 'PROPERTY_TYPE': 'Apartamento', 'PRICE': 1500000, 'ROOMS': 3, 'BATHROOMS': 2, 'BUILT_AREA': 85.0, 'CITY': 'Bogotá', 'SOURCE': 'test'}
{'_id': ObjectId('69927413fab6880b47f9fd1c'), 'PROPERTY_TYPE': 'Apartaestudio', 'PRICE': 800000, 'ROOMS': 1, 'CITY': 'Bogotá'}


In [8]:
# Count documents: an empty filter counts the whole collection, a field
# filter counts only the matching subset.
properties = db["properties"]
total = properties.count_documents({})
bogota = properties.count_documents({"CITY": "Bogotá"})
print(f"Total: {total}, Bogotá: {bogota}")


Total: 4, Bogotá: 2


In [9]:
# Unique values of a field (what Qdrant can't do easily).
# One (label, field) pair per line of output, printed in this order.
for label, field in (("Property types", "PROPERTY_TYPE"), ("Cities", "CITY")):
    print(f"{label}:", db["properties"].distinct(field))


Property types: ['Apartaestudio', 'Apartamento', 'Casa']
Cities: ['Bogotá', 'Cali', 'Medellín']


In [13]:
# Min and Max of a field, obtained by sorting on it and taking the first document.
from pymongo import ASCENDING, DESCENDING

# Only consider documents whose PRICE is numeric. Without this filter an
# ascending sort returns documents with a missing or null PRICE first (null
# sorts below numbers in MongoDB), and a descending sort returns non-numeric
# values such as strings first, so the "min"/"max" document could have no
# usable PRICE at all.
numeric_price = {"PRICE": {"$type": "number"}}

# Note: these names hold the full documents, not just the price values.
min_price = db["Arriendo"].find_one(numeric_price, sort=[("PRICE", ASCENDING)])
max_price = db["Arriendo"].find_one(numeric_price, sort=[("PRICE", DESCENDING)])

# find_one returns None when nothing matches (e.g. an empty collection);
# report that instead of failing on the subscript below.
if min_price is None or max_price is None:
    print("No documents with a numeric PRICE found in 'Arriendo'")
else:
    print(f"Min price: {min_price['PRICE']:,}")
    print(f"Max price: {max_price['PRICE']:,}")


Min price: 0
Max price: 2,300,986,000.0


In [15]:
# Aggregation pipeline: per-property-type price statistics for "Arriendo".

# Stage 1: one output document per PROPERTY_TYPE with average, count, min and max.
group_by_property_type = {
    "$group": {
        "_id": "$PROPERTY_TYPE",
        "avg_price": {"$avg": "$PRICE"},
        "count": {"$sum": 1},
        "min_price": {"$min": "$PRICE"},
        "max_price": {"$max": "$PRICE"},
    }
}

# Stage 2: most expensive property types (by average price) first.
sort_by_avg_price_desc = {"$sort": {"avg_price": -1}}

pipeline = [group_by_property_type, sort_by_avg_price_desc]

price_stats = db["Arriendo"].aggregate(pipeline)
for doc in price_stats:
    print(doc)


{'_id': 'Edificio', 'avg_price': 122086983.08884297, 'count': 484, 'min_price': 1500000, 'max_price': 2300986000.0}
{'_id': 'Lote', 'avg_price': 59986018.18181818, 'count': 110, 'min_price': 30000, 'max_price': 344599999.9999998}
{'_id': 'Bodega', 'avg_price': 37696956.28683363, 'count': 1118, 'min_price': 300000, 'max_price': 276980728.63999975}
{'_id': 'Oficina', 'avg_price': 21881079.82142857, 'count': 10752, 'min_price': 0, 'max_price': 251795999.9999999}
{'_id': 'Casa Lote', 'avg_price': 19278571.428571425, 'count': 35, 'min_price': 2900000, 'max_price': 58199999.99999993}
{'_id': 'Local', 'avg_price': 13201813.76726813, 'count': 6340, 'min_price': 111111, 'max_price': 115964272.31999913}
{'_id': 'Casa', 'avg_price': 11848728.818971759, 'count': 4143, 'min_price': 0, 'max_price': 49991600.0}
{'_id': 'Casa Campestre', 'avg_price': 11747472.0, 'count': 25, 'min_price': 5000000, 'max_price': 28079999.999999985}
{'_id': 'Consultorio', 'avg_price': 6927758.848780488, 'count': 205, 'min

In [None]:
# Smallest numeric BUILT_AREA among "Casa" listings in "Arriendo".
# The filter keeps only documents where BUILT_AREA exists and is a number;
# the projection returns just that field (plus _id).
# ASCENDING comes from the earlier `from pymongo import ...` cell.
query_filter = {
    "PROPERTY_TYPE": "Casa",
    "BUILT_AREA": {"$exists": True, "$type": "number"},
}
db["Arriendo"].find_one(
    filter=query_filter,
    projection={"BUILT_AREA": 1},
    sort=[("BUILT_AREA", ASCENDING)],
)

In [None]:
# ⚠️ Drop test data (uncomment to run)
# NOTE: drop() removes the entire "properties" collection, not only the
# documents inserted by the cells above.
# db["properties"].drop()
# print("Collection dropped")
