In [1]:
import os 
import json
from pymongo import MongoClient
from pymongo import MongoClient
import pandas as pd
MONGO_CONNECTION_STRING = ""


In [2]:
def json_to_mongo(json_file, collection_name, database_name):
    # Load JSON data
    with open(json_file, "r") as f:
        data = json.load(f)

    # Add 'data_to_embed' field
    for item in data:
        # Can this be done at the time of embedding? Projection did not work in contoso-booking notebook
        item['data_to_embed'] = f"{item.get('name', '')} {item.get('description', '')} {item.get('neighborhood_overview', '')}" 

    #Insert JSON data into MongoDB collection
    client = MongoClient(MONGO_CONNECTION_STRING)
    db = client[database_name]
    collection = db[collection_name]
    collection.insert_many(data)
    client.close()
    print(f"Inserted {len(data)} documents into the '{collection_name}' collection in MongoDB.")

json_to_mongo("data/datasets without embeddings/small_for_testing.json", "listings", "contoso_bookings_concat" )

Inserted 1434 documents into the 'listings' collection in MongoDB.


In [5]:
client = MongoClient(MONGO_CONNECTION_STRING)
db = client["contoso_bookings_concat"]
collection = db["listings"]
db.command({
  'createIndexes': 'listings',
  'indexes': [
    {
      'name': 'listingIndex',
      'key': {
        "embeddings": "cosmosSearch"
      },
      'cosmosSearchOptions': {
        'kind': 'vector-diskann',
        'numLists': 1,
        'similarity': 'COS',
        'dimensions': 1536
      }
    }
  ]
})

{'raw': {'defaultShard': {'numIndexesBefore': 1,
   'numIndexesAfter': 2,
   'createdCollectionAutomatically': False,
   'ok': 1}},
 'ok': 1}

In [None]:
import time
from pymongo import UpdateOne
batch_size = 15
total_updated = 0
iteration = 0

while True:
    iteration += 1

    # Find records to update
    records_to_update = collection.find({
    '$and': [
        {'embeddings': {'$exists': False}},
        {'data_to_embed': {'$exists': True}}
    ]
    }).limit(batch_size)

    records_to_update = list(records_to_update)
    print(records_to_update)

    if not records_to_update:
        print(f"All rows have been updated. Total updated rows: {total_updated}")
        break

    total_updated += len(records_to_update)

    print(f"Iteration: {iteration}, has handled {total_updated} rows")

    # Prepare bulk operations
    bulk_ops = [
        UpdateOne({'_id': record['_id']},
                {'$generateEmbeddings': {'data_to_embed': 'embeddings'}})
        for record in records_to_update
    ]

    if bulk_ops:
        result = collection.bulk_write(bulk_ops)
        print(f"Bulk write result: {result.bulk_api_result}")

    time.sleep(0.5)  # Sleep for 500 ms


[{'_id': ObjectId('67288be6f310036be2627a5e'), 'id': '360', 'listing_url': 'https://www.airbnb.com/rooms/360', 'source': 'city scrape', 'name': 'Sit in the Peaceful Garden of the Chickadee Cottage in LoHi', 'description': "Enjoy the famous Colorado weather and unplug in indoor & outdoor living. <br />Our charming cottage has a serene ambiance throughout every area. <br />Spend a sunny afternoon out on the hammock or enjoy the garden parlor sofa relax, read, or play a game, sink into the shared hot tub, practice yoga on the deck.<br />We are located next to downtown and in the neighborhood of lower highlands, <br />Short walks to superb coffee shops, restaurants, microbrews, distilleries, dispensaries & downtown. We are 420 outdoors only. LGBT Friendly, allergy-free, fragrance-free & pet-free. Ozone sterilized.<br /><br />Chickadee Cottage is the largest of our guest cottages.<br /><br />LOCATION: <br />The cottage is located in the center of Lower Highlands (LOHI) next to the Navajo St

Bulk write result: {'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 15, 'nModified': 15, 'nRemoved': 0, 'upserted': []}
[{'_id': ObjectId('67288be6f310036be2627a6c'), 'id': '504080', 'listing_url': 'https://www.airbnb.com/rooms/504080', 'source': 'city scrape', 'name': 'Creative Haven in Artist’s Abode', 'description': 'This two-bedroom, two-bath house is filled with treasures, trinkets, art, and oddities from around the world. Features a spacious front yard, patio, cozy corners, large kitchen, and unique curios from near and far. Curious adventure seekers and art aficionados will delight at this special space!', 'neighborhood_overview': 'The Clayton neighborhood is a rapidly changing area. A historically Black neighborhood of Denver, in the last decade the neighborhood has been experiencing significant gentrification. Many people in the Clayton area actively participate in neighborhood initiatives to help build relationships and engage in mutua

Bulk write result: {'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 15, 'nModified': 15, 'nRemoved': 0, 'upserted': []}
[{'_id': ObjectId('67288be6f310036be2627a7b'), 'id': '958612', 'listing_url': 'https://www.airbnb.com/rooms/958612', 'source': 'city scrape', 'name': 'DENVER SUITE MOUNTAIN VIEW GREAT LOCATION', 'description': 'DENVER SUITE WITH A MOUNTAIN VIEW: live in the very heart of downtown and enjoy a beautiful and peaceful mountain view!  Fully furnished 2 bedroom / 2 bathroom suite with an open plan living/dining room furnished with contemporary leather furniture and original artwork.  Perfect for an extended stay for a small family or a business traveler.', 'neighborhood_overview': "IDEAL LOCATION: Walk out of this suite, take the elevator down, step through the lobby and you are THERE, truly in DOWNTOWN! The Performing Arts Center is just a half-block away. The Convention Center is just two blocks away as is historic Larimer Square, 

Bulk write result: {'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 15, 'nModified': 15, 'nRemoved': 0, 'upserted': []}
[{'_id': ObjectId('67288be6f310036be2627a88'), 'id': '1309159', 'listing_url': 'https://www.airbnb.com/rooms/1309159', 'source': 'city scrape', 'name': 'Logan Square Studio APT #3 - w/clawfoot tub', 'description': 'APT 3 is a ~300 sq ft studio on the second floor.  The large bathroom features the original cast iron tub refinished with all new hardware and shower.  The studio space comfortably fits a queen size bed, dresser, two sitting chairs, and a large adjustable table that can be used for dining or desk work.', 'neighborhood_overview': 'Walk Score - 93<br />In Northwest Cap Hill<br />Blocks from the Capitol and Civic Center Park<br />Easy walk to downtown Denver, Golden Triangle, South Broadway, Cap Hill, Cheeseman Park, upTown 17th St Restaurants and Ace Ping Pong and One Up Arcade Bar.  Easy walk to Fillmore and Ogden The

Bulk write result: {'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 15, 'nModified': 15, 'nRemoved': 0, 'upserted': []}
[{'_id': ObjectId('67288be6f310036be2627a96'), 'id': '1733052', 'listing_url': 'https://www.airbnb.com/rooms/1733052', 'source': 'city scrape', 'name': 'Beautiful Private Basement Suite near Downtown', 'description': 'Beautifully remodeled basement suite in lovely Victorian home in central Denver.', 'neighborhood_overview': 'Many historical homes with a beautiful park nearby. Tons of ethnic restaurants, museums, and places of interest within walking distance from our address. There are 4 breweries within a mile; another 6 breweries within 2 miles. Excellent music venues are close by; 2 (Fillmore and Ogden Theater) are a couple blocks away. Public transportation and bike rentals are convenient from our location.', 'latitude': '39.73799', 'longitude': '-104.97219', 'price': '$103.00', 'amenities': '["Hangers", "Essentials", "Dish

Bulk write result: {'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 15, 'nModified': 15, 'nRemoved': 0, 'upserted': []}
[{'_id': ObjectId('67288be6f310036be2627aa5'), 'id': '2232323', 'listing_url': 'https://www.airbnb.com/rooms/2232323', 'source': 'city scrape', 'name': 'Cozy Cottage in the Country Club,', 'description': 'You will enjoy my cozy home in the Country Club area.  The home is a 3 bed/3 bath with 2 bedrooms and a bath on the garden level.  The neighborhood is easily walkable to Cherry Creek and Capitol Hill restaurants and shopping.<br />', 'neighborhood_overview': 'Friendly with families of all ages who will say hi.', 'latitude': '39.72367', 'longitude': '-104.96557', 'price': '$260.00', 'amenities': '["Dedicated workspace", "Microwave", "Private backyard \\u2013 Fully fenced", "Dishes and silverware", "Air conditioning", "Indoor fireplace", "Smoke alarm", "Fireplace guards", "First aid kit", "Carbon monoxide alarm", "Fire extinguis

Bulk write result: {'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 15, 'nModified': 15, 'nRemoved': 0, 'upserted': []}
[{'_id': ObjectId('67288be6f310036be2627ab5'), 'id': '3150813', 'listing_url': 'https://www.airbnb.com/rooms/3150813', 'source': 'city scrape', 'name': 'Modern Urban Oasis in Rino', 'description': "This is a converted 2500 square foot modern industrial loft space on a beautiful double 10,000 square foot lot in a great part of town. New places opening daily here. A-line Blake New Mission Ballroom is a short walk!<br /><br /> Super tall ceilings, reclaimed wood floors,   mid-century modern, eclectic, bohemian vibe. It's equipped with an amazing sound system and commercial grade kitchen as well as spacious side yard with a fire pit. Ideal for large groups, reunions, and the like.", 'neighborhood_overview': 'Very close to the A-line Blake street stop from the airport. So many hip, new things to do in the neighborhood including the 

Bulk write result: {'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 15, 'nModified': 15, 'nRemoved': 0, 'upserted': []}
[{'_id': ObjectId('67288be6f310036be2627ac4'), 'id': '3891080', 'listing_url': 'https://www.airbnb.com/rooms/3891080', 'source': 'city scrape', 'name': 'Charming vintage Five Points apt', 'description': 'Welcome to the HEART of Five Points , the closest neighborhood to DOWNTOWN Denver - a walker’s and scooter/bike/ride share person’s CITY PARADISE. Blocks away from lively RINO arts district, Curtis Park, grocery, and SO many unique restaurants, coffee shops, brewpubs, along with easy access to the interstate for additional Colorado adventures. You can check off LOCATION with this rental! <br />This well appointed and charming vintage apartment is in our 1887 Victorian home and occupies the top floor.', 'neighborhood_overview': '', 'latitude': '39.75229', 'longitude': '-104.97978', 'price': '$110.00', 'amenities': '["Hangers", "

Bulk write result: {'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 15, 'nModified': 15, 'nRemoved': 0, 'upserted': []}
[{'_id': ObjectId('67288be6f310036be2627ad2'), 'id': '4350594', 'listing_url': 'https://www.airbnb.com/rooms/4350594', 'source': 'city scrape', 'name': 'Kid Friendly Home-Great Location with Sauna', 'description': "Charming home in great Denver neighborhood. 2 minute walk to a park with new play structures, 20 min walk (4 min drive) to French bakery, great coffee, movie theater, great restaurants & Trader Joe's. 15 min drive downtown Den, 10 min drive Denver Zoo & Museum of Nature & Science, 8 min drive Cherry Creek North (shopping, restaurants, bars) & Denver Botanic Gardens.", 'neighborhood_overview': '', 'latitude': '39.72904', 'longitude': '-104.92346', 'price': '$175.00', 'amenities': '["Hangers", "Essentials", "Dishwasher", "Dedicated workspace", "Toaster", "Children\\u2019s dinnerware", "Shampoo", "Hair dryer", "Oven", "

In [6]:
# list all the indexes

indexes = collection.list_indexes()

for index in indexes:
    print(index)

SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('embeddings', 'cosmosSearch')])), ('name', 'listingIndex'), ('cosmosSearchOptions', SON([('kind', 'vector-diskann'), ('numLists', 1), ('similarity', 'COS'), ('dimensions', 1536)]))])
