In [None]:
! pip install pymongo
! pip install python-dotenv

In [1]:
import pymongo  
import json
from dotenv import dotenv_values

In [2]:
# Load the connection settings from a local dotenv file so that no secrets
# are hard-coded in the notebook.
env_name = "myconfig.env"
config = dotenv_values(env_name)

# Connection string
cosmos_conn = config['cosmos_mongo_connection_string']
cosmos_client = pymongo.MongoClient(cosmos_conn)

# Database handle
database = cosmos_client[config['cosmos_database']]

# Collection names.
# These are kept as plain strings (not Collection objects): that is what
# database.list_collection_names() returns, and what database.create_collection(),
# the createIndexes command and database[<name>] lookups expect.
actual_ratings = config['cosmos_actual_ratings']
predicted_ratings = config['cosmos_predicted_ratings']
product_catalog = config['cosmos_product_collection']

# Drop the database if recreating from scratch
# cosmos_client.drop_database(config['cosmos_database'])



Using existing collection: 'ProductCollection'.
Using existing collection: 'ActualRating'.
Using existing collection: 'PredictedRating'.


In [5]:
# Create the collections and indexes if they do not exist.
#
# The collection names are read from the config as plain strings:
# list_collection_names() returns strings, and both create_collection() and the
# createIndexes command take a collection *name*.
product_collection_name = config['cosmos_product_collection']
collection_names = [
    config['cosmos_actual_ratings'],
    config['cosmos_predicted_ratings'],
    product_collection_name,
]

# Ask the server for the existing collections once instead of once per iteration.
existing_names = set(database.list_collection_names())

# Iterate through the collection names and create them if they do not exist
for collection_name in collection_names:
    if collection_name not in existing_names:
        # Creates a collection
        database.create_collection(collection_name)
        # Remember it, so a name that appears twice in the config is not created twice
        existing_names.add(collection_name)
        print(f"Created collection '{collection_name}'.")
    else:
        print(f"Using existing collection: '{collection_name}'.")


# Create a compound ascending index on Type and Id in ProductCollection.
# NOTE(review): this index spec carries no 'name'; MongoDB documents 'name' as a
# required field of createIndexes specs -- confirm the target service accepts it.
database.command({
  'createIndexes': product_collection_name,
  'indexes': [
    {
      'key': {
        'Type': 1,
        'Id': 1
      }
    }
  ]
})


# Create the HNSW vector index on the Embedding field of ProductCollection.
# 'similarity': 'COS' selects cosine similarity; 'dimensions' has to match the
# length of the vectors stored in 'Embedding'.
# The trailing ';' keeps the command's reply out of the cell output.
database.command({
  'createIndexes': product_collection_name,
  'indexes': [
    {
      'name': 'vectorSearchIndex',
      'key': {
        "Embedding": "cosmosSearch"
      },
      'cosmosSearchOptions': {
        'kind': 'vector-hnsw',
        'm': 16,
        'efConstruction': 40,
        'similarity': 'COS',
        'dimensions': 1536
      }
    }
  ]
});

In [5]:
# Load the product catalog data.
# The context manager closes the file even if JSON parsing fails, and the
# explicit UTF-8 encoding avoids depending on the platform's default encoding.
with open(file="./data/catalog/catalog.json", mode="r", encoding="utf-8") as data_file:
    data = json.load(data_file)

# Insert the data into the ProductCollection.
# The collection is looked up by its configured name (a string), which is what
# database[...] expects.
# Note: re-running this cell inserts the catalog again.
products = database[config['cosmos_product_collection']]
result = products.insert_many(data)

print(f"Number of data points added: {len(result.inserted_ids)} in {products.name}")

Number of data points added: 101 in ProductCollection


In [4]:
# Load the rating data.
# The context manager closes the file even if JSON parsing fails, and the
# explicit UTF-8 encoding avoids depending on the platform's default encoding.
with open(file="./data/ratings/actualRatings.json", mode="r", encoding="utf-8") as data_file:
    data = json.load(data_file)

# Use the `database` handle created in the connection cell (there is no `db`
# variable in this notebook) and take the collection name from the config
# instead of hard-coding it.
actual_rating = database[config['cosmos_actual_ratings']]

# Note: re-running this cell inserts the ratings again.
result = actual_rating.insert_many(data)

print(f"Number of data points added: {len(result.inserted_ids)} in {actual_rating.name}")

Number of data points added: 200000 in ActualRating
