In [None]:
! pip install pymongo
! pip install python-dotenv

In [1]:
import pymongo  
import json
from dotenv import dotenv_values

In [2]:
# Connect to the database and make sure the collections used by this notebook exist.

# Settings are read from a local dotenv file so the connection string stays out of the notebook.
env_name = "myconfig.env"
config = dotenv_values(env_name)

# Connection string
# Fail early with an actionable message if the key is missing or has no value.
cosmos_conn = config.get('cosmos_connection_string')
if not cosmos_conn:
    raise KeyError(
        f"'cosmos_connection_string' is missing or empty in '{env_name}'"
    )
cosmos_client = pymongo.MongoClient(cosmos_conn)

# Database name
DATABASE_NAME = "ProductRecommendation"
db = cosmos_client[DATABASE_NAME]

# Drop the database if it already exists (consider if this is really needed, as it will remove all existing data)
# cosmos_client.drop_database(DATABASE_NAME)

# Collection names
collection_names = ["ProductCollection", "ActualRating", "PredictedRating"]

# Fetch the existing collection names once instead of once per loop iteration.
existing_collections = set(db.list_collection_names())

# Iterate through the collection names and create them if they do not exist
for collection_name in collection_names:
    if collection_name not in existing_collections:
        # Creates a collection
        db.create_collection(collection_name)
        print(f"Created collection '{collection_name}'.")
    else:
        print(f"Using existing collection: '{collection_name}'.")

Using existing collection: 'ProductCollection'.
Using existing collection: 'ActualRating'.
Using existing collection: 'PredictedRating'.


In [5]:
# Create the vector search index on ProductCollection.

# Definition of the index over the "Embedding" field.
# NOTE(review): 'COS' is presumably cosine similarity and 1536 the embedding length --
# confirm against the embedding model used to populate "Embedding".
vector_index_spec = {
    'name': 'vectorSearchIndex',
    'key': {"Embedding": "cosmosSearch"},
    'cosmosSearchOptions': {
        'kind': 'vector-hnsw',
        'm': 16,
        'efConstruction': 40,
        'similarity': 'COS',
        'dimensions': 1536,
    },
}

# The command name ('createIndexes') is kept as the first key of the command document.
create_indexes_command = {
    'createIndexes': 'ProductCollection',
    'indexes': [vector_index_spec],
}

# Trailing semicolon suppresses the command reply in the cell output.
db.command(create_indexes_command);

In [4]:
# Compound ascending index on the "Type" and "Id" fields of ProductCollection.
# NOTE(review): presumably supports lookups by product type and rated product id -- confirm with the query code.

collection_product = db.get_collection('ProductCollection')
type_id_index_keys = [("Type", pymongo.ASCENDING), ("Id", pymongo.ASCENDING)]
# Left as the last expression so the generated index name is shown as the cell output.
collection_product.create_index(type_id_index_keys)

'Type_1_Id_1'

In [5]:
# Load the product catalog data from JSON and insert it into ProductCollection.
# The context manager closes the file even if parsing fails; JSON is read as UTF-8
# rather than the platform's default encoding.
with open(file="./data/catalog/catalog.json", mode="r", encoding="utf-8") as data_file:
    data = json.load(data_file)

# NOTE(review): re-running this cell inserts the documents again -- confirm whether
# duplicates are acceptable or the collection should be emptied first.
result = collection_product.insert_many(data)

print(f"Number of data points added: {len(result.inserted_ids)} in {collection_product.name}")

Number of data points added: 101 in ProductCollection


In [4]:
# Load the rating data from JSON and insert it into the ActualRating collection.
# The context manager closes the file even if parsing fails; JSON is read as UTF-8
# rather than the platform's default encoding.
with open(file="./data/ratings/AugmentedRating.json", mode="r", encoding="utf-8") as data_file:
    data = json.load(data_file)

actual_rating = db['ActualRating']

# NOTE(review): re-running this cell inserts the documents again -- confirm whether
# duplicates are acceptable or the collection should be emptied first.
result = actual_rating.insert_many(data)

print(f"Number of data points added: {len(result.inserted_ids)} in {actual_rating.name}")

Number of data points added: 200000 in ActualRating
