In [None]:
! pip install azure-cosmos
! pip install azure-core
! pip install python-dotenv
! pip install aiohttp

In [11]:
#Cosmos DB imports
from azure.cosmos.aio import CosmosClient
from azure.cosmos import PartitionKey
from azure.cosmos import ThroughputProperties
from openai import AzureOpenAI

import json
from dotenv import dotenv_values
from uuid import uuid4 as GUID

import asyncio


In [12]:
# All connection settings are read from a dotenv file
env_name = "myconfig.env"
config = dotenv_values(env_name)

# --- Azure Cosmos DB: account credentials, database and container names ---
cosmos_endpoint = config["cosmos_endpoint"]
cosmos_key = config["cosmos_key"]
database_name = config["cosmos_database"]
actual_ratings_name = config["cosmos_actual_ratings"]
predicted_ratings_name = config["cosmos_predicted_ratings"]
product_catalog_name = config["cosmos_product_catalog"]

# Async Cosmos client (azure.cosmos.aio) shared by the cells below
cosmos_client = CosmosClient(
    cosmos_endpoint,
    cosmos_key,
)

# --- Azure OpenAI: credentials, API version and embeddings deployment ---
openai_type = config["openai_type"]
openai_key = config["openai_key"]
openai_base = config["openai_endpoint"]
openai_version = config["openai_version"]
openai_embeddings = config["openai_embeddings_deployment"]

# Client used to request embeddings
openai_client = AzureOpenAI(
    azure_endpoint=openai_base,
    api_key=openai_key,
    api_version=openai_version,
)

In [7]:
async def create_collections(embedding_dimensions: int = 1536) -> None:
    """Create the database and its three containers when they do not exist yet.

    Containers requested through the module-level ``cosmos_client``:

    * ``actual_ratings_name``    - partition key ``/UserId``, autoscale max 50000
    * ``predicted_ratings_name`` - partition key ``/UserId``, autoscale max 4000
    * ``product_catalog_name``   - partition key ``/id``, autoscale max 4000,
      with a vector embedding policy and a diskANN vector index on ``/Embedding``

    Every call is a ``create_*_if_not_exists`` call.
    NOTE(review): an already existing container presumably keeps its current
    policies - confirm before relying on a re-run to change them.

    Args:
        embedding_dimensions: Length of the vectors stored under ``/Embedding``.
            It has to match the output size of the embeddings deployment that
            fills that field; the default keeps the previous fixed value, 1536.
    """
    # To rebuild everything from scratch, drop the database first:
    # await cosmos_client.delete_database(database_name)

    database = await cosmos_client.create_database_if_not_exists(id=database_name)

    # Ratings containers: identical apart from name and autoscale ceiling
    ratings_containers = (
        (actual_ratings_name, 50000),
        (predicted_ratings_name, 4000),
    )
    for container_id, max_throughput in ratings_containers:
        await database.create_container_if_not_exists(
            id=container_id,
            partition_key=PartitionKey(path="/UserId"),
            offer_throughput=ThroughputProperties(auto_scale_max_throughput=max_throughput))

    # Product catalog: describes the vectors stored under /Embedding
    vector_embedding_policy = {
        "vectorEmbeddings": [
            {
                "path": "/Embedding",
                "dataType": "float32",
                "distanceFunction": "cosine",
                "dimensions": embedding_dimensions
            }
        ]
    }

    indexing_policy = {
        "includedPaths": [
            {"path": "/*"}
        ],
        "excludedPaths": [
            {"path": "/\"_etag\"/?"},
            # Keep the vector out of the regular index; the vector index below
            # serves it. Azure's vector search guidance recommends this because
            # leaving it included raises the RU charge and latency of inserts.
            {"path": "/Embedding/*"}
        ],
        "vectorIndexes": [
            {"path": "/Embedding", "type": "diskANN"}
        ]
    }

    await database.create_container_if_not_exists(
        id=product_catalog_name,
        partition_key=PartitionKey(path="/id", kind="Hash"),
        indexing_policy=indexing_policy,
        vector_embedding_policy=vector_embedding_policy,
        offer_throughput=ThroughputProperties(auto_scale_max_throughput=4000))

# Create the database and containers now (top-level `await` relies on the notebook's async cell support)
await create_collections()

In [13]:
def generate_azure_openai_embeddings(text):
    """Return the embedding Azure OpenAI produces for ``text``.

    Sends ``text`` to the deployment named by ``openai_embeddings`` through the
    shared ``openai_client`` and returns the ``embedding`` attribute of the
    first entry in the response's ``data``.
    """
    first_result = openai_client.embeddings.create(
        input=text,
        model=openai_embeddings,
    ).data[0]
    return first_result.embedding

In [None]:
async def load_product_catalog() -> None:
    """Embed every product in the catalog file and upsert it into Cosmos DB.

    Reads ``./data/catalog/catalog.json``, and for each product:

    * copies ``Id`` into a string ``id`` field,
    * builds a text from ``Brand``, ``Type``, ``Name`` and ``Description`` and
      stores its embedding under ``Embedding``,
    * upserts the product into the ``product_catalog_name`` container.

    Prints the number of products upserted when done.
    """
    # Decode explicitly as UTF-8 so the platform's default encoding cannot
    # garble or reject non-ASCII product text.
    with open(file="./data/catalog/catalog.json", mode="r", encoding="utf-8") as data_file:
        products = json.load(data_file)

    database = cosmos_client.get_database_client(database_name)
    product_catalog_container = database.get_container_client(product_catalog_name)

    added = 0
    for product in products:
        # Copy the Id to id, which is also the container's partition key
        product['id'] = str(product['Id'])
        text = "Brand: " + product['Brand'] + " ,Type: " + product['Type'] +\
            " ,Name: " + product['Name'] + " ,Description: " + product['Description']
        product['Embedding'] = generate_azure_openai_embeddings(text)
        await product_catalog_container.upsert_item(product)
        # Count only after the upsert succeeded
        added += 1

    print(f"Number of products added: {added}")


# Run the catalog load now (top-level `await` relies on the notebook's async cell support)
await load_product_catalog()

In [9]:
async def load_actual_ratings() -> None:
    """Insert every rating from the ratings file into Cosmos DB.

    Reads ``./data/ratings/actualRatings.json``, gives each rating a freshly
    generated GUID string as ``id`` and creates it in the
    ``actual_ratings_name`` container. Prints the number of ratings inserted
    when done.
    """
    # Decode explicitly as UTF-8 so the platform's default encoding cannot
    # garble or reject non-ASCII text in the ratings file.
    with open(file="./data/ratings/actualRatings.json", mode="r", encoding="utf-8") as data_file:
        ratings = json.load(data_file)

    database = cosmos_client.get_database_client(database_name)
    actual_ratings_container = database.get_container_client(actual_ratings_name)

    added = 0
    for rating in ratings:
        # Add a unique id to the item
        rating['id'] = str(GUID())
        await actual_ratings_container.create_item(rating)
        # Count only after the insert succeeded
        added += 1

    print(f"Number of customer ratings added: {added}")

# Loading the actual ratings into the container is not required; uncomment the call below to load them anyway
# await load_actual_ratings()