In [None]:
from azure.cosmos import CosmosClient, PartitionKey
from dotenv import dotenv_values
import json

In [None]:
# Name of the .env file to load. Copy the example.env template and point this
# at your own file.
env_name = "config.env"
config = dotenv_values(env_name)

# Cosmos DB settings read from the loaded .env values.
cosmos_conn = config["cosmos_nosql_connection_string"]
cosmos_key = config["cosmos_nosql_key"]
cosmos_database = config["cosmos_database_name"]
collection = config["cosmos_collection_name"]

# Create the client, db, collection, and policies

In [None]:
# Build the Cosmos DB client from the values loaded from the .env file.
# NOTE(review): the setting is named "connection_string" but is passed as the
# client's first positional argument - confirm it holds what CosmosClient expects there.
client = CosmosClient(
    cosmos_conn,
    credential=cosmos_key,
)

In [None]:
# Get a handle to the configured database, creating it if it does not exist yet.
db = client.create_database_if_not_exists(id=cosmos_database)

In [None]:
# Computed properties: each entry names a derived field and the query that produces it.
computed_properties = [
    {"name": "cp_overviewLower", "query": "SELECT VALUE LOWER(c.overview) FROM c"},
    {"name": "cp_overviewLength", "query": "SELECT VALUE LENGTH(c.overview) FROM c"},
    {"name": "cp_genreCount", "query": "SELECT VALUE ARRAY_LENGTH(c.genres) FROM c"},
    {
        "name": "cp_toVectorize",
        "query": 'SELECT VALUE CONCAT("The movie: ", c.original_title, " is about this: ", c.overview, " and was released on: ", c.release_date) FROM c',
    },
]

# Indexing policy: the catch-all path plus one explicit path per computed
# property are included; the _etag path is excluded.
indexing_policy = {
    "includedPaths": [
        {"path": indexed_path}
        for indexed_path in (
            "/*",
            "/cp_overviewLower/?",
            "/cp_overviewLength/?",
            "/cp_toVectorize/?",
            "/cp_genreCount/?",
        )
    ],
    "excludedPaths": [{"path": "/\"_etag\"/?"}],
}

# Create the container (or reuse it if it already exists), partitioned on /id,
# with the computed properties and indexing policy defined above.
container = db.create_container_if_not_exists(
    id=collection,
    partition_key=PartitionKey(path="/id"),
    computed_properties=computed_properties,
    indexing_policy=indexing_policy,
    offer_throughput=1000,
)

# Insert data into the collection

In [None]:
# Load the sample movie documents from the local JSON file.
# The encoding is given explicitly so that decoding does not depend on the
# platform's default locale encoding. `data` is deliberately not pre-initialized:
# if the load fails, later cells should fail loudly instead of iterating an
# empty list.
with open('MovieLens-4489-256D.json', 'r', encoding='utf-8') as data_file:
    data = json.load(data_file)

In [None]:
# Upsert every loaded document into the container.
# The sample data is already vectorized, so no embedding step runs here. To
# embed your own data, generate a vector from the title, tagline and overview
# and store it on the item before the upsert.
counter = 0  # stays 0 (and defined) when `data` is empty
for counter, item in enumerate(data, start=1):
    container.upsert_item(body=item)

    # Print progress every 100 upserts.
    if counter % 100 == 0:
        print("Inserted {} documents into collection.".format(counter))

# Report the final total: the every-100 message above never covers the last
# partial batch when the document count is not a multiple of 100.
print("Finished: {} documents upserted in total.".format(counter))

# Query the data

In [None]:
# Helper functions for running queries and printing results.
def _leading_query_metric(headers):
    """Return (name, value) for the first `name=value` entry of the query-metrics header, or None."""
    raw_metrics = headers.get('x-ms-documentdb-query-metrics')
    if not raw_metrics:
        return None
    name, separator, value = raw_metrics.split(";")[0].partition("=")
    if not separator:
        return None
    try:
        return name, float(value)
    except ValueError:
        return None


def query(query_string):
    """Run `query_string` against the container and print the results with their cost.

    Prints the matching items as indented JSON, then the request-unit (RU)
    charge, then the first entry of the query-metrics header.

    The response headers on the client connection only describe the most recent
    response, so the results are read page by page and the charge and metric
    are summed per page. Reading the headers once after draining every page
    would report the last page only.

    Args:
        query_string: The query text passed to `container.query_items`.

    Returns:
        None. Output is printed so a notebook cell does not echo the list twice.
    """
    pages = container.query_items(
        query=query_string,
        enable_cross_partition_query=True,
        populate_query_metrics=True).by_page()

    results = []
    total_charge = 0.0
    metric_name = None
    metric_total = 0.0
    for page in pages:
        results.extend(page)
        # Read the headers right after the page is consumed, before the next
        # page request overwrites them.
        headers = container.client_connection.last_response_headers
        total_charge += float(headers.get('x-ms-request-charge', 0))
        metric = _leading_query_metric(headers)
        if metric is not None:
            metric_name = metric[0]
            metric_total += metric[1]

    print(json.dumps(results, indent=4))
    print("RU charge:", round(total_charge, 2))
    # The metrics header may be absent; skip the line rather than raise KeyError.
    if metric_name is not None:
        print("{}={}".format(metric_name, round(metric_total, 2)))

# String LENGTH Example

**Query using `c.overview` with `LENGTH()`**

In [None]:
# Baseline: filter on overview length by calling LENGTH() directly in the
# WHERE clause, then run it through the query() helper.
query_string = '''
    SELECT c.id, c.overview
    FROM c
    WHERE LENGTH(c.overview) > 100
    '''
query(query_string)

**Query using `c.cp_overviewLength`**

In [None]:
# Same filter, expressed against the cp_overviewLength computed property
# (defined on the container as LENGTH(c.overview)).
query_string = '''
    SELECT c.id, c.overview
    FROM c
    WHERE c.cp_overviewLength > 100
    '''

query(query_string)

# CONTAINS Example

**Query using `c.overview` with `CONTAINS`**

In [None]:
# Baseline: search the raw overview text; the third CONTAINS argument (true)
# requests a case-insensitive match.
query_string = '''
    SELECT c.id, c.overview
    FROM c
    WHERE CONTAINS(c.overview, "buzz lightyear", true)
    '''

query(query_string)

**Query using `c.cp_overviewLower` with `CONTAINS`**

In [None]:
# Same search against cp_overviewLower (defined on the container as
# LOWER(c.overview)), so a lowercase search term is matched without the
# case-insensitive flag.
query_string = '''
    SELECT c.id, c.overview
    FROM c
    WHERE CONTAINS(c.cp_overviewLower, "buzz lightyear")
    '''

query(query_string)

# ARRAY_LENGTH Example

**Query using `ARRAY_LENGTH` on `c.genres`**

In [None]:
# Baseline: count genres by calling ARRAY_LENGTH() directly in the WHERE clause.
query_string = '''
    SELECT c.id, c.overview
    FROM c
    WHERE ARRAY_LENGTH(c.genres) > 2
    '''
query(query_string)

**Query using `c.cp_genreCount`**

In [None]:
# Same filter, expressed against the cp_genreCount computed property
# (defined on the container as ARRAY_LENGTH(c.genres)).
query_string = '''
    SELECT c.id, c.overview
    FROM c
    WHERE c.cp_genreCount > 2
    '''
query(query_string)

# CONCAT Example

**Query using `CONCAT` on multiple properties**

In [None]:
# Baseline: build the descriptive sentence with CONCAT() inside the query;
# TOP 20 limits how many results are returned.
query_string = '''
SELECT TOP 20 VALUE CONCAT("The movie: ", c.original_title, " is about this: ", c.overview, " and was released on: ", c.release_date) 
FROM c
'''
query(query_string)

**Query using `c.cp_toVectorize`**

In [None]:
# Read the same sentence from the cp_toVectorize computed property (defined on
# the container with the identical CONCAT expression) instead of building it
# in the query.
query_string = '''
SELECT TOP 20 c.cp_toVectorize 
FROM c
'''
query(query_string)