# Azure Cosmos DB IoT Data Generator

This is an IoT data generator for Azure Cosmos DB.


## Author

I'm a Data Professional with 25+ years of experience in Analytics, Information Architecture, Big Data, Cloud Computing, Data Modeling, Machine Learning, and Database Administration. From startups to large organizations, I have always delivered commitment and innovation. After 2 years as an AI instructor on the Microsoft Cloud & AI team, I migrated to the Cosmos DB team as a Senior Program Manager for Synapse Link, Power BI OLTP connector, Fabric Mirroring, Reserved Capacity, and Python SDK. I am a Microsoft certified AI Engineer, Instructor, Golden Speaker, Hackathon Leader, and SQL Specialist.

More details and contact information here: https://www.linkedin.com/in/rodrigossz/.


## Trace

Last Updated on Feb 2024

In [11]:
###########################################################################################
# Environment
###########################################################################################

import random # to create random IoT measures based on random variations
import datetime
import time
import azure.cosmos.exceptions as exceptions
from azure.cosmos.partition_key import PartitionKey
import uuid # to create an unique id for each data point
import warnings 
from azure.cosmos import CosmosClient
import os

# DATABASE_NAME = ''
# DEVICES_CONTAINER_NAME = ''
# CONTAINER_NAME = ''
# URL=''
# KEY= ''

# Client
# URL, KEY, DATABASE_NAME, DEVICES_CONTAINER_NAME and CONTAINER_NAME must be
# defined before this point (see the commented placeholders above).
client = CosmosClient(URL, credential=KEY)

# Database: reuse it when it already exists, create it otherwise.
# Any other service error propagates to the caller unchanged.
database = client.create_database_if_not_exists(id=DATABASE_NAME)

# Container for IoT devices, partitioned by document id.
container = database.create_container_if_not_exists(
    id=DEVICES_CONTAINER_NAME,
    partition_key=PartitionKey(path="/id"),
)

# Container for IoT measures, partitioned by document id.
# `container` is left bound to this container, as the rest of the notebook
# looks containers up again by name before using them.
container = database.create_container_if_not_exists(
    id=CONTAINER_NAME,
    partition_key=PartitionKey(path="/id"),
)

print ('Environment OK')

Environment OK


In [12]:
###########################################################################################
# The function that creates and returns IoT values
###########################################################################################
from datetime import datetime

def RetunrIotValues(deviceId, measureType, unitSymbol, unit, baseValue, variationPercentage, isOutlier, outlierSignal):
    """Build one simulated IoT measurement document.

    Args:
        deviceId: Identifier of the device the measure belongs to.
        measureType: Name of the measure; also used to build 'deviceModel'.
        unitSymbol: Short unit symbol stored in the document.
        unit: Long unit name stored in the document.
        baseValue: Centre of the range the regular value is drawn from.
        variationPercentage: Allowed deviation around baseValue, in percent.
        isOutlier: 0 for a regular measure; any other value yields an outlier.
        outlierSignal: 'Positive' shifts the outlier above the base value;
            any other value shifts it below.

    Returns:
        A dict with a fresh UUID 'id', a UTC 'dateTime' stamp, the integer
        'measureValue', and the fixed descriptive device fields.
    """
    # Imported locally so the function does not depend on whether the
    # module-level name `datetime` is the module or the class.
    from datetime import datetime, timezone

    if isOutlier == 0:
        spread = baseValue * variationPercentage / 100
        value = random.randint(int(baseValue - spread), int(baseValue + spread))
    else:
        # Outlier: the variation is 30% bigger than for a regular measure.
        variationPercentage = int(variationPercentage * 1.3)
        if outlierSignal == 'Positive':
            baseValue = int(baseValue * 1.3)  # 30% bigger base value
            spread = baseValue * variationPercentage / 100
            value = random.randint(int(baseValue), int(baseValue + spread))
        else:
            baseValue = int(baseValue * 0.7)  # 30% smaller base value
            spread = baseValue * variationPercentage / 100
            value = random.randint(int(baseValue - spread), int(baseValue))

    docId = str(uuid.uuid4())
    # Synthetic coordinates derived from the measure value (demo data only).
    latitude = str(value) + '.35.60'
    longitude = str(value + 20) + '.35.60'

    # The format string ends in 'Z' (UTC), so the clock must be read in UTC;
    # a naive local time would be mislabeled on hosts outside UTC.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    IotData = {
        'id': docId,
        'dateTime': timestamp,
        'deviceId': deviceId,
        'measureType': measureType,
        'unitSymbol': unitSymbol,
        'unit': unit,
        'measureValue': value,
        'registeredTo': 'Microsoft Corporation - 1 Microsoft Way Redmond WA 98052 USA',
        'deviceModel': measureType + '_001',
        'deviceVersion': 'June, 2023',
        'deviceLatitude': latitude,
        'deviceLongitude': longitude,
        'deviceOperationalSystemVersion': 'V4 - Created by Rodrigo Souza as a demo for an IoT generator for Azure Cosmos DB.',
        'deviceSize': 'The RPM devices have 3 kilos and the size is 50 cm x 65 cm x 30 cm. The MW devices have 6 kilos and the size is 65 cm x 45 cm x 55 cm.'
    }

    return IotData


print('Function RetunrIotValues ok')

Function RetunrIotValues ok


In [13]:
###########################################################################################
# Function to detect prime numbers to create different cities based on the device suffix
###########################################################################################
def isprime(num):
    """Return True when ``num`` is a prime number, False otherwise.

    Values below 2 (0, 1 and negatives) are not prime and return False.
    Primality is checked by trial division up to the square root of ``num``.
    """
    # Guard first: without it 0 and 1 fall through to True, and a negative
    # number makes ``num**0.5`` complex, which ``int()`` rejects.
    if num < 2:
        return False
    for n in range(2, int(num**0.5) + 1):
        if num % n == 0:
            return False
    return True
print ('Function for prime numbers ok')

Function for prime numbers ok


In [14]:
###########################################################################################
# The IoT function for devices and values/measures
###########################################################################################
def IotSimulator(numDevs=20, minutes=1, generateDevices=1, printOutput=0):
    """Simulate IoT devices and write their measures to Cosmos DB.

    Devices are named 'dev-1' .. 'dev-<numDevs>'. For every simulated second,
    each device emits one document per unit in the units list, and each
    document is upserted into the CONTAINER_NAME container.

    Args:
        numDevs: Number of devices to simulate.
        minutes: Simulated duration; ``minutes * 60`` seconds are generated.
        generateDevices: When 1, a device document (with a city) is also
            upserted into the DEVICES_CONTAINER_NAME container per device.
        printOutput: When 1, every created device and measure is printed.

    Returns:
        None. Results are written to Cosmos DB and, optionally, printed.
    """
    # Build the device list and, if requested, populate the devices container.
    if generateDevices == 1:
        container = database.get_container_client(DEVICES_CONTAINER_NAME)
    print ('Preparing Devices')
    devPrefix = 'dev-'
    devicesList = []
    for i in range(1, numDevs + 1):
        deviceId = devPrefix + str(i)
        if generateDevices == 1:
            devId = str(uuid.uuid4())
            # The city depends on the device number: even numbers, odd
            # numbers reported prime by isprime, and every other odd number.
            if i % 2 == 0:
                City = 'Rio de Janeiro'
            elif isprime(i):
                City = 'New York'
            else:
                City = 'Seattle'
            IoTDevice = {'id': devId, 'deviceId': deviceId, 'city': City}
            if printOutput == 1:
                print ('Creating Device: ', IoTDevice)
            container.upsert_item(body=IoTDevice)
        devicesList.append(deviceId)

    # Units: (measure type, symbol, unit name, base value, variation %, outlier direction)
    unitList = [('Rotation Speed','RPM','Revolutions per Minute',3000,10, 'Positive'), ('Output','MW','MegaWatts',1500,10,'Negative')]

    # One round of measures per simulated second.
    numberMeasures = int(minutes) * 60

    # Outliers: pick `minutes` distinct seconds in 1..numberMeasures.
    ouliersSet = set()
    while len(ouliersSet) < int(minutes):
        ouliersSet.add(random.randint(1, numberMeasures))
    print ('Starting the process. The random outlier(s) will happen at these seconds: ',ouliersSet)

    # Every device returns 1 value per unit per second; each value is saved
    # as its own document in the measures container.
    container = database.get_container_client(CONTAINER_NAME)

    # Seconds are numbered 1..numberMeasures, the same range the outlier
    # seconds are drawn from, so exactly minutes*60 rounds are produced
    # (starting at 0 with an inclusive bound produced one extra round).
    for second in range(1, numberMeasures + 1):
        isOutlierSecond = 1 if second in ouliersSet else 0
        for deviceId in devicesList:
            for unit in unitList:
                IotData = RetunrIotValues (deviceId,unit[0],unit[1],unit[2],unit[3],unit[4],isOutlierSecond,unit[5])
                if isOutlierSecond:
                    print('Outlier:',deviceId,unit[2])
                container.upsert_item(body=IotData)
                if printOutput == 1:
                    print(IotData)

        
print ('Function iotSimulator ok')

Function iotSimulator ok


In [15]:
# Run configuration: 10,000 devices for 10 simulated minutes, printing every
# measure and skipping the creation of device documents.
numDevs, executionMinutes = 10000, 10
printOutput, generateDevices = 1, 0
IotSimulator(
    numDevs=numDevs,
    minutes=executionMinutes,
    generateDevices=generateDevices,
    printOutput=printOutput,
)

Preparing Devices
Starting the process. The random outlier(s) will happen at these seconds:  {226, 67, 196, 201, 205, 558, 119, 500, 407, 504}
{'id': '33ecc4d7-28b7-4502-a260-e7b0aee60682', 'dateTime': '2024-02-13T13:33:51Z', 'deviceId': 'dev-1', 'measureType': 'Rotation Speed', 'unitSymbol': 'RPM', 'unit': 'Revolutions per Minute', 'measureValue': 2996, 'registeredTo': 'Microsoft Corporation - 1 Microsoft Way Redmond WA 98052 USA', 'deviceModel': 'Rotation Speed_001', 'deviceVersion': 'June, 2023', 'deviceLatitude': '2996.35.60', 'deviceLongitude': '3016.35.60', 'deviceOperationalSystemVersion': 'V4 - Created by Rodrigo Souza as a demo for an IoT generator for Azure Cosmos DB.', 'deviceSize': 'The RPM devices have 3 kilos and the size is 50 cm x 65 cm x 30 cm. The MW devices have 6 kilos and the size is 65 cm x 45 cm x 55 cm.'}
{'id': '1cc4be99-79d6-4e35-91ab-8b4e526f4f37', 'dateTime': '2024-02-13T13:33:51Z', 'deviceId': 'dev-1', 'measureType': 'Output', 'unitSymbol': 'MW', 'unit': 'M

KeyboardInterrupt: 