# e-Commerce Example

Here are sample data and queries for the example. First, let's create a new table `Store` with partition key `PK` and sort key `SK`. After that, we generate sample data. The sample data is then put into the table step by step, as each access pattern requires it.

In [1]:
# import and get dynamodb resource
import csv
import random
from datetime import datetime, timedelta
import uuid
import boto3
import time
from pprint import pprint
from decimal import Decimal
from boto3.dynamodb.conditions import Key, Attr
from botocore.exceptions import ClientError

# DynamoDB service resource (created without explicit region or credential arguments);
# used below to delete and create the `Store` table
dynamodb = boto3.resource('dynamodb')

In [2]:
# delete Store table if necessary, and wait until it is really gone
try:
    dynamodb.Table('Store').delete()
except ClientError as err:
    # a table that does not exist yet is the only failure that is safe to ignore;
    # anything else (credentials, permissions, table busy, ...) should be visible here
    if err.response['Error']['Code'] != 'ResourceNotFoundException':
        raise
else:
    # DeleteTable is asynchronous: without this wait, creating the table again
    # right away can fail because the old table is still being deleted
    dynamodb.Table('Store').wait_until_not_exists()

In [3]:
# create the Store table (string keys PK and SK) and wait until done
table_keys = (('PK', 'HASH'), ('SK', 'RANGE'))

store = dynamodb.create_table(
    TableName='Store',
    # both key attributes are declared with type 'S'
    AttributeDefinitions=[
        {'AttributeName': key_name, 'AttributeType': 'S'}
        for key_name, _key_type in table_keys
    ],
    # PK is the HASH key, SK is the RANGE key
    KeySchema=[
        {'AttributeName': key_name, 'KeyType': key_type}
        for key_name, key_type in table_keys
    ],
    BillingMode='PAY_PER_REQUEST'
)

store.wait_until_exists()

In [4]:
# generate sample user data
users = []

# newline='' lets the csv module do its own newline handling (as the csv docs recommend)
with open('data/users.csv', 'r', newline='') as f:
    for row in csv.reader(f):
        # csv.reader yields an empty list for a blank line; skip it instead of
        # failing on the three-way unpacking below
        if not row:
            continue
        full_name, user_name, email = row
        # random creation date within the last 1825 days, formatted as YYYY-MM-DD
        signed_up = datetime.now() - timedelta(days=random.randint(0, 1825))
        users.append({
            'user_name': user_name,
            'full_name': full_name,
            'email': email,
            'created_at': signed_up.strftime('%Y-%m-%d'),
            'address': '..'
        })

pprint(users[0])

{'address': '..',
 'created_at': '2018-05-23',
 'email': 'trevor@gmail.com',
 'full_name': 'Trevor Sanderson',
 'user_name': 'trevor'}


In [5]:
# generate sample order and item data
orders = []

for user in users:
    # every user gets between 10 and 100 orders
    order_count = random.randint(10, 100)
    for _ in range(order_count):
        order_id = str(uuid.uuid4())[:8]
        # 1 draw out of 20 (the value 19) leaves the order open as PLACED
        status = 'PLACED' if random.randint(0, 19) > 18 else 'SHIPPED'
        age_in_days = random.randint(0, 1825)
        order = {
            'id': order_id,
            'user_name': user['user_name'],
            'status': status,
            'created_at': (datetime.now() - timedelta(days=age_in_days)).strftime('%Y-%m-%d'),
            'address': '..',
            'status_id': status + '#' + order_id
        }
        # only open orders get a placed_id
        if status == 'PLACED':
            order['placed_id'] = str(uuid.uuid4())[-4:]

        # 1 to 10 items per order
        order['items'] = [
            {
                'id': str(uuid.uuid4())[:8],
                'product_name': 'Product #' + str(random.randint(1, 1000)),
                'price': random.randint(1, 100000),
                'status': 'FILLED'
            }
            for _ in range(random.randint(1, 10))
        ]

        orders.append(order)

pprint(orders[0])

{'address': '..',
 'created_at': '2018-12-08',
 'id': 'b52dc297',
 'items': [{'id': '79d870e6',
            'price': 6246,
            'product_name': 'Product #226',
            'status': 'FILLED'},
           {'id': '5c22cc1b',
            'price': 38981,
            'product_name': 'Product #815',
            'status': 'FILLED'},
           {'id': '18ce7696',
            'price': 22057,
            'product_name': 'Product #878',
            'status': 'FILLED'},
           {'id': 'bd4b1b97',
            'price': 7268,
            'product_name': 'Product #437',
            'status': 'FILLED'},
           {'id': '44bf32d3',
            'price': 1460,
            'product_name': 'Product #512',
            'status': 'FILLED'},
           {'id': '5602ce2d',
            'price': 23067,
            'product_name': 'Product #937',
            'status': 'FILLED'}],
 'status': 'SHIPPED',
 'status_id': 'SHIPPED#b52dc297',
 'user_name': 'trevor'}


Here are the five access patterns described in the slide. We'll handle them one by one.

1. Get user profile
2. Get orders for users
3. Get single order and order items
4. Get orders for user by status
5. Get open orders

## Access Pattern 1. Get User Profile

To do this, put user data into the table.

In [6]:
# put user data into the table
with store.batch_writer() as writer:
    for user in users:
        name = user['user_name']
        # one profile item per user; both keys carry the user name behind a prefix
        profile_item = {
            'PK': 'USER#' + name,
            'SK': 'PROFILE#' + name,
            'UserName': name,
            'FullName': user['full_name'],
            'Email': user['email'],
            'CreatedAt': user['created_at']
        }
        writer.put_item(Item=profile_item)

This is pretty straightforward. Just put the user name into both `PK` and `SK`, each with its own prefix (`USER#` and `PROFILE#`).

In [7]:
# get user profile (single-item lookup by the full primary key)
profile_key = {'PK': 'USER#claire', 'SK': 'PROFILE#claire'}
response = store.get_item(Key=profile_key)

pprint(response)

{'Item': {'CreatedAt': '2018-11-30',
          'Email': 'claire@gmail.com',
          'FullName': 'Claire Hardacre',
          'PK': 'USER#claire',
          'SK': 'PROFILE#claire',
          'UserName': 'claire'},
 'ResponseMetadata': {'HTTPHeaders': {'connection': 'keep-alive',
                                      'content-length': '188',
                                      'content-type': 'application/x-amz-json-1.0',
                                      'date': 'Sun, 06 Dec 2020 05:42:19 GMT',
                                      'server': 'Server',
                                      'x-amz-crc32': '3844896343',
                                      'x-amzn-requestid': 'P98C4H1LCPTGRE0NC23ODBD6GNVV4KQNSO5AEMVJF66Q9ASUAAJG'},
                      'HTTPStatusCode': 200,
                      'RequestId': 'P98C4H1LCPTGRE0NC23ODBD6GNVV4KQNSO5AEMVJF66Q9ASUAAJG',
                      'RetryAttempts': 0}}


## Access Pattern 2. Get Orders for Users

To do this, put order data into the table.

In [8]:
# put order data into the table
with store.batch_writer() as writer:
    for order in orders:
        owner = order['user_name']
        order_id = order['id']
        # the order lives in its owner's partition, keyed by the order id
        writer.put_item(Item={
            'PK': 'USER#' + owner,
            'SK': 'ORDER#' + order_id,
            'UserName': owner,
            'OrderId': order_id,
            'Status': order['status'],
            'CreatedAt': order['created_at'],
            'Address': order['address']
        })

To get all orders of a user, query with the prefixed user name as `PK` and the order prefix (`ORDER#`) as the beginning of `SK`. For simplicity, we limit the result to 5 items here.

In [9]:
# get orders of the user (at most 5 of them)
orders_of_claire = Key('PK').eq('USER#claire') & Key('SK').begins_with('ORDER#')
response = store.query(KeyConditionExpression=orders_of_claire, Limit=5)

pprint(response)

{'Count': 5,
 'Items': [{'Address': '..',
            'CreatedAt': '2019-01-14',
            'OrderId': '070b4025',
            'PK': 'USER#claire',
            'SK': 'ORDER#070b4025',
            'Status': 'SHIPPED',
            'UserName': 'claire'},
           {'Address': '..',
            'CreatedAt': '2020-09-12',
            'OrderId': '1e343784',
            'PK': 'USER#claire',
            'SK': 'ORDER#1e343784',
            'Status': 'SHIPPED',
            'UserName': 'claire'},
           {'Address': '..',
            'CreatedAt': '2016-02-13',
            'OrderId': '1fa97ed1',
            'PK': 'USER#claire',
            'SK': 'ORDER#1fa97ed1',
            'Status': 'SHIPPED',
            'UserName': 'claire'},
           {'Address': '..',
            'CreatedAt': '2016-09-19',
            'OrderId': '1fd48572',
            'PK': 'USER#claire',
            'SK': 'ORDER#1fd48572',
            'Status': 'SHIPPED',
            'UserName': 'claire'},
           {'Address': '..'

## Access Pattern 3. Get single order and order items

To do this, put item data into the table. We also need to create the first index, which is an inverted index (swapping `PK` and `SK`).

In [13]:
# put item data into the table
with store.batch_writer() as writer:
    for order in orders:
        order_id = order['id']
        for item in order['items']:
            item_id = item['id']
            # each item is keyed by its own id and points back to its order via SK
            writer.put_item(Item={
                'PK': 'ITEM#' + item_id,
                'SK': 'ORDER#' + order_id,
                'ItemId': item_id,
                'OrderId': order_id,
                'ProductName': item['product_name'],
                'Price': item['price'],
                'Status': item['status']
            })

In [14]:
# create the inverted index GSI-1: SK is its HASH key and PK its RANGE key
inverted_index = {
    'IndexName': 'GSI-1',
    'KeySchema': [
        {'AttributeName': 'SK', 'KeyType': 'HASH'},
        {'AttributeName': 'PK', 'KeyType': 'RANGE'}
    ],
    'Projection': {'ProjectionType': 'ALL'}
}

store = store.update(
    AttributeDefinitions=[
        {'AttributeName': 'PK', 'AttributeType': 'S'},
        {'AttributeName': 'SK', 'AttributeType': 'S'}
    ],
    GlobalSecondaryIndexUpdates=[{'Create': inverted_index}]
)

In [15]:
# wait until the index is created, polling its status every 10 seconds
gsi_status = 'N/A'

while True:
    # refresh the table description so the index status is current
    store.reload()

    # global_secondary_indexes can be None when the table reports no index at all
    for index in store.global_secondary_indexes or []:
        if index['IndexName'] == 'GSI-1':
            gsi_status = index['IndexStatus']
            print(datetime.now(), gsi_status)

    if gsi_status == 'ACTIVE':
        break

    # pause only between polls, not once more after the index is already active
    time.sleep(10)

2020-12-06 05:53:32.451318 CREATING
2020-12-06 05:53:42.471485 CREATING
2020-12-06 05:53:52.490108 CREATING
2020-12-06 05:54:02.507786 CREATING
2020-12-06 05:54:12.527455 CREATING
2020-12-06 05:54:22.547533 CREATING
2020-12-06 05:54:32.563596 CREATING
2020-12-06 05:54:42.583155 CREATING
2020-12-06 05:54:52.602884 CREATING
2020-12-06 05:55:02.622595 CREATING
2020-12-06 05:55:12.637681 ACTIVE


Now `SK`, the partition key of the index `GSI-1`, groups everything that belongs to an order. Search for an order on this index by putting the order id into `SK`. The result will contain the order's items and the order record of the user who placed it, which looks like a joined result.

In [16]:
# get an order and its items through the inverted index
single_order = Key('SK').eq('ORDER#1fa97ed1')
response = store.query(IndexName='GSI-1', KeyConditionExpression=single_order)

pprint(response)

{'Count': 10,
 'Items': [{'ItemId': '1a5ff81e',
            'OrderId': '1fa97ed1',
            'PK': 'ITEM#1a5ff81e',
            'Price': Decimal('60889'),
            'ProductName': 'Product #382',
            'SK': 'ORDER#1fa97ed1',
            'Status': 'FILLED'},
           {'ItemId': '29be1912',
            'OrderId': '1fa97ed1',
            'PK': 'ITEM#29be1912',
            'Price': Decimal('30493'),
            'ProductName': 'Product #62',
            'SK': 'ORDER#1fa97ed1',
            'Status': 'FILLED'},
           {'ItemId': '5eefb01e',
            'OrderId': '1fa97ed1',
            'PK': 'ITEM#5eefb01e',
            'Price': Decimal('86714'),
            'ProductName': 'Product #640',
            'SK': 'ORDER#1fa97ed1',
            'Status': 'FILLED'},
           {'ItemId': '5fe504c2',
            'OrderId': '1fa97ed1',
            'PK': 'ITEM#5fe504c2',
            'Price': Decimal('44900'),
            'ProductName': 'Product #903',
            'SK': 'ORDER#1fa97ed1',


## Access Pattern 4. Get orders for user by status

First, note that getting orders filtered by user and status is not efficient with the current table model. The following query shows why we need to do something more.

In [17]:
# get orders for user by status with the current table model
# (the status is applied as a filter expression, not as part of the key condition)
query_args = {
    'KeyConditionExpression': Key('PK').eq('USER#claire'),
    'FilterExpression': Attr('Status').eq('PLACED'),
    'ReturnConsumedCapacity': 'INDEXES'
}
response = store.query(**query_args)

pprint(response)

{'ConsumedCapacity': {'CapacityUnits': 0.5,
                      'Table': {'CapacityUnits': 0.5},
                      'TableName': 'Store'},
 'Count': 1,
 'Items': [{'Address': '..',
            'CreatedAt': '2017-09-07',
            'OrderId': '5f3b0f46',
            'PK': 'USER#claire',
            'SK': 'ORDER#5f3b0f46',
            'Status': 'PLACED',
            'UserName': 'claire'}],
 'ResponseMetadata': {'HTTPHeaders': {'connection': 'keep-alive',
                                      'content-length': '314',
                                      'content-type': 'application/x-amz-json-1.0',
                                      'date': 'Sun, 06 Dec 2020 05:58:11 GMT',
                                      'server': 'Server',
                                      'x-amz-crc32': '3747601822',
                                      'x-amzn-requestid': '8I5FNVJURD4HUKMI9MUU7M6GBFVV4KQNSO5AEMVJF66Q9ASUAAJG'},
                      'HTTPStatusCode': 200,
                      'Req

Compare the `Count` and `ScannedCount` values. The result means that DynamoDB read `ScannedCount` items, filtered out most of them in memory, and returned only `Count` items to us. Pricing basically depends on `ScannedCount`, not `Count`. Check `CapacityUnits` as well.

To improve cost efficiency and performance, we need to add an `OrderStatusId` attribute to the order records. After that, we create an index on the new attribute.

In [18]:
# re-put order data with order_status_id attribute
with store.batch_writer() as writer:
    for order in orders:
        # same keys as the first load, so the put replaces the existing order item
        order_record = {
            'PK': 'USER#' + order['user_name'],
            'SK': 'ORDER#' + order['id'],
            'UserName': order['user_name'],
            'OrderId': order['id'],
            'Status': order['status'],
            'CreatedAt': order['created_at'],
            'Address': order['address'],
            'OrderStatusId': order['status_id']
        }
        writer.put_item(Item=order_record)

In [19]:
# create an index on PK and OrderStatusId (PK is the HASH key, OrderStatusId the RANGE key)
status_index = {
    'IndexName': 'GSI-2',
    'KeySchema': [
        {'AttributeName': 'PK', 'KeyType': 'HASH'},
        {'AttributeName': 'OrderStatusId', 'KeyType': 'RANGE'}
    ],
    'Projection': {'ProjectionType': 'ALL'}
}

store = store.update(
    AttributeDefinitions=[
        {'AttributeName': 'PK', 'AttributeType': 'S'},
        {'AttributeName': 'OrderStatusId', 'AttributeType': 'S'}
    ],
    GlobalSecondaryIndexUpdates=[{'Create': status_index}]
)

In [20]:
# wait until the index is created, polling its status every 10 seconds
gsi_status = 'N/A'

while True:
    # refresh the table description so the index status is current
    store.reload()

    # global_secondary_indexes can be None when the table reports no index at all
    for index in store.global_secondary_indexes or []:
        if index['IndexName'] == 'GSI-2':
            gsi_status = index['IndexStatus']
            print(datetime.now(), gsi_status)

    if gsi_status == 'ACTIVE':
        break

    # pause only between polls, not once more after the index is already active
    time.sleep(10)

2020-12-06 05:58:38.645060 CREATING
2020-12-06 05:58:48.664153 CREATING
2020-12-06 05:58:58.683650 CREATING
2020-12-06 05:59:08.702041 CREATING
2020-12-06 05:59:18.717678 CREATING
2020-12-06 05:59:28.736928 CREATING
2020-12-06 05:59:38.755710 CREATING
2020-12-06 05:59:48.769653 CREATING
2020-12-06 05:59:58.783272 CREATING
2020-12-06 06:00:08.793787 CREATING
2020-12-06 06:00:18.812803 ACTIVE


Now we query the new index. Look at `Count`, `ScannedCount` and `CapacityUnits`, and compare them to the original result.

In [21]:
# get orders for user by status with the new index
# (the status is now part of the key condition via the OrderStatusId prefix)
placed_by_claire = Key('PK').eq('USER#claire') & Key('OrderStatusId').begins_with('PLACED#')
response = store.query(
    IndexName='GSI-2',
    KeyConditionExpression=placed_by_claire,
    ReturnConsumedCapacity='INDEXES'
)

pprint(response)

{'ConsumedCapacity': {'CapacityUnits': 0.5,
                      'GlobalSecondaryIndexes': {'GSI-2': {'CapacityUnits': 0.5}},
                      'Table': {'CapacityUnits': 0.0},
                      'TableName': 'Store'},
 'Count': 1,
 'Items': [{'Address': '..',
            'CreatedAt': '2017-09-07',
            'OrderId': '5f3b0f46',
            'OrderStatusId': 'PLACED#5f3b0f46',
            'PK': 'USER#claire',
            'SK': 'ORDER#5f3b0f46',
            'Status': 'PLACED',
            'UserName': 'claire'}],
 'ResponseMetadata': {'HTTPHeaders': {'connection': 'keep-alive',
                                      'content-length': '410',
                                      'content-type': 'application/x-amz-json-1.0',
                                      'date': 'Sun, 06 Dec 2020 06:02:38 GMT',
                                      'server': 'Server',
                                      'x-amz-crc32': '2730285438',
                                      'x-amzn-requestid

## Access Pattern 5. Get Open Orders

It is important for admins to check open orders (orders whose status is `PLACED`) to do the post-processing. With the current table model, the only way to get this data is a full scan over the table, as follows.

In [22]:
# scan all data having Status as PLACED
# (the scan call stays the last expression so the notebook displays its result)
placed_only = Attr('Status').eq('PLACED')
store.scan(FilterExpression=placed_only, ReturnConsumedCapacity='INDEXES')

{'Items': [{'Address': '..',
   'SK': 'ORDER#0cc5462c',
   'Status': 'PLACED',
   'PK': 'USER#alexander',
   'UserName': 'alexander',
   'OrderId': '0cc5462c',
   'CreatedAt': '2017-05-04',
   'OrderStatusId': 'PLACED#0cc5462c'},
  {'Address': '..',
   'SK': 'ORDER#b2fe6bb9',
   'Status': 'PLACED',
   'PK': 'USER#alexander',
   'UserName': 'alexander',
   'OrderId': 'b2fe6bb9',
   'CreatedAt': '2016-08-13',
   'OrderStatusId': 'PLACED#b2fe6bb9'},
  {'Address': '..',
   'SK': 'ORDER#b527e8ea',
   'Status': 'PLACED',
   'PK': 'USER#caroline',
   'UserName': 'caroline',
   'OrderId': 'b527e8ea',
   'CreatedAt': '2020-06-11',
   'OrderStatusId': 'PLACED#b527e8ea'},
  {'Address': '..',
   'SK': 'ORDER#230c0fd6',
   'Status': 'PLACED',
   'PK': 'USER#alexandra',
   'UserName': 'alexandra',
   'OrderId': '230c0fd6',
   'CreatedAt': '2018-06-08',
   'OrderStatusId': 'PLACED#230c0fd6'},
  {'Address': '..',
   'SK': 'ORDER#40480933',
   'Status': 'PLACED',
   'PK': 'USER#nathan',
   'UserName': 

See `Count`, `ScannedCount` and `CapacityUnits`. Obviously, this is time-consuming and not cost-effective. To do it more elegantly, we add a `PlacedId` attribute to the open orders only and create a sparse index on that attribute.

In [23]:
# re-put open order data with placed_id attribute
with store.batch_writer() as writer:
    for order in orders:
        # only orders that carry a placed_id are rewritten; all others stay as they are
        if 'placed_id' not in order:
            continue
        writer.put_item(Item={
            'PK': 'USER#' + order['user_name'],
            'SK': 'ORDER#' + order['id'],
            'UserName': order['user_name'],
            'OrderId': order['id'],
            'Status': order['status'],
            'CreatedAt': order['created_at'],
            'Address': order['address'],
            'OrderStatusId': order['status_id'],
            'PlacedId': order['placed_id']
        })

In [24]:
# create an index on PlacedId (HASH key only, no RANGE key)
placed_index = {
    'IndexName': 'GSI-3',
    'KeySchema': [
        {'AttributeName': 'PlacedId', 'KeyType': 'HASH'}
    ],
    'Projection': {'ProjectionType': 'ALL'}
}

store = store.update(
    AttributeDefinitions=[
        {'AttributeName': 'PlacedId', 'AttributeType': 'S'}
    ],
    GlobalSecondaryIndexUpdates=[{'Create': placed_index}]
)

In [25]:
# wait until the index is created, polling its status every 10 seconds
gsi_status = 'N/A'

while True:
    # refresh the table description so the index status is current
    store.reload()

    # global_secondary_indexes can be None when the table reports no index at all
    for index in store.global_secondary_indexes or []:
        if index['IndexName'] == 'GSI-3':
            gsi_status = index['IndexStatus']
            print(datetime.now(), gsi_status)

    if gsi_status == 'ACTIVE':
        break

    # pause only between polls, not once more after the index is already active
    time.sleep(10)

2020-12-06 06:03:11.324153 CREATING
2020-12-06 06:03:21.346047 CREATING
2020-12-06 06:03:31.370892 CREATING
2020-12-06 06:03:41.391245 CREATING
2020-12-06 06:03:51.411144 CREATING
2020-12-06 06:04:01.430643 CREATING
2020-12-06 06:04:11.447292 CREATING
2020-12-06 06:04:21.458146 CREATING
2020-12-06 06:04:31.479946 CREATING
2020-12-06 06:04:41.501687 CREATING
2020-12-06 06:04:51.522162 CREATING
2020-12-06 06:05:01.544511 CREATING
2020-12-06 06:05:11.565321 CREATING
2020-12-06 06:05:21.576539 CREATING
2020-12-06 06:05:31.595466 ACTIVE


We're ready to do it better. Scan the new index and check `Count`, `ScannedCount` and `CapacityUnits` to see whether it worked well.

In [26]:
# get open orders with the new index (a scan over GSI-3 with no filter expression)
scan_args = {'IndexName': 'GSI-3', 'ReturnConsumedCapacity': 'INDEXES'}
response = store.scan(**scan_args)

pprint(response)

{'ConsumedCapacity': {'CapacityUnits': 2.0,
                      'GlobalSecondaryIndexes': {'GSI-3': {'CapacityUnits': 2.0}},
                      'Table': {'CapacityUnits': 0.0},
                      'TableName': 'Store'},
 'Count': 13,
 'Items': [{'Address': '..',
            'CreatedAt': '2020-06-11',
            'OrderId': 'b527e8ea',
            'OrderStatusId': 'PLACED#b527e8ea',
            'PK': 'USER#caroline',
            'PlacedId': 'e4e6',
            'SK': 'ORDER#b527e8ea',
            'Status': 'PLACED',
            'UserName': 'caroline'},
           {'Address': '..',
            'CreatedAt': '2017-09-07',
            'OrderId': '5f3b0f46',
            'OrderStatusId': 'PLACED#5f3b0f46',
            'PK': 'USER#claire',
            'PlacedId': '1400',
            'SK': 'ORDER#5f3b0f46',
            'Status': 'PLACED',
            'UserName': 'claire'},
           {'Address': '..',
            'CreatedAt': '2016-01-23',
            'OrderId': 'a5a6a8ab',
            'O