In [4]:
from boto3.dynamodb.conditions import Key

import boto3

In [5]:
# two options... can create boto3 client OR resource with DynamoDB

In [6]:
# Option 1: a boto3 *client* pointed at the local DynamoDB endpoint.
ddb = boto3.client(service_name='dynamodb', endpoint_url='http://localhost:8000')
# list_tables() returns the raw response dict (table names plus response metadata).
table_listing = ddb.list_tables()
print(table_listing)

{'TableNames': ['Employees'], 'ResponseMetadata': {'RequestId': '49d52a25-6536-4bc8-b3a2-ea1ceda71f58', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 01 Sep 2022 22:31:10 GMT', 'content-type': 'application/x-amz-json-1.0', 'x-amz-crc32': '2871489903', 'x-amzn-requestid': '49d52a25-6536-4bc8-b3a2-ea1ceda71f58', 'content-length': '28', 'server': 'Jetty(9.4.18.v20190429)'}, 'RetryAttempts': 0}}


In [11]:
# Option 2: a boto3 *resource* pointed at the same local endpoint (this rebinds `ddb`).
ddb = boto3.resource('dynamodb', endpoint_url='http://localhost:8000')
# Collect every existing table into a list. On a fresh local instance this is
# empty; once 'Employees' has been created it shows up here.
tables = [existing_table for existing_table in ddb.tables.all()]
print(tables)

[dynamodb.Table(name='Employees')]


In [8]:
# We will use the resource interface (not the client) for the rest of this tutorial.
# Confirm which kind of object `ddb` is currently bound to.
type(ddb)

boto3.resources.factory.dynamodb.ServiceResource

In [22]:
# Create the 'Employees' table if it doesn't exist yet; otherwise reuse it.
TABLE_NAME = 'Employees'

# Look the table up by name among the tables that exist *right now*.
# (Checking only whether some table exists would leave `table` as None when
# other tables are present but 'Employees' is not, and relying on an earlier
# listing would try to re-create the table when this cell is re-run.)
table = next(
    (existing for existing in ddb.tables.all() if existing.name == TABLE_NAME),
    None,
)

if table is None:
    table = ddb.create_table(
        TableName=TABLE_NAME,
        # HASH and RANGE together make up the "primary key", which is unique in the table.
        # The same HASH value may appear many times with different RANGE values, and the
        # same RANGE value may appear under different HASH values.
        KeySchema=[
            {
                'AttributeName': 'Name',
                'KeyType': 'HASH'  # PARTITION KEY
            },
            {
                'AttributeName': 'Email',
                'KeyType': 'RANGE'  # SORT KEY (optional)
            }
        ],
        # Only attributes used as keys (table or index) are declared here.
        AttributeDefinitions=[
            {
                'AttributeName': 'Name',
                'AttributeType': 'S'  # S == STRING
            },
            {
                'AttributeName': 'Email',
                'AttributeType': 'S'
            }
        ],
        # The rate at which read/write capacity is provisioned.
        # Doesn't matter for a local instance.
        ProvisionedThroughput={
            'ReadCapacityUnits': 1,
            'WriteCapacityUnits': 1
        },
        # Must be 'PROVISIONED' because ProvisionedThroughput is supplied; the real
        # service rejects 'PAY_PER_REQUEST' combined with explicit capacity units.
        # Optional argument, does not matter for a local instance.
        BillingMode='PROVISIONED',
        # Note that a GSI adds write cost (writes are replicated into the index),
        # and a table can have at most 20 GSIs by default. Replication to the GSI
        # is quick but not instantaneous.
        #
        # Also, write capacity on the GSI should be at least as high as the
        # write capacity on the main table.
        GlobalSecondaryIndexes=[
            {
                'IndexName': 'Country',  # must be unique only within this table
                # The key schema must have at least a partition key; RANGE is optional.
                'KeySchema': [
                    {
                        # A GSI key may be any attribute listed in AttributeDefinitions;
                        # it does not have to match the table's RANGE key.
                        # NOTE(review): the index is named 'Country' but is keyed on
                        # 'Email' -- confirm whether that is intended.
                        'AttributeName': 'Email',
                        'KeyType': 'HASH'
                    }
                ],
                # Attributes copied (projected) from the main table into the index.
                # These are in addition to the table's primary-key attributes and the
                # index key attributes, which are always projected.
                'Projection': {
                    # Can be 'KEYS_ONLY', 'ALL' or 'INCLUDE'; the latter requires a
                    # 'NonKeyAttributes' list. See the boto3 documentation.
                    'ProjectionType': 'ALL'
                    # 'NonKeyAttributes': ['string']
                },
                'ProvisionedThroughput': {
                    'ReadCapacityUnits': 1,
                    'WriteCapacityUnits': 2
                }
            }
        ]
    )
    # Block until the new table is ready, so later cells can use it immediately.
    table.wait_until_exists()

In [None]:
# ** If you are coming back to this tutorial and have already created the table... **
# Bind `table` to the existing 'Employees' table through the resource.
table = ddb.Table("Employees")

In [10]:
# Show the table's key schema.
# Requires `table` to be defined by one of the cells above.
table.key_schema

NameError: name 'table' is not defined

In [None]:
# Show the table's attribute definitions.
table.attribute_definitions

In [None]:
# Show the table's provisioned throughput settings.
table.provisioned_throughput

In [None]:
# Show the table's global secondary indexes (GSIs).
table.global_secondary_indexes

In [None]:
# Insert a record.
#
# For reference, the low-level *client* equivalent wraps every value in a
# type descriptor:
#   client.put_item(TableName='Employees', Item={
#       'Name': {'S': 'Frey'},
#       'Email': {'S': 'jordan@freygeospatial.com'},
#       'Country': {'S': 'USA'},
#   })
#
# With the Table resource we pass plain Python values instead.
first_employee = {
    'Name': 'Frey',
    'Email': 'jordan@freygeospatial.com',
    'Country': 'USA',
    'RandomAttribute': 'Blah!',
}
table.put_item(Item=first_employee)

In [None]:
# Scan the table. The response is a dict, so looping over it directly
# prints its top-level keys rather than the stored items.
results = table.scan()
for response_key in results:
    print(response_key)
    

Items
Count
ScannedCount
ResponseMetadata


In [None]:
# The items returned by the scan.
results["Items"]

[{'Country': 'USA',
  'RandomAttribute': 'Blah!',
  'Email': 'jordan@freygeospatial.com',
  'Name': 'Frey'}]

In [None]:
# The 'Count' field of the scan response.
results["Count"]

1

In [None]:
# The 'ScannedCount' field of the scan response.
results["ScannedCount"]

1

In [None]:
# HTTP-level metadata about the scan request (request id, status code, headers, retries).
results["ResponseMetadata"]

{'RequestId': '2a236310-bef8-4324-aa47-b0cc84e06265',
 'HTTPStatusCode': 200,
 'HTTPHeaders': {'date': 'Fri, 06 May 2022 13:43:05 GMT',
  'content-type': 'application/x-amz-json-1.0',
  'x-amz-crc32': '1839928699',
  'x-amzn-requestid': '2a236310-bef8-4324-aa47-b0cc84e06265',
  'content-length': '156',
  'server': 'Jetty(9.4.18.v20190429)'},
 'RetryAttempts': 0}

In [None]:
# A single item can be requested directly when we have its full primary key
# (both the partition key and the sort key).
# Note that attribute ('column') names are case sensitive!
jordan_key = {'Name': 'Frey', 'Email': 'jordan@freygeospatial.com'}
table.get_item(Key=jordan_key)

{'Item': {'Country': 'USA',
  'RandomAttribute': 'Blah!',
  'Email': 'jordan@freygeospatial.com',
  'Name': 'Frey'},
 'ResponseMetadata': {'RequestId': 'e3c11f42-9c63-4d90-819c-ec88b5d60d21',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Thu, 11 Aug 2022 20:35:29 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'x-amz-crc32': '4093131092',
   'x-amzn-requestid': 'e3c11f42-9c63-4d90-819c-ec88b5d60d21',
   'content-length': '126',
   'server': 'Jetty(9.4.18.v20190429)'},
  'RetryAttempts': 0}}

In [None]:
# Add another record: same partition key ('Name') as before, different sort key ('Email').
second_employee = {
    'Name': 'Frey',
    'Email': 'Frey2@freygeospatial.com',
    'Country': 'USA',
    'RandomAttribute': 'Ugh!',
}
table.put_item(Item=second_employee)

{'ResponseMetadata': {'RequestId': 'a579b84c-f986-46bc-9acf-d5c54abb7bbf',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 09 May 2022 19:29:02 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'x-amz-crc32': '2745614147',
   'x-amzn-requestid': 'a579b84c-f986-46bc-9acf-d5c54abb7bbf',
   'content-length': '2',
   'server': 'Jetty(9.4.18.v20190429)'},
  'RetryAttempts': 0}}

In [None]:
# Re-scan the table and count the returned items; we expect two records now.
results = table.scan()
item_total = len(results["Items"])
item_total

2

In [None]:
# We can verify this further, if need be, by looking at the items themselves.
results["Items"]

[{'Country': 'USA',
  'RandomAttribute': 'Ugh!',
  'Email': 'Frey2@freygeospatial.com',
  'Name': 'Frey'},
 {'Country': 'USA',
  'RandomAttribute': 'Blah!',
  'Email': 'jordan@freygeospatial.com',
  'Name': 'Frey'}]

In [None]:
# Let's grab the first item the scan returns, regardless of partition key,
# by passing Limit=1.
table.scan(Limit=1)

{'Items': [{'Country': 'USA',
   'RandomAttribute': 'Ugh!',
   'Email': 'Frey2@freygeospatial.com',
   'Name': 'Frey'}],
 'Count': 1,
 'ScannedCount': 1,
 'LastEvaluatedKey': {'Email': 'Frey2@freygeospatial.com', 'Name': 'Frey'},
 'ResponseMetadata': {'RequestId': 'fca64d7e-e405-4c8f-912a-9e586f1b16d9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Thu, 11 Aug 2022 20:14:48 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'x-amz-crc32': '2783594237',
   'x-amzn-requestid': 'fca64d7e-e405-4c8f-912a-9e586f1b16d9',
   'content-length': '236',
   'server': 'Jetty(9.4.18.v20190429)'},
  'RetryAttempts': 0}}

In [None]:
# Remember that DynamoDB is SCHEMALESS -- when creating a table we cannot declare
# columns other than the key attributes (the HASH key and, optionally, the RANGE key).

# If we want to add a new item, we can specify any number of
# attributes there...

# to delete the table:
#table.delete()

# table.wait_until_not_exists() # check that table does not exist

In [None]:
# Example of updating an item (table entry): the item is addressed by its
# full primary key, and the update expression sets its 'Country' attribute.
frey2_key = {'Name': 'Frey', 'Email': 'Frey2@freygeospatial.com'}
table.update_item(
    Key=frey2_key,
    UpdateExpression='SET Country = :updated',
    # ':updated' is the placeholder referenced in the expression above.
    ExpressionAttributeValues={':updated': 'Ukraine'},
)

{'ResponseMetadata': {'RequestId': 'f0ab37df-c5ee-4ca8-99f5-5b3a0e3d0e4e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Thu, 11 Aug 2022 20:28:30 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'x-amz-crc32': '2745614147',
   'x-amzn-requestid': 'f0ab37df-c5ee-4ca8-99f5-5b3a0e3d0e4e',
   'content-length': '2',
   'server': 'Jetty(9.4.18.v20190429)'},
  'RetryAttempts': 0}}

In [None]:
# Let's check that the record was updated: fetch it by primary key and
# look only at the 'Item' part of the response.
table.get_item(Key={'Name': 'Frey', 'Email': 'Frey2@freygeospatial.com'})['Item']

{'Country': 'Ukraine',
 'RandomAttribute': 'Ugh!',
 'Email': 'Frey2@freygeospatial.com',
 'Name': 'Frey'}

In [None]:
# Example of a QUERY (not just a 'get item'): fetch every item whose
# partition key 'Name' equals 'Frey'.
name_is_frey = Key('Name').eq('Frey')
response = table.query(KeyConditionExpression=name_is_frey)

items = response['Items']
for item in items:
    print(item)


{'Country': 'Ukraine', 'RandomAttribute': 'Ugh!', 'Email': 'Frey2@freygeospatial.com', 'Name': 'Frey'}
{'Country': 'USA', 'RandomAttribute': 'Blah!', 'Email': 'jordan@freygeospatial.com', 'Name': 'Frey'}


In [None]:
# Example of a QUERY that constrains both the partition key and the sort key.
# Key conditions must reference designated key attributes (HASH and RANGE);
# we cannot query on 'Country' this way.
name_is_frey = Key('Name').eq('Frey')
email_is_frey2 = Key('Email').eq('Frey2@freygeospatial.com')
response = table.query(KeyConditionExpression=name_is_frey & email_is_frey2)

items = response['Items']
for item in items:
    print(item)


{'Country': 'Ukraine', 'RandomAttribute': 'Ugh!', 'Email': 'Frey2@freygeospatial.com', 'Name': 'Frey'}


In [None]:
# Delete one item, addressed by its full primary key.
frey2_key = {'Name': 'Frey', 'Email': 'Frey2@freygeospatial.com'}
response = table.delete_item(Key=frey2_key)

In [None]:
# Check that the deletion succeeded: the deleted item should no longer
# appear among the scanned items.
table.scan()["Items"]

[{'Country': 'USA',
  'RandomAttribute': 'Blah!',
  'Email': 'jordan@freygeospatial.com',
  'Name': 'Frey'}]

In [27]:
# Batch-insert values.

# Imagine the inputs are lists of variable length: 10 entries in this case,
# but maybe a different number in your application.
new_names = ['Frey' + str(n) for n in range(10)]
new_emails = [name + '@freygeospatial.com' for name in new_names]

# Rather than issuing 10 separate put_item requests ourselves, we hand the
# items to the batch writer, which sends them for us when the block exits.
with table.batch_writer() as batch:
    for name, email in zip(new_names, new_emails):
        batch.put_item(Item={"Name": name, "Email": email})

In [28]:
# Scan again to see the batch-inserted items alongside the original record.
table.scan()["Items"]

[{'Name': 'Frey3', 'Email': 'Frey3@freygeospatial.com'},
 {'Name': 'Frey1', 'Email': 'Frey1@freygeospatial.com'},
 {'Name': 'Frey4', 'Email': 'Frey4@freygeospatial.com'},
 {'Name': 'Frey0', 'Email': 'Frey0@freygeospatial.com'},
 {'Name': 'Frey8', 'Email': 'Frey8@freygeospatial.com'},
 {'Name': 'Frey9', 'Email': 'Frey9@freygeospatial.com'},
 {'Name': 'Frey6', 'Email': 'Frey6@freygeospatial.com'},
 {'Country': 'USA',
  'RandomAttribute': 'Blah!',
  'Email': 'jordan@freygeospatial.com',
  'Name': 'Frey'},
 {'Name': 'Frey2', 'Email': 'Frey2@freygeospatial.com'},
 {'Name': 'Frey5', 'Email': 'Frey5@freygeospatial.com'},
 {'Name': 'Frey7', 'Email': 'Frey7@freygeospatial.com'}]

In [29]:
# Now, let's delete the batch-inserted items again.
# Deletes take a 'Key' (the primary key) rather than a full 'Item'.
with table.batch_writer() as batch:
    for name, email in zip(new_names, new_emails):
        batch.delete_item(Key={"Name": name, "Email": email})

# Only the original record should remain.
table.scan()["Items"]

[{'Country': 'USA',
  'RandomAttribute': 'Blah!',
  'Email': 'jordan@freygeospatial.com',
  'Name': 'Frey'}]