The formatting for tables is pretty verbose - better to have this in its own file

In [6]:
%%file dynamodb_table_format.json
{
    "TableName": "registrations",
    "AttributeDefinitions": [
        {
            "AttributeName": "url",
            "AttributeType": "S"
        },
        {
            "AttributeName": "registrant",
            "AttributeType": "S"
        },
        {
            "AttributeName": "igsn",
            "AttributeType": "S"
        }
    ],
    "KeySchema": [
        {
            "AttributeName": "Registrant",
            "KeyType": "HASH"
        },
        {
            "AttributeName": "IGSN",
            "KeyType": "HASH"
        }
    ],
    "BillingMode": "PAY_PER_REQUEST",
    "Tags": [
        {
        "Key": "environment",
        "Value": "test"
        }
    ]
}

Overwriting dynamodb_table_format.json


And we can generate some records that we want to stick into this tale

In [33]:
import json, random
from string import ascii_letters

random.seed(314159) # set seed for reproduceability

def generate_registration_data(n_samples=1, max_chars=130):
    """
    Create IGSN registration data
    
    Parameters:
        n_samples - the number of records to create (default 1)
        max_chars - the maximum number of characters in an IGSN (default 130)
    """
    # Generate numbers
    numbers = ''.join(str(d) for d in range(10))
    allowed = ascii_letters + numbers + ';:_-=+~'

    # Generate IDs, write to file for later use
    fill_args = lambda ident: {
        'igsn': ident,
        'url': f'https://testing.igsn.org/igsn/{ident}',
        'registrant': 'test_framework'
    }
    return [fill_args(''.join([allowed[random.randint(0, len(allowed) - 1)] 
                              for _ in range(random.randint(3, max_chars))]))
           for _ in range(n_samples)]
    return post_samples
    

REGISTRATION_DATA = 'test_igsn_post_registration.json'
with open(REGISTRATION_DATA, 'w') as sink:
    json.dump(generate_registration_data(15), sink)
    
def get_registrations_from_file():
    "Load registration sample data"
    with open(REGISTRATION_DATA, 'r') as src:
        return json.load(src)
    
# Take a look at a couple of samples
print(json.dumps(post_samples[:2], indent=4))

[
    {
        "igsn": "JKmpNN-xveA_-txOa_rHQM8QRAlpKH_EFMdpnKMEuose:v2eFXNc",
        "url": "https://testing.igsn.org/igsn/JKmpNN-xveA_-txOa_rHQM8QRAlpKH_EFMdpnKMEuose:v2eFXNc",
        "registrant": "test_framework"
    },
    {
        "igsn": "0m-rS2xOnIK~FoeeP0TbWqCg;ZfptjLyt",
        "url": "https://testing.igsn.org/igsn/0m-rS2xOnIK~FoeeP0TbWqCg;ZfptjLyt",
        "registrant": "test_framework"
    }
]


Let's make a mock table and push our records into it.

We first create a grouper function since DynamoDB allows us to batch update records.

In [56]:
import itertools

def grouper(iterable, n, fillvalue=None):
    """
    Collect data into fixed-length chunks or blocks
    
    Example usage:
    
    >>> list(grouper(range(10), 3))
    [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9,)]
    
    >>> list(grouper(range(10), 3, fillvalue=-1))
    [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, -1, -1)]
    
    Parmaeters:
        iterable - the iterable to consume
        n - the number of items in a chunk
        fillvalue - if iterable // n != 0, the value to pad the last chunk with
            If None then the length of the last chunk will be less than n
    
    Returns:
        an iterator over chunks of data
    """
    args = [iter(iterable)] * n
    if fillvalue is None:
        for item in itertools.zip_longest(*args, fillvalue=None):
            while item[-1] is None:
                item = item[:-1]
            yield item
    else:
        print('here')
        yield from itertools.zip_longest(*args, fillvalue=fillvalue)

In [63]:
reg

{'igsn': '2Z1tR2YAHxFbhweOyp~OH9KzoCn3PNW7;XwPXufgKlsrosE',
 'url': 'https://testing.igsn.org/igsn/2Z1tR2YAHxFbhweOyp~OH9KzoCn3PNW7;XwPXufgKlsrosE',
 'registrant': 'test_framework'}

In [75]:
DYNAMODB_TYPE_MAPPING = {
    str: 'S',
    float: 'N',
    int: 'N',
    bytes: 'B',
    dict: 'M'
}

def map_dynamo_types(value):
    """
    Maps Python types to DynamoDB type strings for insertion into a table
    
    If type is not coerceable then a ValueError is raised
    """
    # Try single value first
    try:
        return DYNAMODB_TYPE_MAPPING[type(value)]
    except KeyError:
        pass
    
    # Next try an iterable set
    try:
        # Check for consistent typing
        try:
            for val in value:
                assert type(val) == type(value[0])
        except AssertionError:
            raise ValueError(f"Iterable {value} is not consistently typed")
            
        # Get the type string
        return DYNAMODB_TYPE_MAPPING[type(value[0])] + 'S'
        
    except TypeError:
        raise ValueError(f"Can't convert value {value} to DyanamoDB type")

def to_dynamodb_item(obj):
    "Convert a Python dictionary to an Item for putting in DynamoDB"
    return {
        k: {map_dynamo_types(v): v} for k, v in obj.items()
    }

In [76]:
to_dynamodb_item(registrations[0])

{'igsn': {'S': 'JKmpNN-xveA_-txOa_rHQM8QRAlpKH_EFMdpnKMEuose:v2eFXNc'},
 'url': {'S': 'https://testing.igsn.org/igsn/JKmpNN-xveA_-txOa_rHQM8QRAlpKH_EFMdpnKMEuose:v2eFXNc'},
 'registrant': {'S': 'test_framework'}}

In [83]:
import moto
import boto3
import json

with moto.mock_dynamodb2():
    dynamodb = boto3.resource("dynamodb")
    
    # Let's create the table
    with open('dynamodb_table_format.json', 'r') as src:
        table_fmt = json.load(src)
    table = dynamodb.create_table(**table_fmt)
    
    # We create a batch writer to automagically buffer operations into the table
    with table.batch_writer() as batch:
        for reg in map(to_dynamodb_item, get_registrations_from_file()):
            batch.put_item(reg)

TypeError: 'NoneType' object is not iterable

In [None]:


def map_dynamo_types

def to_dynamodb_item(obj):
    "Convert a Python dictionary to an Item for putting in DynamoDB"
    for key, value in obj.items():
        try:
            type_string = DYNAMODB_TYPE_MAPPING[type(value)]
        except KeyError:
            pass
        
        
            # we have an iterable set
            type_string = DYNAMODB_TYPE_MAPPING[type(value[0])]