# 2.3 - Create Database on DynamoDB

In [18]:
import boto3
import csv

In [3]:
client = boto3.client('dynamodb', region_name='us-east-1')

In [10]:
def createTable():
    try:
        resp = client.create_table(
            TableName="myTweets",
            # Declare your Primary Key in the KeySchema argument
            KeySchema=[
                {
                    "AttributeName": "created_at",
                    "KeyType": "HASH"
                },
                {
                    "AttributeName": "user_name",
                    "KeyType": "RANGE"
                }
            ],
            # Any attributes used in KeySchema or Indexes must be declared in AttributeDefinitions
            AttributeDefinitions=[
                {
                    "AttributeName": "created_at",
                    "AttributeType": "S"
                },
                {
                    "AttributeName": "user_name",
                    "AttributeType": "S"
                }
            ],
            # ProvisionedThroughput controls the amount of data you can read or write to DynamoDB per second.
            # You can control read and write capacity independently.
            ProvisionedThroughput={
                "ReadCapacityUnits": 1,
                "WriteCapacityUnits": 1
            }
        )
        print("Table created successfully!")
    except Exception as e:
        print("Error creating table:")
        print(e)


In [11]:
createTable()

Table created successfully!


In [16]:
# Now that the table has been created, here we want to import our data.

# create an instance of dynamodb via boto3
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
# select our new db table
myTable = dynamodb.Table('myTweets')

In [24]:
# create a function to read each row 
def batch_write(table_name, rows):
    table = dynamodb.Table(table_name)
    
    with table.batch_writer() as batch:
        for row in rows:
            batch.put_item(Item=row)
        return True
    
def read_csv(csv_file, list):
    # get all rows
    rows = csv.DictReader(open(csv_file, errors="ignore")) 
    
    for row in rows:
        list.append(row)
        
if __name__ == '__main__':
    table_name = 'myTweets'
    file_name = 'marks-tweets.csv'
    items = []
    
    read_csv(file_name, items)
    
    status = batch_write(table_name, items)
    
    if(status):
        print('csv inserted')
    else:
        print('Error inserting csv')
        
# source : https://www.youtube.com/watch?v=MOaXGYgqipQ

ClientError: An error occurred (ValidationException) when calling the BatchWriteItem operation: Provided list of item keys contains duplicates

The above threw an error, but I managed to upload data.

In [27]:
# get item
resp = table.get_item(Key={"created_at": "Tue May 18 15:35:01 +0000 2021", 
                           "user_name": "Stefan Hawley"})

print(resp['Item'])

{'hashtags': "[{'text': 'GlosBirds', 'indices': [75, 85]}]", 'screen_name': 'Hawley15', 'location': 'Gloucester, England', 'created_at': 'Tue May 18 15:35:01 +0000 2021', 'mentions': '[]', 'text': '12+ Hobbies over the court lake this afternoon. At least 1 with a ring on. #GlosBirds https://t.co/FVAmjxlevz', 'user_name': 'Stefan Hawley'}
