In [None]:
import requests
import os
import json
import boto3

# Fall back to us-east-1 only when AWS_DEFAULT_REGION is not already set in the environment.
os.environ.setdefault('AWS_DEFAULT_REGION','us-east-1')

In [None]:
# GitHub API token read from the environment. os.environ.get yields None when the
# variable is unset, in which case the 'token ' + GITHUB_TOKEN concatenations in
# the request helpers raise TypeError.
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')

def get_repos(since='333255899'):
    """Fetch one page of public repositories from the GitHub REST API.

    Parameters
    ----------
    since : str or int, optional
        Value sent as the ``since`` query parameter of ``/repositories``.
        Defaults to ``'333255899'``.

    Returns
    -------
    The JSON-decoded response body. Callers in this notebook iterate it as a
    sequence of repository dicts.
    """
    res = requests.get(
        'https://api.github.com/repositories',
        # The cursor used to be baked into the URL, so the `since` argument was
        # silently ignored; pass it as a real query parameter instead.
        params={'since': since},
        headers={'Authorization': 'token ' + GITHUB_TOKEN}
    )
    return json.loads(res.content.decode('utf-8'))
    

def get_repo_details(owner, repo_name):
    """Retrieve the detail payload for one repository from the GitHub REST API.

    Parameters
    ----------
    owner : str
        Login of the account that owns the repository.
    repo_name : str
        Name of the repository under that owner.

    Returns
    -------
    The JSON-decoded body of the ``/repos/{owner}/{repo_name}`` response.
    """
    url = f'https://api.github.com/repos/{owner}/{repo_name}'
    auth_headers = {'Authorization': 'token ' + GITHUB_TOKEN}
    response = requests.get(url, headers=auth_headers)
    body_text = response.content.decode('utf-8')
    return json.loads(body_text)

def extract_details(repo_details):
    """Reduce a repository detail payload to the subset of fields kept downstream.

    Parameters
    ----------
    repo_details : dict
        Repository payload; must contain every key copied below, including a
        nested ``owner`` mapping.

    Returns
    -------
    dict
        New dict with ``id``, ``node_id``, ``name``, ``full_name``, a trimmed
        ``owner`` sub-dict, ``html_url``, ``description``, ``fork`` and
        ``created_at``, in that order.

    Raises
    ------
    KeyError
        If any of the copied keys is missing from the payload.
    """
    leading_fields = ('id', 'node_id', 'name', 'full_name')
    owner_fields = ('login', 'node_id', 'type', 'site_admin')
    trailing_fields = ('html_url', 'description', 'fork', 'created_at')

    record = {field: repo_details[field] for field in leading_fields}
    record['owner'] = {field: repo_details['owner'][field] for field in owner_fields}
    record.update((field, repo_details[field]) for field in trailing_fields)
    return record

def construct_repo_records():
    """Build trimmed repository records for one page of public repositories.

    Lists repositories with ``get_repos()``, fetches each one's details with
    ``get_repo_details()`` and reduces them with ``extract_details()``. An
    entry whose lookup or extraction fails is skipped with a warning so that
    one bad entry does not abort the run.

    Returns
    -------
    list
        One ``extract_details`` result per entry that was processed
        successfully, in listing order.
    """
    import warnings  # local import so this cell does not depend on another cell's imports

    repo_records = []
    for repo in get_repos():
        try:
            owner = repo['owner']['login']
            name = repo['name']
            repo_details = get_repo_details(owner, name)
            repo_records.append(extract_details(repo_details))
        except Exception as exc:
            # Still best-effort, but no longer silent, and KeyboardInterrupt /
            # SystemExit now propagate instead of being swallowed by a bare except.
            warnings.warn(f'Skipping repository entry ({type(exc).__name__}: {exc})')
    return repo_records

In [None]:
# Resource-level DynamoDB handle shared by all the table cells that follow.
dynamodb = boto3.resource('dynamodb')

### CRUD operations in DynamoDB

In [None]:
# Create the demo `employee` table: a single HASH key `eid` of type N (number),
# with PAY_PER_REQUEST billing.
employee = dynamodb.create_table(
    TableName='employee',
    KeySchema=[{'AttributeName': 'eid', 'KeyType': 'HASH'}],
    AttributeDefinitions=[{'AttributeName': 'eid', 'AttributeType': 'N'}],
    BillingMode='PAY_PER_REQUEST',
)

In [None]:
# Display the status reported by the table object returned from create_table.
employee.table_status

In [None]:
# Obtain a handle to the `employee` table by name.
table = dynamodb.Table('employee')

In [None]:
# Display the table status as seen through the by-name handle.
table.table_status

In [None]:
from decimal import Decimal
# Three sample employee items keyed by `eid`. They share the same phone list and
# address values (each item gets its own list/dict objects); only emp3 carries a
# `sal` attribute, stored as Decimal.
emp1 = dict(
    eid=1,
    fn='Manish',
    ln='Visave',
    pn=[123455, 67899],
    a=dict(a1='911 BAKERS STREET', c='ROUND ROCK', s='TX', pc='444333'),
)

emp2 = dict(
    eid=2,
    fn='Girish',
    ln='Visave',
    pn=[123455, 67899],
    a=dict(a1='911 BAKERS STREET', c='ROUND ROCK', s='TX', pc='444333'),
)

emp3 = dict(
    eid=3,
    fn='Amol',
    ln='Visave',
    sal=Decimal('2000000.0'),
    pn=[123455, 67899],
    a=dict(a1='911 BAKERS STREET', c='ROUND ROCK', s='TX', pc='444333'),
)

In [None]:
# Create: write the three sample employees, one put_item call each.
table.put_item(Item=emp1)
table.put_item(Item=emp2)
table.put_item(Item=emp3)


In [None]:
# Read: fetch the item with eid 2. Indexing ['Item'] raises KeyError if the
# response carries no 'Item' entry.
data = table.get_item(Key={'eid':2})['Item']

In [None]:
# Set the `sal` attribute on the local copy only; nothing is written to the table here.
data['sal'] = Decimal('7000000.0')

In [None]:
# Update: write the modified item back with put_item.
table.put_item(Item=data)

In [None]:
# Re-read eid 2 to display the item after the write above.
table.get_item(Key={'eid':2})['Item']

In [None]:
# Display the 'Items' list from a single scan() response.
table.scan()['Items']

In [None]:
# Delete: remove the item whose key is eid 3.
table.delete_item(Key={'eid':3})

In [None]:
# Display the full scan() response (not just 'Items') after the delete.
table.scan()

In [None]:
# Drop the `employee` demo table.
table.delete()

### Creating tables in DynamoDB

In [None]:
# Create the `ghrepos` table: a single HASH key `id` of type N (number),
# with PAY_PER_REQUEST billing.
dynamodb.create_table(
    TableName='ghrepos',
    KeySchema=[{'AttributeName': 'id', 'KeyType': 'HASH'}],
    AttributeDefinitions=[{'AttributeName': 'id', 'AttributeType': 'N'}],
    BillingMode='PAY_PER_REQUEST',
)

In [None]:
# NOTE(review): the KeySchema and AttributeDefinitions entries below are empty
# dicts, which looks like an unfinished placeholder. Unlike the other
# create_table cells, no AttributeName / KeyType / AttributeType is given, so
# this call is expected to be rejected as written. Confirm the intended key
# for `ghMarker` and fill it in before running this cell.
ghMarker = dynamodb.create_table(
    TableName='ghMarker',
    KeySchema=[
        {
            
        }
    ],
    AttributeDefinitions=[
        {
            
        }
    ],
    BillingMode='PAY_PER_REQUEST'
)

In [None]:
# Handle to the `ghrepos` table, used by the load and delete cells below.
ghrepo_table = dynamodb.Table('ghrepos')

In [None]:
# Display the status reported for the `ghrepos` table.
ghrepo_table.table_status

### Loading data in DynamoDB

In [None]:
def load_data(data, table):
    """Write every record in ``data`` to ``table``, one ``put_item`` call per record.

    Parameters
    ----------
    data : iterable
        Records to store; each is passed unchanged as ``Item``.
    table
        Object exposing ``put_item(Item=...)``.
    """
    for record in data:
        table.put_item(Item=record)

In [None]:
# Build the repository records to load. This rebinds `data`, which earlier
# cells used for a single employee item.
data = construct_repo_records()

In [None]:
# Number of records about to be loaded.
len(data)

In [None]:
%%time
# Load the records one put_item call at a time (timed by the cell magic above).
load_data(data,ghrepo_table)

In [None]:
# Count the items in one scan() response; only that single response is counted.
len(ghrepo_table.scan()['Items'])

In [None]:
# Display the first item of a fresh scan() response as a sample.
ghrepo_table.scan()['Items'][0]

In [None]:
def delete_all_data(item, table):
    """Delete from ``table`` the single entry keyed by ``item['id']``.

    Despite its name, this removes one item per call.

    Parameters
    ----------
    item : mapping
        Record whose ``'id'`` value identifies the entry to remove.
    table
        Object exposing ``delete_item(Key=...)``.
    """
    primary_key = {'id': item['id']}
    table.delete_item(Key=primary_key)

In [None]:
# map() is lazy in Python 3, so wrapping the deletes in map() never issued any
# of them. A list comprehension runs every delete immediately; `d` is kept so
# later cells can still reference it (it holds each call's return value).
d = [delete_all_data(x, ghrepo_table) for x in data]

In [None]:
# Display `d` as assigned in the previous cell.
d

### Batch Insert

In [None]:
%%time
# Rebuild the repository records for the batch-insert run (timed by the cell magic above).
data = construct_repo_records()

In [None]:
def batch_insert(data, table, batch_size=50):
    """Write ``data`` to ``table`` through a single ``batch_writer`` context.

    Parameters
    ----------
    data : sequence
        Records to store; must support ``len()`` and slicing.
    table
        Object whose ``batch_writer()`` returns a context manager exposing
        ``put_item(Item=...)``.
    batch_size : int, optional
        Size of the slices walked while feeding the writer (default 50).
    """
    with table.batch_writer() as writer:
        record_count = len(data)
        for start in range(0, record_count, batch_size):
            # Feed the writer one slice at a time, preserving input order.
            for record in data[start:start + batch_size]:
                writer.put_item(Item=record)

In [None]:
%%time
# Load the records through the batch writer (timed for comparison with load_data).
batch_insert(data, ghrepo_table)

In [None]:
def batch_delete(table, batch_size=50):
    """Delete every item found by scanning ``table``, keyed by ``id``.

    A single ``scan()`` call can return only part of a table, signalling that
    more remains through ``LastEvaluatedKey``. This follows that cursor until
    it is absent, so the whole table is emptied rather than just the first page.

    Parameters
    ----------
    table
        Object exposing ``scan(**kwargs)`` and a ``batch_writer()`` context
        manager with ``delete_item(Key=...)``.
    batch_size : int, optional
        Size of the slices walked within each scanned page (default 50).
    """
    with table.batch_writer() as batch:
        scan_kwargs = {}
        while True:
            page = table.scan(**scan_kwargs)
            items = page.get('Items', [])
            for i in range(0, len(items), batch_size):
                for repo in items[i:i + batch_size]:
                    batch.delete_item(Key={'id': repo['id']})

            # No LastEvaluatedKey means the scan has reached the end of the table.
            last_key = page.get('LastEvaluatedKey')
            if not last_key:
                break
            scan_kwargs['ExclusiveStartKey'] = last_key

In [None]:
%%time
# Empty the `ghrepos` table through the batch writer (timed by the cell magic above).
batch_delete(ghrepo_table)