In [5]:
import os
import boto3
from botocore import UNSIGNED, exceptions
from botocore.client import Config

from datetime import date, timedelta
import datetime as dt

In [2]:
# --- Source: S3 bucket and the key prefixes read from it ---------------------
bucket_name = 'aws-public-blockchain'

# A date string is appended to these prefixes to address one day's objects.
blocks_prefix = 'v1.0/eth/blocks/date='
transactions_prefix = 'v1.0/eth/transactions/date='

# --- Destination: local directories the downloaded files are written under ---
blocks_output = 'raw/eth/blocks'
transaction_output = 'raw/eth/transactions'

In [6]:
def prepare_object_prefix(todays_date:date=date(2024, 4, 8), days:int=7):
    """Build the per-day S3 key prefixes for the ``days`` days before ``todays_date``.

    The window covers ``todays_date - days`` up to and including the day
    before ``todays_date``; ``todays_date`` itself is excluded.

    Parameters
    ----------
    todays_date : date
        Exclusive end of the window. The default is the fixed date
        2024-04-08, not the current day, so pass ``date.today()`` explicitly
        to get a rolling window.
    days : int
        Number of consecutive days to generate prefixes for. Defaults to 7.
        A value of 0 or less yields two empty lists.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(block_prefixes, transaction_prefixes)``, both in ascending date
        order. Each entry is ``blocks_prefix`` / ``transactions_prefix``
        followed by the day's string form and a trailing ``/``.
    """
    start_date = todays_date - timedelta(days=days)

    block_objs = []
    transaction_objs = []

    for offset in range(days):
        day = start_date + timedelta(days=offset)

        # str() of a date is ISO 'YYYY-MM-DD', which is what the f-string embeds.
        block_objs.append(f"{blocks_prefix}{day}/")
        transaction_objs.append(f"{transactions_prefix}{day}/")

    return block_objs, transaction_objs
        

In [10]:
def download_and_verify(Bucket, Key, Filename):
    """Download one S3 object with unsigned requests and check that it exists locally.

    Parameters
    ----------
    Bucket : str
        Name of the S3 bucket to read from.
    Key : str
        Key of the object inside the bucket.
    Filename : str
        Local path the object is written to.

    Returns
    -------
    bool
        ``True`` if ``Filename`` exists after the download; ``False`` if the
        download raised a ``ClientError`` (its code and message are printed).
        Other exception types are not caught and propagate to the caller.
    """
    # One client per call is enough; UNSIGNED sends requests without credentials.
    s3_client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
    try:
        s3_client.download_file(Bucket, Key, Filename)
    except exceptions.ClientError as error:
        print(error.response['Error']['Code']) #a summary of what went wrong
        print(error.response['Error']['Message']) #explanation of what went wrong
        return False
    return os.path.exists(Filename)

In [11]:
def _download_prefixes(bucket, prefixes, output_root):
    """Download every object listed under each prefix into ``output_root/<partition>/``."""
    for prefix in prefixes:
        # Prefixes built from blocks_prefix / transactions_prefix look like
        # 'v1.0/eth/<table>/date=YYYY-MM-DD/', so component 3 is the date partition.
        partition = prefix.split('/')[3]
        output_dir = f"{output_root}/{partition}/"

        # The directory is created even when the prefix lists no objects.
        os.makedirs(output_dir, exist_ok=True)

        for item in bucket.objects.filter(Prefix=prefix):
            file_name = f"{output_dir}{item.key.split('/')[-1]}"

            # Files already on disk are skipped, so a rerun only fetches what is missing.
            if not os.path.isfile(file_name):
                download_and_verify(bucket_name, item.key, file_name)


def download_raw_data(block_files=None, transaction_files=None, outpath:str='raw/eth/'):
    """Download the block and transaction objects under the given S3 prefixes.

    Block files are written under ``blocks_output`` and transaction files
    under ``transaction_output``, each in a sub-directory named after the
    prefix's date partition.

    Parameters
    ----------
    block_files : iterable of str, optional
        S3 key prefixes for block data. ``None`` (the default) means none.
    transaction_files : iterable of str, optional
        S3 key prefixes for transaction data. ``None`` (the default) means none.
    outpath : str
        Currently unused: the destinations come from the module-level
        ``blocks_output`` and ``transaction_output``. Kept so existing
        callers that pass it keep working.

    Returns
    -------
    None
        The result of each individual download is not collected;
        ``download_and_verify`` prints the error details on failure.
    """
    s3_resource = boto3.resource('s3', config=Config(signature_version=UNSIGNED))
    bucket = s3_resource.Bucket(bucket_name)

    _download_prefixes(bucket, () if block_files is None else block_files, blocks_output)
    _download_prefixes(bucket, () if transaction_files is None else transaction_files, transaction_output)

In [12]:
# Build the per-day prefixes for the default date window, then fetch both datasets.
blocks, transactions = prepare_object_prefix()
download_raw_data(block_files=blocks, transaction_files=transactions)