# AWS Blockchain Automisation Script

## Multiple Steps needed:
1. Launch the VM according to settings
2. Configure and Install everything on launched VMs (network settings, packages, ...) 
3. Run Experiments on VM (TODO)
4. After finishing the experiments, send metrics to database (TODO)
5. Terminate VMs and calculate aws costs of VMs and storage

## TODO: Introduce Logging of this script

## Ensure that .aws/config, .aws/credentials and correct ssh keys are configured on the machine where the script is executed!

In [1]:
import sys, os, pprint
import json
import botocore, boto3
import getpass
import re
import datetime, pytz, time
utc = pytz.utc
from dateutil import tz
import json
import numpy as np
from pkg_resources import resource_filename
from dateutil import parser

from web3 import Web3

## logging
import logging
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.ERROR)
logging.getLogger("py4j").setLevel(logging.ERROR)


#Can we do this?
print("Enter proxy password:")
password = getpass.getpass()

#Set proxy
#technical user
#do you need proxy? @emil
os.environ["HTTPS_PROXY"]=f"http://qqdpoc0:{password}@proxy.muc:8080"
os.environ["HTTP_PROXY"]=f"http://qqdpoc0:{password}@proxy.muc:8080"
os.environ["NO_PROXY"] = "localhost,127.0.0.1,.muc,.aws.cloud.bmw,.azure.cloud.bmw,.bmw.corp,.bmwgroup.net"


#print(os.environ)

Enter proxy password:


 ········


## Experiment Settings (#VMs, storage, network_settings, aws profile, ...)

### Keep in Mind: If UserData script mounts a drive, the drive needs to be provided in the first place using the blockdevicemappings, else nothing can be mounted

In [58]:
#This  config contains everything about the experiment
config={
    "vm_count": 4,
    "instance_type": "t2.micro",
    "image": {
         "image_id": None,
         "os": "ubuntu",
         "version": 18,
         "permissions": "default"
        },
    "subnet_id": "subnet-0ac7aeeec87150dd7",
    "security_group_id": ["sg-0db312b6f84d66889"],
    "user": "ubuntu",
    "profile": "block_exp",
    "key_name": "blockchain",
    "tag_name": "blockchain_philipp",
    "exp_type":"geth",
    "user_data_script":"EC2_instance_bootstrap_geth.sh",
    "storage_settings" : [
        {
            'DeviceName': "/dev/sdb",
            'VirtualName': 'string',
            'Ebs': {
                'DeleteOnTermination': True,
                'VolumeSize': 32,
                'VolumeType': 'gp2',
                'Encrypted': True,
                'KmsKeyId': 'arn:aws:kms:eu-central-1:731899578576:key/a808826d-e460-4271-a23b-29e1e0807c1d'
            },
        },
    ],
}


#read contents of shell script
with open(config['user_data_script'], 'r') as content_file:
    user_data = content_file.read()
    
#print(repr(user_data))




## Search for the newest stable ubuntu image ID

You can select an AMI to use based on the following characteristics: (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ComponentsAMIs.html)

 * Region (see Regions and Availability Zones) -> flexibel (default frankfurt)

 * Operating system -> ubuntu (version?) (make it flexibel)

 * Architecture (32-bit or 64-bit) -> 64bit (x86_64)

 * Launch Permissions

 * Storage for the Root Device
 
 * Owner? -> 099720109477
 
 * https://askubuntu.com/a/53586

In [59]:
pprnt = pprint.PrettyPrinter(indent=1)

def newest_image(list_of_images):
    latest = None
    for image in list_of_images:
        if not latest:
            latest = image
            continue

        if parser.parse(image['CreationDate']) > parser.parse(latest['CreationDate']):
            latest = image

    return latest
 
if config['image']['image_id'] == None:
    session = boto3.Session(profile_name=config['profile'])
    ec2 = session.client('ec2', region_name='eu-central-1')
    #pprnt.pprint(ec2.describe_instances())
 
    # Find the latest official Ubuntu image from Canonical(owner = 099720109477)
    #aws ec2 describe-images --owners 099720109477 --filters 'Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-*-18*-amd64-server-????????' 'Name=state,Values=available' --output json | jq -r '.Images | sort_by(.CreationDate) | last(.[])'

    amis = ec2.describe_images(
         Filters=[
             {
                 'Name': 'name',
                 'Values': [f"{config['image']['os']}/images/hvm-ssd/{config['image']['os']}-*-{config['image']['version']}*-amd64-server-????????"]
             },
             {
                 'Name': 'architecture',
                 'Values': ['x86_64']
             },
             {
                 'Name': 'state',
                 'Values': ['available']
             },
             {
                 'Name': 'root-device-type',
                 'Values': ['ebs']
             }
         ],
         Owners=[
             '099720109477',
         ]
     )
    #pprnt.pprint(amis["Images"])
    #pprnt.pprint(amis)
    image = newest_image(amis['Images'])
    config['image']['image_id'] = image["ImageId"]

    #root_storage_mapping = image["BlockDeviceMappings"]
    #print([x for x in source_image["BlockDeviceMappings"]])


ec2 = session.resource('ec2')
image = ec2.Image(config['image']['image_id'])
root_storage_mapping = image.block_device_mappings

print("Selected Image: " + image.description)
    

Selected Image: Canonical, Ubuntu, 18.04 LTS, amd64 bionic image build on 2019-04-29


## Start x VMs according to settings, configure the launched VMs according to given shell script

In [60]:
#added config and credentials via terminal /home/q481264/.aws
#To ensure faster instance launches, break up large requests into smaller batches. 
#For example, create five separate launch requests for 100 instances each instead of one launch request for 500 instances.
session = boto3.Session(profile_name=config['profile'])
ec2 = session.resource('ec2', region_name='eu-central-1')
ec2_instances = ec2.create_instances(
    ImageId = config['image']['image_id'],
    MinCount = config['vm_count'],
    MaxCount = config['vm_count'],
    InstanceType = config['instance_type'],
    KeyName = config['key_name'],
    SubnetId = config['subnet_id'],
    BlockDeviceMappings = config['storage_settings'],
    UserData = user_data,
    TagSpecifications=[
        {
            'ResourceType': "instance",
            'Tags': [
                {
                    'Key': 'Creator',
                    'Value': config['tag_name']
                },
                {
                    'Key': 'Name',
                    'Value': config['tag_name']
                },
                    ]
        },
                      ],
SecurityGroupIds=config['security_group_id']
)

#Add experiment name tag
#Does experiment already exist? experiment-date-?hash?
# -> Build pipeline which supports different experiment settings/framweworks/...
##How to identify the instances of different experiments?
#Tag idea: Exp-Ethereum-010419-1223-Node1
#UserData = user_data,
#BlockDeviceMappings = storage_settings

In [61]:
ips=[]
for i in ec2_instances:
    i.wait_until_running()
    i.load()
    print(f"ID: {i.id}, State: {i.state['Name']}, IP: {i.private_ip_address}")
    ips.append(i.private_ip_address)

#add no procy for all VM ips
os.environ["NO_PROXY"] = f"localhost,127.0.0.1,.muc,.aws.cloud.bmw,.azure.cloud.bmw,.bmw.corp,.bmwgroup.net,{','.join(str(ip) for ip in ips)}"
    
print(f"You can now access machines via: ssh -i \"path to {config['key_name']} key\" ubuntu@{ips} (if user is ubuntu) ")
print(f"e.g. ssh -i ~/.ssh/blockchain ubuntu@{ips[0]}")
          
          
#add ips to config
config['ips'] = ips

ID: i-0cab42b1aae60705d, State: running, IP: 10.3.2.77
ID: i-074513fadd440f4eb, State: running, IP: 10.3.2.90
ID: i-0fafb7f27165d60e0, State: running, IP: 10.3.2.72
ID: i-0e1434abd34900f06, State: running, IP: 10.3.2.74
You can now access machines via: ssh -i "path to blockchain key" ubuntu@['10.3.2.77', '10.3.2.90', '10.3.2.72', '10.3.2.74'] (if user is ubuntu) 
e.g. ssh -i ~/.ssh/blockchain ubuntu@10.3.2.77


In [62]:
#Give launched instances tag with time/type of experiment/number of node
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H-%M-%S')
for index, i in enumerate(ec2_instances):
    exp_tag = f"exp_{st}_{config['exp_type']}_Node{index}"
    ec2.create_tags(Resources=[
        i.id,
    ],
    Tags=[
        {
            'Key': 'exp_tag',
            'Value': exp_tag
        },
    ])

## Record launch times of all VMs (later needed for calculating aws costs)

In [63]:
launch_times = []
for i in ec2_instances:
    print("Launch Time: " + str(i.launch_time))
    #get launch time
    launch_times.append(i.launch_time.replace(tzinfo=None))
    

Launch Time: 2019-05-03 10:58:25+00:00
Launch Time: 2019-05-03 10:58:25+00:00
Launch Time: 2019-05-03 10:58:25+00:00
Launch Time: 2019-05-03 10:58:25+00:00


# Get Ethereum Accounts from all Nodes
## ssh key is needed for scp to work (need to be present where the jupyter notebook is executed)

In [64]:
#Make experiment folder where all info is stored
#mkdir -p parentfolder/{subfolder1,subfolder2,subfolder3}
exp_dir = f"exp_{st}_{config['exp_type']}"
cmd = f"mkdir {exp_dir}"
!{cmd}
cmd = f"mkdir -p {exp_dir}/accounts"
!{cmd}
cmd = f"mkdir {exp_dir}/enodes"
!{cmd}
cmd = f"mkdir {exp_dir}/geth_logs"
!{cmd}
cmd = f"mkdir {exp_dir}/user_data_logs"
!{cmd}

with open(f"{exp_dir}/config.json", 'w') as outfile:  
    json.dump(config, outfile)


In [69]:
scp_flags = "-i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
for index, i in enumerate(ec2_instances):
    ssh_cmd = f"ssh {scp_flags} -t ubuntu@{i.private_ip_address}"
    cmd = f'{ssh_cmd}  "[[ -f /var/log/user_data_success.log ]] && echo "File exists" || echo "File does not exist""'
    output = !{cmd}
    print(output[2])

File exists
File exists
File exists
File exists


In [68]:
print(output[2])

File exists


In [70]:
# how to wait until all VMs are finished with setting up UserData?
#cmd = ssh -q $HOST [[ -f $FILE_PATH ]] && echo "File exists" || echo "File does not exist";

#-o UserKnownHostsFile=/dev/null
scp_flags = "-i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
for index, i in enumerate(ec2_instances):
    #get account from all instances
    cmd = f"scp {scp_flags} ubuntu@{i.private_ip_address}:/data/gethNetwork/account.txt {exp_dir}/accounts/account_node_{index}.txt "
    print(cmd)
    !{cmd}
    


scp -i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@10.3.2.77:/data/gethNetwork/account.txt exp_2019-05-03_13-00-01_geth/accounts/account_node_0.txt 
account.txt                                   100%   41     0.0KB/s   00:00    
scp -i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@10.3.2.90:/data/gethNetwork/account.txt exp_2019-05-03_13-00-01_geth/accounts/account_node_1.txt 
account.txt                                   100%   41     0.0KB/s   00:00    
scp -i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@10.3.2.72:/data/gethNetwork/account.txt exp_2019-05-03_13-00-01_geth/accounts/account_node_2.txt 
account.txt                                   100%   41     0.0KB/s   00:00    
scp -i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@10.3.2.74:/data/gethNetwork/account.txt exp_2019-05-03_13-00-01_geth/accounts/account_node_3.t

In [71]:
all_accounts = []
path = f"{exp_dir}/accounts"
fileList = os.listdir(path)
for file in fileList:
    file = open(os.path.join(path + "/"+ file), 'r')
    all_accounts.append(file.read())
    file.close()
    
all_accounts = [x.rstrip() for x in all_accounts]
print(all_accounts)

['63ee30bd4a47b4877380c7e07f61c26871982d98', 'c2842961f53c0074a299a65863a4c16572e6f021', 'ce20ab481b0fdc452fa232c2aa7fc5d66d66dadf', '4bb8088de899d80d7f1dd6d4c96fed18d6d52070']


## Build Genesis file, distribute it to VMs and start geth nodes with it
### TODO: Make Genesis flexible for multiple consensus mechanisms

In [72]:
balances = ["0x200000000000000000000000000000000000000000000000000000000000000" for x in all_accounts]
base_balances = {   "0000000000000000000000000000000000000001": { "balance": "1" },
                    "0000000000000000000000000000000000000002": { "balance": "1" },
                    "0000000000000000000000000000000000000003": { "balance": "1" },
                    "0000000000000000000000000000000000000004": { "balance": "1" },
                    "0000000000000000000000000000000000000005": { "balance": "1" },
                    "0000000000000000000000000000000000000006": { "balance": "1" },
                    "0000000000000000000000000000000000000007": { "balance": "1" },
                    "0000000000000000000000000000000000000008": { "balance": "1" }}
additional_balances = {str(x): {"balance": str(y)} for x, y in zip(all_accounts,balances)}
merged_balances = {**base_balances, **additional_balances}

#clique genesis at beginning
genesis_dict = {
    
    "config":{
        'chainId': 11,
        'homesteadBlock': 0,
        'eip150Block': 0,
        'eip155Block': 0,
        'eip158Block': 0,
        'byzantiumBlock': 0,
        'clique':{
                    'period':5,
                    'epoch':30000 
        }
    },
    "alloc": merged_balances,
    "coinbase": "0x0000000000000000000000000000000000000000",
    "difficulty": "0x1",
    "extraData": f"0x0000000000000000000000000000000000000000000000000000000000000000{''.join(all_accounts)}0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
    "gasLimit": "0x2fefd8",
    "mixHash": "0x0000000000000000000000000000000000000000000000000000000000000000",
    "nonce": "0x0000000000000042",
    "timestamp": "0x00"
    
    
}

pprnt.pprint(genesis_dict)

with open(f"{exp_dir}/genesis.json", 'w') as outfile:  
    json.dump(genesis_dict, outfile)
    
#push genesis from local to remote VMs
for index, i in enumerate(ec2_instances):
    #get account from all instances
    cmd = f"scp {scp_flags}  {exp_dir}/genesis.json ubuntu@{i.private_ip_address}:~/genesis.json"
    print(cmd)
    !{cmd}


{'alloc': {'0000000000000000000000000000000000000001': {'balance': '1'},
           '0000000000000000000000000000000000000002': {'balance': '1'},
           '0000000000000000000000000000000000000003': {'balance': '1'},
           '0000000000000000000000000000000000000004': {'balance': '1'},
           '0000000000000000000000000000000000000005': {'balance': '1'},
           '0000000000000000000000000000000000000006': {'balance': '1'},
           '0000000000000000000000000000000000000007': {'balance': '1'},
           '0000000000000000000000000000000000000008': {'balance': '1'},
           '4bb8088de899d80d7f1dd6d4c96fed18d6d52070': {'balance': '0x200000000000000000000000000000000000000000000000000000000000000'},
           '63ee30bd4a47b4877380c7e07f61c26871982d98': {'balance': '0x200000000000000000000000000000000000000000000000000000000000000'},
           'c2842961f53c0074a299a65863a4c16572e6f021': {'balance': '0x200000000000000000000000000000000000000000000000000000000000000'},
     

## Start geth service on all nodes

In [73]:
for index, i in enumerate(ec2_instances):
    #get account from all instances
    ssh_cmd = f"ssh {scp_flags} -t ubuntu@{i.private_ip_address}"
    cmd = f'{ssh_cmd}  "sudo mv ~/genesis.json /data/gethNetwork/genesis.json"'
    !{cmd}
    cmd = f"{ssh_cmd} sudo geth --datadir '/data/gethNetwork/node/' init /data/gethNetwork/genesis.json"
    !{cmd}
    cmd = f"{ssh_cmd} sudo systemctl daemon-reload"
    !{cmd}
    cmd = f"{ssh_cmd} sudo systemctl enable geth.service"
    !{cmd}
    cmd = f"{ssh_cmd} sudo systemctl start geth.service"
    !{cmd}
    #!sudo geth --datadir "/data/gethNetwork/node/" --networkid 31 --verbosity 3 --port 30310 --rpc --rpcaddr "0.0.0.0" --rpcport 8101 console --rpcapi clique,eth,miner --nat=extip:i.private_ip_address  --unlock all_accounts[index] --password "/data/gethNetwork/password.txt"
        

Connection to 10.3.2.77 closed.
[33mWARN [0m[05-03|11:11:29.860] Sanitizing cache to Go's GC limits       [33mprovided[0m=1024 [33mupdated[0m=327
[32mINFO [0m[05-03|11:11:29.864] Maximum peer count                       [32mETH[0m=25 [32mLES[0m=0 [32mtotal[0m=25
[32mINFO [0m[05-03|11:11:29.866] Allocated cache and file handles         [32mdatabase[0m=/data/gethNetwork/node/geth/chaindata [32mcache[0m=16 [32mhandles[0m=16
[32mINFO [0m[05-03|11:11:29.884] Writing custom genesis block 
[32mINFO [0m[05-03|11:11:29.885] Persisted trie from memory database      [32mnodes[0m=15 [32msize[0m=2.40kB [32mtime[0m=144.435µs [32mgcnodes[0m=0 [32mgcsize[0m=0.00B [32mgctime[0m=0s [32mlivenodes[0m=1 [32mlivesize[0m=0.00B
[32mINFO [0m[05-03|11:11:29.886] Successfully wrote genesis state         [32mdatabase[0m=chaindata                             [32mhash[0m=a2295b…a9a134
[32mINFO [0m[05-03|11:11:29.886] Allocated cache and file handles         [32mdat

## Add enodes from all nodes to all nodes

In [74]:
enodes = []
#collect enodes
web3_clients = []
for index, i in enumerate(ec2_instances):
    print(f"http://{i.private_ip_address}:8545")
    web3_clients.append(Web3(Web3.HTTPProvider(f"http://{i.private_ip_address}:8545")))
    #print(web3.admin)
    enodes.append((i.private_ip_address,web3_clients[index].admin.nodeInfo.enode))
    
#print(enodes)
print([enode for (ip, enode) in enodes])

with open(f"{exp_dir}/static-nodes.json", 'w') as outfile:  
    json.dump([enode for (ip, enode) in enodes], outfile)
    
#distribute collected enodes over network
for index, i in enumerate(ec2_instances):
    #web3 = Web3(Web3.HTTPProvider(f"http://{i.private_ip_address}:8545"))
    for ip, enode in enodes:
        #dont add own enode
        if ip != i.private_ip_address:
            web3_clients[index].admin.addPeer(enode)
    
    pprnt.pprint(web3_clients[index].admin.peers)

            


http://10.3.2.77:8545
http://10.3.2.90:8545
http://10.3.2.72:8545
http://10.3.2.74:8545
['enode://bb42c901712663b25cb8fad056d820fd6f654164eca4e60c5005064af833a710bffafde2b488efbba9a7e5616ea3d321ee10f43f637651636e6cc362ae981471@10.3.2.77:30310', 'enode://66c3103b5a6e2eb6e60bf0db2095acbc6d81163e4fb3bc4e426a77243c40395c17cb0839353eab9b27cac85364f0605b0c233125fe84e930e11a61bd3c57ae15@10.3.2.90:30310', 'enode://a1c714fa8a226b296af40c3b2206488233f50f006332e37d3c561f78a110719547974a7eb06f66b929ce2ded0a253555377bcc75f2e69d5be73da938aae2827d@10.3.2.72:30310', 'enode://efa0af345b4ed22f11b06977c56f6bb928c6f78dafb00e70cd3825d24876af081e642d90650f4723420c36de646e5bc71904e74e32731ba46bab12b3c0d3a069@10.3.2.74:30310']
[{'caps': ['eth/63'],
  'enode': 'enode://efa0af345b4ed22f11b06977c56f6bb928c6f78dafb00e70cd3825d24876af081e642d90650f4723420c36de646e5bc71904e74e32731ba46bab12b3c0d3a069@10.3.2.74:30310',
  'id': '644384b0ca62943da81786a189d3ebe25533e9638ad2e78361200a019afb2b97',
  'name': 'Geth/v1.8.2

## Some Ethereum testing


In [75]:
for index, i in enumerate(ec2_instances):
    #web3 = Web3(Web3.HTTPProvider(f"http://{i.private_ip_address}:8545"))
    print("IsMining:" + str(web3_clients[index].eth.mining))
    for acc in all_accounts:
        print(str(web3_clients[index].toChecksumAddress(acc)) + ": "+ str(web3_clients[index].eth.getBalance(web3_clients[index].toChecksumAddress(acc))))

#https://web3py.readthedocs.io/en/stable/middleware.html#geth-style-proof-of-authority     
from web3.middleware import geth_poa_middleware

try:
    web3_clients[0].middleware_stack.inject(geth_poa_middleware, layer=0)
except:
    print("Middleware already injected")
    
print("Tx from " + str(web3_clients[0].toChecksumAddress(all_accounts[0]))+"to "+ str(web3_clients[0].toChecksumAddress(all_accounts[1])) )
web3_clients[0].personal.sendTransaction({ 'from': web3_clients[0].toChecksumAddress(all_accounts[0]),'to': web3_clients[0].toChecksumAddress(all_accounts[1]),  'value': web3_clients[0].toWei(23456,'ether'), 'gas': '0x5208', 'gasPrice': web3_clients[0].toWei(5, 'gwei')},"password")


IsMining:True
0x63EE30bD4a47b4877380C7e07F61c26871982D98: 904625697166532776746648320380374280103671755200316906558262375061821325312
0xC2842961f53C0074A299a65863a4c16572e6f021: 904625697166532776746648320380374280103671755200316906558262375061821325312
0xcE20Ab481b0Fdc452FA232C2aa7Fc5d66D66daDF: 904625697166532776746648320380374280103671755200316906558262375061821325312
0x4Bb8088de899d80d7F1DD6D4C96feD18D6d52070: 904625697166532776746648320380374280103671755200316906558262375061821325312
IsMining:True
0x63EE30bD4a47b4877380C7e07F61c26871982D98: 904625697166532776746648320380374280103671755200316906558262375061821325312
0xC2842961f53C0074A299a65863a4c16572e6f021: 904625697166532776746648320380374280103671755200316906558262375061821325312
0xcE20Ab481b0Fdc452FA232C2aa7Fc5d66D66daDF: 904625697166532776746648320380374280103671755200316906558262375061821325312
0x4Bb8088de899d80d7F1DD6D4C96feD18D6d52070: 904625697166532776746648320380374280103671755200316906558262375061821325312
IsMining:Tru

HexBytes('0x0f1fb3ba71bc3e5c8e6671a8145e5e64f8c80e363b9627400088ae44c9c49a60')

In [76]:
for index, i in enumerate(ec2_instances):
    #web3 = Web3(Web3.HTTPProvider(f"http://{i.private_ip_address}:8545"))
    for acc in all_accounts:
        print(str(web3_clients[index].toChecksumAddress(acc)) + ": "+ str(web3_clients[index].eth.getBalance(web3_clients[index].toChecksumAddress(acc))))
    print("---------------------------")


0x63EE30bD4a47b4877380C7e07F61c26871982D98: 904625697166532776746648320380374280103671755200316883102262270061821325312
0xC2842961f53C0074A299a65863a4c16572e6f021: 904625697166532776746648320380374280103671755200316930014262480061821325312
0xcE20Ab481b0Fdc452FA232C2aa7Fc5d66D66daDF: 904625697166532776746648320380374280103671755200316906558262375061821325312
0x4Bb8088de899d80d7F1DD6D4C96feD18D6d52070: 904625697166532776746648320380374280103671755200316906558262375061821325312
---------------------------
0x63EE30bD4a47b4877380C7e07F61c26871982D98: 904625697166532776746648320380374280103671755200316883102262270061821325312
0xC2842961f53C0074A299a65863a4c16572e6f021: 904625697166532776746648320380374280103671755200316930014262480061821325312
0xcE20Ab481b0Fdc452FA232C2aa7Fc5d66D66daDF: 904625697166532776746648320380374280103671755200316906558262375061821325312
0x4Bb8088de899d80d7F1DD6D4C96feD18D6d52070: 904625697166532776746648320380374280103671755200316906558262375061821325312
------------

In [80]:
#web3_clients[0].middleware_stack.inject(geth_poa_middleware, layer=0)
web3_clients[0].eth.getBlock('latest')

AttributeDict({'difficulty': 2,
 'proofOfAuthorityData': HexBytes('0xd88301081b846765746888676f312e31302e34856c696e757800000000000000a7d062a84b098e0d41d65c1d565ae7d7cb1066a47b887f2c29827ae850e753a42271a98f6785ca8200ad984e9353d7d8c7514a443a56185c7177d4718013ec9f00'),
 'gasLimit': 3206620,
 'gasUsed': 0,
 'hash': HexBytes('0x8098571a10ea74c499a4834d0eb3c43f69e51c891b188616856668893ad908b7'),
 'logsBloom': HexBytes('0x00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000'),
 'miner': '0x0000000000000000000000000000000000000000',
 'mixHash

## Pull log files from all nodes and store them to experiment directory

In [81]:
scp_flags = "-i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
for index, i in enumerate(ec2_instances):
    #get account from all instances
    cmd = f"scp {scp_flags} ubuntu@{i.private_ip_address}:/var/log/geth.log {exp_dir}/geth_logs/geth_log_node_{index}.log"
    !{cmd}
    cmd = f"scp {scp_flags} ubuntu@{i.private_ip_address}:/var/log/user_data.log {exp_dir}/user_data_logs/user_data_log_node_{index}.log"
    !{cmd}

geth.log                                      100%   14KB  13.8KB/s   00:00    
user_data.log                                 100%   20KB  20.2KB/s   00:00    
geth.log                                      100%   14KB  13.6KB/s   00:00    
user_data.log                                 100%   20KB  19.7KB/s   00:00    
geth.log                                      100%   15KB  14.6KB/s   00:00    
user_data.log                                 100%   20KB  19.7KB/s   00:00    
geth.log                                      100%   14KB  13.8KB/s   00:00    
user_data.log                                 100%   14KB  14.4KB/s   00:00    


## Stop all instances

In [82]:
for i in ec2_instances:
    i.stop()

## Calculate Costs of the VM instances
* Get launch and stop time of each VM &rarr; get uptime for all Vms
* Get price per VM 
* Get storage price per VM
<br />
&rarr; Use this information to calculate the total costs

## ToDo: Discuss Timezones!!!

## Calculate uptime for all launched VMs


In [83]:
def calculate_transition_time(instance, new_state ="stopped"):
    """Calculate the  stop time of a given VM instance"""

    
    #get stop time for all stopped instances
    #https://stackoverflow.com/questions/41231630/checking-stop-time-of-ec2-instance-with-boto3
    client = session.client('ec2', region_name='eu-central-1')
    rsp = client.describe_instances(InstanceIds=[instance.id])
    if rsp:
        status = rsp['Reservations'][0]['Instances'][0]
        if status['State']['Name'] == new_state:
            stopped_reason = status['StateTransitionReason']
            transition_time = re.findall('.*\((.*)\)', stopped_reason)[0]
            #print (f"Stop Time of {instance.id}:{stop_time}")
        
            return transition_time
        

stop_times = []
print("Waiting for all instances to reach stopped status")
for i in ec2_instances:

    i.wait_until_stopped()
    stop_time = calculate_transition_time(i)  
    stop_times.append(datetime.datetime.strptime(stop_time, '%Y-%m-%d %H:%M:%S %Z'))

print("All instances have now reached stopped status")
print("Launch Times:" + str(launch_times))
print("Stop Times:" + str(stop_times))


time_differences = np.subtract(stop_times, launch_times)

def diff_in_hours(x):
    return float(x.total_seconds() / 3600)

time_diff_in_hours = list(map(diff_in_hours, time_differences))

print(time_diff_in_hours)

Waiting for all instances to reach stopped status
All instances have now reached stopped status
Launch Times:[datetime.datetime(2019, 5, 3, 10, 58, 25), datetime.datetime(2019, 5, 3, 10, 58, 25), datetime.datetime(2019, 5, 3, 10, 58, 25), datetime.datetime(2019, 5, 3, 10, 58, 25)]
Stop Times:[datetime.datetime(2019, 5, 3, 11, 14, 51), datetime.datetime(2019, 5, 3, 11, 14, 51), datetime.datetime(2019, 5, 3, 11, 14, 51), datetime.datetime(2019, 5, 3, 11, 14, 51)]
[0.2738888888888889, 0.2738888888888889, 0.2738888888888889, 0.2738888888888889]


## Use aws pricing API to pull ec2 instance and ebs storage costs

### TODO: How to handle months with more or less than 30 days?
### Why is t2.micro for free?

In [84]:
#https://stackoverflow.com/questions/51673667/use-boto3-to-get-current-price-for-given-ec2-instance-type
#TODO CHECK IF PER HOUR OR PER DAY


# Get current AWS price for an on-demand instance
def get_instance_price(region, instance, osys):
    data = pricing_client.get_products(ServiceCode='AmazonEC2',
                                       Filters=[{"Field": "tenancy", "Value": "shared", "Type": "TERM_MATCH"},
                                                {"Field": "operatingSystem", "Value": osys, "Type": "TERM_MATCH"},
                                                {"Field": "preInstalledSw", "Value": "NA", "Type": "TERM_MATCH"},
                                                {"Field": "instanceType", "Value": instance, "Type": "TERM_MATCH"},
                                                {"Field": "location", "Value": region, "Type": "TERM_MATCH"}])
                                       
    od = json.loads(data['PriceList'][0])['terms']['OnDemand']
    print(od)
    id1 = list(od)[0]
    id2 = list(od[id1]['priceDimensions'])[0]
    return od[id1]['priceDimensions'][id2]['pricePerUnit']['USD']

def get_storage_price(region, volume_type):
    ebs_name_map = {
    'standard': 'Magnetic',
    'gp2': 'General Purpose',
    'io1': 'Provisioned IOPS',
    'st1': 'Throughput Optimized HDD',
    'sc1': 'Cold HDD'
    }
    data = pricing_client.get_products(ServiceCode='AmazonEC2', 
                                       Filters=[
                                                {'Type': 'TERM_MATCH', 'Field': 'volumeType', 'Value': ebs_name_map[volume_type]}, 
                                                {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': region}])
    od = json.loads(data['PriceList'][0])['terms']['OnDemand']
    id1 = list(od)[0]
    id2 = list(od[id1]['priceDimensions'])[0]
    return od[id1]['priceDimensions'][id2]['pricePerUnit']['USD']

# Translate region code to region name
def get_region_name(region_code):
    default_region = 'EU (Frankfurt)'
    endpoint_file = resource_filename('botocore', 'data/endpoints.json')
    try:
        with open(endpoint_file, 'r') as f:
            data = json.load(f)
        return data['partitions'][0]['regions'][region_code]['description']
    except IOError:
        return default_region
    
def extract_ebs_storage_from_blockdevicemapping(b_d_mapping):
    """Extracts all ebs storage from a blockdevicemapping and stores them in storage_dict"""
    for device in b_d_mapping:
        if "Ebs" in device:
            storage_dict[device["Ebs"]["VolumeType"]] += device["Ebs"]["VolumeSize"]
     
    
#dict for all storage 
storage_dict = {
    'standard': 0,
    'gp2': 0,
    'io1': 0,
    'st1': 0,
    'sc1': 0  
}    

extract_ebs_storage_from_blockdevicemapping(config['storage_settings'])
extract_ebs_storage_from_blockdevicemapping(root_storage_mapping)
print(storage_dict)
# Use AWS Pricing API at eu-central-1
#'eu-central-1' not working -> Pricing the same ? 
pricing_client = session.client('pricing', region_name='us-east-1')

# Get current price for a given instance, region and os
# make operation system not hardcoded
instance_price_per_hour = float(get_instance_price(get_region_name("eu-central-1"), config['instance_type'], 'Linux'))

#For example, let's say that you provision a 2000 GB volume for 12 hours (43,200 seconds) in a 30 day month. In a region that charges $0.10 per GB-month, you would be charged $3.33 for the volume ($0.10 per GB-month * 2000 GB * 43,200 seconds / (86,400 seconds/day * 30 day-month)).
#source: https://aws.amazon.com/ebs/pricing/?nc1=h_ls

#get price of used storage
storage_price_per_hour = sum([float(get_storage_price(get_region_name("eu-central-1"), volume_type)) * float(volume_size)/ 30 / 24 for volume_type, volume_size in storage_dict.items()])

print("Instance cost per hour: " + str(instance_price_per_hour))
print("Storage cost per hour: "  + str(storage_price_per_hour))

{'standard': 0, 'gp2': 40, 'io1': 0, 'st1': 0, 'sc1': 0}
{'7BF4E6DS5KSK424K.JRTCKXETXF': {'priceDimensions': {'7BF4E6DS5KSK424K.JRTCKXETXF.6YS6EN2CT7': {'unit': 'Hrs', 'endRange': 'Inf', 'description': '$0.00 per Reservation Linux t2.micro Instance Hour', 'appliesTo': [], 'rateCode': '7BF4E6DS5KSK424K.JRTCKXETXF.6YS6EN2CT7', 'beginRange': '0', 'pricePerUnit': {'USD': '0.0000000000'}}}, 'sku': '7BF4E6DS5KSK424K', 'effectiveDate': '2019-04-01T00:00:00Z', 'offerTermCode': 'JRTCKXETXF', 'termAttributes': {}}}
Instance cost per hour: 0.0
Storage cost per hour: 0.00661111111111111


## Calculate total costs by using calculated uptimes and (instance/storage) prices


In [85]:
#calculate price for each instance and then sum up the prices of all instances up to once total price
total_instance_cost_until_stop = sum(map(lambda x: x * instance_price_per_hour, time_diff_in_hours)) 
total_storage_cost_until_stop =  sum(map(lambda x: x * storage_price_per_hour, time_diff_in_hours))       

print(f"The total instance cost of {config['vm_count']} {config['instance_type']} instances running for averagely {np.round(np.mean(time_diff_in_hours),4)} hours was: {total_instance_cost_until_stop} USD.")
print(f"The total storage  cost of {config['vm_count']} {storage_dict} storage units running for averagely {np.round(np.mean(time_diff_in_hours),4)} hours was: {total_storage_cost_until_stop} USD.")
total_cost_until_stop = total_instance_cost_until_stop + total_storage_cost_until_stop
print(f"Total Cost: {total_cost_until_stop} USD")

The total instance cost of 4 t2.micro instances running for averagely 0.2739 hours was: 0.0 USD.
The total storage  cost of 4 {'standard': 0, 'gp2': 40, 'io1': 0, 'st1': 0, 'sc1': 0} storage units running for averagely 0.2739 hours was: 0.007242839506172839 USD.
Total Cost: 0.007242839506172839 USD


## Terminate the stopped instances for good

### CAREFUL: Storage costs money if instance is stopped  and not terminated

In [86]:
termination_times = []
for i in ec2_instances:
    i.terminate()
    #Note this termination is only an approximation
    termination_times.append(datetime.datetime.utcnow())
    

In [87]:
#DOES NOT WORK: AWS API does not return correct termination time
#Get termination time ## have to wait until termination is reached
#Wait 30 second for instances to reach terminated status
#time.sleep(30)
#termination_times = []
#for i in ec2_instances:
#
##    termination_time = calculate_transition_time(i, new_state="terminated")  
#    termination_times.append(datetime.datetime.strptime(termination_time, '%Y-%m-%d %H:%M:%S %Z'))
#
#            
time_differences_termination = np.subtract(termination_times, stop_times)
time_diff_in_hours_termination = list(map(diff_in_hours, time_differences_termination))

print(stop_times)
print(termination_times)
print(time_diff_in_hours_termination)

#storage costs from point of stopping until point of termination
total_storage_cost_termination =  sum(map(lambda x: x * storage_price_per_hour, time_diff_in_hours_termination))  
print(total_storage_cost_termination)

print(f"The total storage  cost of {config['vm_count']} {storage_dict} storage units idling on stopped status for averagely {np.round(np.mean(time_diff_in_hours),4)} hours was: {total_storage_cost_termination} USD.")

[datetime.datetime(2019, 5, 3, 11, 14, 51), datetime.datetime(2019, 5, 3, 11, 14, 51), datetime.datetime(2019, 5, 3, 11, 14, 51), datetime.datetime(2019, 5, 3, 11, 14, 51)]
[datetime.datetime(2019, 5, 3, 11, 15, 54, 101498), datetime.datetime(2019, 5, 3, 11, 15, 54, 236300), datetime.datetime(2019, 5, 3, 11, 15, 54, 356368), datetime.datetime(2019, 5, 3, 11, 15, 54, 483791)]
[0.017528193888888887, 0.017565638888888888, 0.01759899111111111, 0.017634386388888888]
0.00046494100128086416
The total storage  cost of 4 {'standard': 0, 'gp2': 40, 'io1': 0, 'st1': 0, 'sc1': 0} storage units idling on stopped status for averagely 0.2739 hours was: 0.00046494100128086416 USD.


In [88]:
aws_costs = {
    'instance_type':config['instance_type'],
    'vm_count':config['vm_count'],
    'storage_in_GB':storage_dict,
    'launch_times':launch_times,
    'stop_times': stop_times,
    'termination_times': termination_times,
    'instance_price_per_hour': instance_price_per_hour,
    'storage_price_per_hour': storage_price_per_hour,
    'total_cost_until_stop':total_cost_until_stop,
    'total_cost_until_termination': total_cost+total_storage_cost_termination,
    'currency': 'USD'
    
}

def datetimeconverter(o):
    """Converter to make datetime objects json dumpable"""
    if isinstance(o, datetime.datetime):
        return o.__str__()
with open(f"{exp_dir}/aws_costs.json", 'w') as outfile:  
    json.dump(aws_costs, outfile,default = myconverter)