# AWS Blockchain Automisation Script

## Multiple Steps needed:
1. Launch the VM according to settings
2. Configure and Install everything on launched VMs (network settings, packages, ...) 
3. Run Experiments on VM
4. After finishing the experiments, send metrics to database
5. Terminate VMs and calculate aws costs of VMs and storage

## TODO: Introduce Logging

### Ensure that aws config and credentials are configured on the machine where the script is executed

In [1]:
import sys, os, pprint
import json
import botocore, boto3
import getpass
import re
import datetime, pytz, time
utc = pytz.utc
from dateutil import tz
import json
import numpy as np
from pkg_resources import resource_filename
from dateutil import parser

from web3 import Web3

## logging
import logging
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.ERROR)
logging.getLogger("py4j").setLevel(logging.ERROR)


#os.environ["HTTPS_PROXY"]="https://proxy.ccc.eu-central-1.aws.cloud.bmw:8080"
#os.environ["HTTP_PROXY"]="http://proxy.ccc.eu-central-1.aws.cloud.bmw:8080"


#Can we do this?
print("Enter proxy password:")
password = getpass.getpass()

#Set proxy
#technical user
#do you need proxy? @emil
os.environ["HTTPS_PROXY"]=f"http://qqdpoc0:{password}@proxy.muc:8080"
os.environ["HTTP_PROXY"]=f"http://qqdpoc0:{password}@proxy.muc:8080"
os.environ["NO_PROXY"] = "localhost,127.0.0.1,.muc,.aws.cloud.bmw,.azure.cloud.bmw,.bmw.corp,.bmwgroup.net"


#print(os.environ)

Enter proxy password:


 ········


## Experiment Settings (#VMs, storage, network_settings, aws profile, ...)

### Keep in Mind: If UserData script mounts a drive, the drive needs to be provided in the first place using the blockdevicemappings, else nothing can be mounted

In [125]:
#VM variables (Changes to argpass CLI later?)
VM_count = 3
instance_type = "t2.micro" #use t2.nano for test purposes


#image id for the VM 
#if image_id = None, pull newest linux image according to settings
#image_id =  "ami-de8fb135"
image_id =  None

#settings for image (right now only ubuntu is supported)
image = {"os": "ubuntu",
         "version": 18,
         "permissions": "default"
        }


subnet_id = "subnet-0ac7aeeec87150dd7"
security_group_id = ["sg-0db312b6f84d66889"]
user = "ubuntu"
profile = "block_exp"
key_name = "blockchain"
tag_name = "blockchain_philipp"

#The UserData parameter is a string, the contents of which becomes the User Data.
#While the AWS Command-Line Interface (CLI) allows you to specify a file as input, boto3 does not.
#source: https://stackoverflow.com/a/45863733
user_data_script =  "EC2_instance_bootstrap_geth.sh"

#read contents of shell script
with open(user_data_script, 'r') as content_file:
    user_data = content_file.read()
    
#print(repr(user_data))

#settings for the additional storage drive (Change volume size to your needs)
storage_settings = [
        {
            'DeviceName': "/dev/sdb",
            'VirtualName': 'string',
            'Ebs': {
                'DeleteOnTermination': True,
                'VolumeSize': 32,
                'VolumeType': 'gp2',
                'Encrypted': True,
                'KmsKeyId': 'arn:aws:kms:eu-central-1:731899578576:key/a808826d-e460-4271-a23b-29e1e0807c1d'
            },
        },
    ]


exp_type = "ETH"

## Search for the newest stable ubuntu image ID

* Owner? -> 099720109477
* https://askubuntu.com/a/53586

You can select an AMI to use based on the following characteristics: (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ComponentsAMIs.html)

 * Region (see Regions and Availability Zones) -> flexibel (default frankfurt)

 * Operating system -> ubuntu (version?) (make it flexibel)

 * Architecture (32-bit or 64-bit) -> 64bit (x86_64)

 * Launch Permissions

 * Storage for the Root Device

In [126]:
#print(os.environ)
#os.environ["HTTPS_PROXY"]="http://qqdpoc0:e-------@proxy.muc:8080"
 
pprnt = pprint.PrettyPrinter(indent=1)

def newest_image(list_of_images):
    latest = None
    for image in list_of_images:
        if not latest:
            latest = image
            continue

        if parser.parse(image['CreationDate']) > parser.parse(latest['CreationDate']):
            latest = image

    return latest
 
if image_id == None:
    session = boto3.Session(profile_name=profile)
    ec2 = session.client('ec2', region_name='eu-central-1')
    #pprnt.pprint(ec2.describe_instances())
 
    # Find the latest official Ubuntu image from Canonical(owner = 099720109477)
    #aws ec2 describe-images --owners 099720109477 --filters 'Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-*-18*-amd64-server-????????' 'Name=state,Values=available' --output json | jq -r '.Images | sort_by(.CreationDate) | last(.[])'

    amis = ec2.describe_images(
         Filters=[
             {
                 'Name': 'name',
                 'Values': [f"{image['os']}/images/hvm-ssd/{image['os']}-*-{image['version']}*-amd64-server-????????"]
             },
             {
             'Name': 'architecture',
             'Values': ['x86_64']
             },
             {
                 'Name': 'state',
                 'Values': ['available']
             },
             {
                 'Name': 'root-device-type',
                 'Values': ['ebs']
             }
         ],
         Owners=[
             '099720109477',
         ]
     )
    #pprnt.pprint(amis["Images"])
    #pprnt.pprint(amis)
    image = newest_image(amis['Images'])
    image_id = image["ImageId"]

    #root_storage_mapping = image["BlockDeviceMappings"]
    #print([x for x in source_image["BlockDeviceMappings"]])
    #print(image_id)
    #pprnt.pprint(image)

ec2 = session.resource('ec2')
image = ec2.Image(image_id)
root_storage_mapping = image.block_device_mappings

print("Selected Image: " + image.description)
    

Selected Image: Canonical, Ubuntu, 18.04 LTS, amd64 bionic image build on 2019-04-03


## Start x VMs according to settings, configure the launched VMs according to given shell script

In [127]:
#Jupyter online, but my profile/credential stuff is offline
##added config and credentials via terminal /home/q481264/.aws
#To ensure faster instance launches, break up large requests into smaller batches. 
#For example, create five separate launch requests for 100 instances each instead of one launch request for 500 instances.
#TODO add creating script
session = boto3.Session(profile_name=profile)
ec2 = session.resource('ec2', region_name='eu-central-1')
ec2_instances = ec2.create_instances(
    ImageId = image_id,
    MinCount = 1,
    MaxCount = VM_count,
    InstanceType = instance_type,
    KeyName = key_name,
    SubnetId = subnet_id,
    BlockDeviceMappings = storage_settings,
    UserData = user_data,
    TagSpecifications=[
        {
            'ResourceType': "instance",
            'Tags': [
                {
                    'Key': 'Creator',
                    'Value': tag_name
                },
                {
                    'Key': 'Name',
                    'Value': tag_name
                },
                    ]
        },
                      ],
SecurityGroupIds=security_group_id
)

#Add experiment name tag
#Does experiment already exist? experiment-date-?hash?
# -> Build pipeline which supports different experiment settings/framweworks/...
##How to identify the instances of different experiments?
#Tag idea: Exp-Ethereum-010419-1223-Node1
#UserData = user_data,
#BlockDeviceMappings = storage_settings

In [130]:
ips=[]
for i in ec2_instances:
    i.wait_until_running()
    i.load()
    print(f"ID: {i.id}, State: {i.state['Name']}, IP: {i.private_ip_address}")
    ips.append(i.private_ip_address)

#add no procy for all VM ips
os.environ["NO_PROXY"] = f"localhost,127.0.0.1,.muc,.aws.cloud.bmw,.azure.cloud.bmw,.bmw.corp,.bmwgroup.net,{','.join(str(ip) for ip in ips)}"
    
print(f"You can now access machines via: ssh -i \"path to {key_name} key\" ubuntu@{ips} (if user is ubuntu) ")
print(f"e.g. ssh -i ~/.ssh/blockchain ubuntu@{ips[0]}")

ID: i-0d6163b909f6b68fe, State: running, IP: 10.3.2.89
ID: i-09f9d8ea269657e48, State: running, IP: 10.3.2.88
ID: i-004612f8782376d32, State: running, IP: 10.3.2.76
You can now access machines via: ssh -i "path to blockchain key" ubuntu@['10.3.2.89', '10.3.2.88', '10.3.2.76'] (if user is ubuntu) 
e.g. ssh -i ~/.ssh/blockchain ubuntu@10.3.2.89


In [128]:
#Give launched instances tag with time/type of experiment/number of node
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H-%M-%S')
for index, i in enumerate(ec2_instances):
    exp_tag = f"exp_{st}_{exp_type}_Node{index}"
    ec2.create_tags(Resources=[
        i.id,
    ],
    Tags=[
        {
            'Key': 'exp_tag',
            'Value': exp_tag
        },
    ])

## Record launch times of all VMs (later needed for calculating aws costs)

In [129]:
launch_times = []
for i in ec2_instances:
    print("Launch Time: " + str(i.launch_time))
    #get launch time
    launch_times.append(i.launch_time.replace(tzinfo=None))
    

Launch Time: 2019-04-30 14:07:22+00:00
Launch Time: 2019-04-30 14:07:22+00:00
Launch Time: 2019-04-30 14:07:22+00:00


# Get Ethereum Accounts from all Nodes
## ssh key is needed for scp to work (need to be present where the jupyter notebook is executed)

In [131]:
#Make experiment folder where all info is stored
#mkdir -p parentfolder/{subfolder1,subfolder2,subfolder3}
exp_dir = f"exp_{st}_{exp_type}"
cmd = f"mkdir {exp_dir}"
!{cmd}
cmd = f"mkdir -p {exp_dir}/accounts"
!{cmd}
cmd = f"mkdir {exp_dir}/enodes"
!{cmd}

In [132]:
# how to wait until all VMs are finished with setting up UserData?
#-o UserKnownHostsFile=/dev/null
scp_flags = "-i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
for index, i in enumerate(ec2_instances):
    #get account from all instances
    cmd = f"scp {scp_flags} ubuntu@{i.private_ip_address}:/data/gethNetwork/account.txt {exp_dir}/accounts/account_node_{index}.txt "
    print(cmd)
    !{cmd}
    


scp -i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@10.3.2.89:/data/gethNetwork/account.txt exp_2019-04-30_16-11-28_ETH/accounts/account_node_0.txt 
account.txt                                   100%   41     0.0KB/s   00:00    
scp -i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@10.3.2.88:/data/gethNetwork/account.txt exp_2019-04-30_16-11-28_ETH/accounts/account_node_1.txt 
account.txt                                   100%   41     0.0KB/s   00:00    
scp -i ~/.ssh/blockchain  -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@10.3.2.76:/data/gethNetwork/account.txt exp_2019-04-30_16-11-28_ETH/accounts/account_node_2.txt 
account.txt                                   100%   41     0.0KB/s   00:00    


In [133]:
all_accounts = []
path = f"{exp_dir}/accounts"
fileList = os.listdir(path)
for file in fileList:
    file = open(os.path.join(path + "/"+ file), 'r')
    all_accounts.append(file.read())
    file.close()
    
all_accounts = [x.rstrip() for x in all_accounts]
print(all_accounts)

['78b52838bf7391beb4b6cd887e79c8c013145606', '495ba5b85dc5c95634b28e5c690a5c105ad1116a', 'c427bdfaa22e29a8e05c0ac7df903606b9383128']


## Build Genesis file, distribute it to VMs and start geth nodes with it

In [134]:
balances = ["0x200000000000000000000000000000000000000000000000000000000000000" for x in all_accounts]
base_balances = {   "0000000000000000000000000000000000000001": { "balance": "1" },
                    "0000000000000000000000000000000000000002": { "balance": "1" },
                    "0000000000000000000000000000000000000003": { "balance": "1" },
                    "0000000000000000000000000000000000000004": { "balance": "1" },
                    "0000000000000000000000000000000000000005": { "balance": "1" },
                    "0000000000000000000000000000000000000006": { "balance": "1" },
                    "0000000000000000000000000000000000000007": { "balance": "1" },
                    "0000000000000000000000000000000000000008": { "balance": "1" }}
additional_balances = {str(x): {"balance": str(y)} for x, y in zip(all_accounts,balances)}
merged_balances = {**base_balances, **additional_balances}

#clique genesis at beginning
genesis_dict = {
    
    "config":{
        'chainId': 11,
        'homesteadBlock': 0,
        'eip150Block': 0,
        'eip155Block': 0,
        'eip158Block': 0,
        'byzantiumBlock': 0,
        'clique':{
                    'period':15,
                    'epoch':30000 
        }
    },
    "alloc": merged_balances,
    "coinbase": "0x0000000000000000000000000000000000000000",
    "difficulty": "0x1",
    "extraData": f"0x0000000000000000000000000000000000000000000000000000000000000000{''.join(all_accounts)}0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
    "gasLimit": "0x2fefd8",
    "mixHash": "0x0000000000000000000000000000000000000000000000000000000000000000",
    "nonce": "0x0000000000000042",
    "timestamp": "0x00"
    
    
}

pprnt.pprint(genesis_dict)

with open(f"{exp_dir}/genesis.json", 'w') as outfile:  
    json.dump(genesis_dict, outfile)
    
#push genesis from local to remote VMs
for index, i in enumerate(ec2_instances):
    #get account from all instances
    cmd = f"scp {scp_flags}  {exp_dir}/genesis.json ubuntu@{i.private_ip_address}:~/genesis.json"
    print(cmd)
    !{cmd}


{'alloc': {'0000000000000000000000000000000000000001': {'balance': '1'},
           '0000000000000000000000000000000000000002': {'balance': '1'},
           '0000000000000000000000000000000000000003': {'balance': '1'},
           '0000000000000000000000000000000000000004': {'balance': '1'},
           '0000000000000000000000000000000000000005': {'balance': '1'},
           '0000000000000000000000000000000000000006': {'balance': '1'},
           '0000000000000000000000000000000000000007': {'balance': '1'},
           '0000000000000000000000000000000000000008': {'balance': '1'},
           '495ba5b85dc5c95634b28e5c690a5c105ad1116a': {'balance': '0x200000000000000000000000000000000000000000000000000000000000000'},
           '78b52838bf7391beb4b6cd887e79c8c013145606': {'balance': '0x200000000000000000000000000000000000000000000000000000000000000'},
           'c427bdfaa22e29a8e05c0ac7df903606b9383128': {'balance': '0x200000000000000000000000000000000000000000000000000000000000000'}},
 'co

In [135]:
for index, i in enumerate(ec2_instances):
    #get account from all instances
    ssh_cmd = f"ssh {scp_flags} -t ubuntu@{i.private_ip_address}"
    #print(cmd)
    #!{cmd}
    cmd = f'{ssh_cmd}  "sudo mv ~/genesis.json /data/gethNetwork/genesis.json"'
    #print(cmd)
    !{cmd}
    #print(cmd)
    cmd = f"{ssh_cmd} sudo geth --datadir '/data/gethNetwork/node/' init /data/gethNetwork/genesis.json"
    !{cmd}
    #cmd = f"{ssh_cmd} sudo geth --datadir '/data/gethNetwork/node/' --networkid 31 --verbosity 3 --port 30310 --rpc --rpcaddr '0.0.0.0'  --rpcapi db,clique,miner,eth,net,web3,personal,web3,admin --nat=extip:{i.private_ip_address}  --unlock {all_accounts[index]} --password '/data/gethNetwork/password.txt' 2>&1 | tee {exp_dir}/enodes/enode_raw_node_{index}.txt"
    #print(cmd)
    cmd = f"{ssh_cmd} sudo systemctl daemon-reload"
    !{cmd}
    cmd = f"{ssh_cmd} sudo systemctl enable geth.service"
    !{cmd}
    cmd = f"{ssh_cmd} sudo systemctl start geth.service"
    !{cmd}
    #!{cmd}
    #systemctl --user start geth.service
    #!sudo geth --datadir "/data/gethNetwork/node/" --networkid 31 --verbosity 3 --port 30310 --rpc --rpcaddr "0.0.0.0" --rpcport 8101 console --rpcapi clique,eth,miner --nat=extip:i.private_ip_address  --unlock all_accounts[index] --password "/data/gethNetwork/password.txt"
        

Connection to 10.3.2.89 closed.
[33mWARN [0m[04-30|14:16:10.099] Sanitizing cache to Go's GC limits       [33mprovided[0m=1024 [33mupdated[0m=327
[32mINFO [0m[04-30|14:16:10.102] Maximum peer count                       [32mETH[0m=25 [32mLES[0m=0 [32mtotal[0m=25
[32mINFO [0m[04-30|14:16:10.105] Allocated cache and file handles         [32mdatabase[0m=/data/gethNetwork/node/geth/chaindata [32mcache[0m=16 [32mhandles[0m=16
[32mINFO [0m[04-30|14:16:10.122] Writing custom genesis block 
[32mINFO [0m[04-30|14:16:10.124] Persisted trie from memory database      [32mnodes[0m=13 [32msize[0m=2.11kB [32mtime[0m=133.851µs [32mgcnodes[0m=0 [32mgcsize[0m=0.00B [32mgctime[0m=0s [32mlivenodes[0m=1 [32mlivesize[0m=0.00B
[32mINFO [0m[04-30|14:16:10.124] Successfully wrote genesis state         [32mdatabase[0m=chaindata                             [32mhash[0m=0e3104…27e1be
[32mINFO [0m[04-30|14:16:10.125] Allocated cache and file handles         [32mdat

In [136]:
enodes = []
#collect enodes
web3_clients = []
for index, i in enumerate(ec2_instances):
    print(f"http://{i.private_ip_address}:8545")
    web3_clients.append(Web3(Web3.HTTPProvider(f"http://{i.private_ip_address}:8545")))
    #print(web3.admin)
    enodes.append((i.private_ip_address,web3_clients[index].admin.nodeInfo.enode))
    
#print(enodes)
print([enode for (ip, enode) in enodes])

with open(f"{exp_dir}/static-nodes.json", 'w') as outfile:  
    json.dump([enode for (ip, enode) in enodes], outfile)
    
#distribute collected enodes over network
for index, i in enumerate(ec2_instances):
    #web3 = Web3(Web3.HTTPProvider(f"http://{i.private_ip_address}:8545"))
    for ip, enode in enodes:
        #dont add own enode
        if ip != i.private_ip_address:
            web3_clients[index].admin.addPeer(enode)
    
    pprnt.pprint(web3_clients[index].admin.peers)

            


http://10.3.2.89:8545
http://10.3.2.88:8545
http://10.3.2.76:8545
['enode://75d410ccd9435d0471f2ca89bd440b3b8ff5bdec13c29df27f23425d022a31f5682658ddcde0789049f7f2f9f1a474631d04e3cf95d613f11e34e1e54faa67b4@10.3.2.89:30310', 'enode://d407edb29c1373571714558fa9fe92c5a7c73aabf0cab5b8ab43242f77786d1cb4df1c698861b0feb3354bb92d6d6d76bd21c2a78bf0470af2be5ed69498fde1@10.3.2.88:30310', 'enode://7d3ca19556f96c9e7d08156219240964cf653e1cb3539ff55b7291f4f08f6aac3a4aa81d722caaa9dac62e681bebff2b985ff5b96f89156a891e4a35d3816db4@10.3.2.76:30310']
[{'caps': ['eth/63'],
  'enode': 'enode://7d3ca19556f96c9e7d08156219240964cf653e1cb3539ff55b7291f4f08f6aac3a4aa81d722caaa9dac62e681bebff2b985ff5b96f89156a891e4a35d3816db4@10.3.2.76:30310',
  'id': 'b1cd30152e5ec3e65cdfeb88934fa703e172f4b31194c1ca174f57cccbd47db1',
  'name': 'Geth/v1.8.27-stable-4bcc0a37/linux-amd64/go1.10.4',
  'network': {'inbound': False,
              'localAddress': '10.3.2.89:33386',
              'remoteAddress': '10.3.2.76:30310',
      

## Some Ethereum testing


In [155]:
for index, i in enumerate(ec2_instances):
    #web3 = Web3(Web3.HTTPProvider(f"http://{i.private_ip_address}:8545"))
    print("IsMining:" + str(web3_clients[index].eth.mining))
    for acc in all_accounts:
        #print(acc)
        print(str(web3_clients[index].toChecksumAddress(acc)) + ": "+ str(web3_clients[index].eth.getBalance(web3_clients[index].toChecksumAddress(acc))))

#https://web3py.readthedocs.io/en/stable/middleware.html#geth-style-proof-of-authority     
from web3.middleware import geth_poa_middleware


#web3 = Web3(Web3.HTTPProvider(f"http://{ec2_instances[0].private_ip_address}:8545"))
#'value': web3.toWei(1000,'ether'), 'gas':1000,'gasPrice': web3.toWei(5, 'gwei')},"password"
#web3_clients[0].middleware_stack.inject(geth_poa_middleware, layer=0)
print("Tx from " + str(web3_clients[0].toChecksumAddress(all_accounts[0]))+"to "+ str(web3.toChecksumAddress(all_accounts[1])) )
web3_clients[0].personal.sendTransaction({ 'from': web3_clients[0].toChecksumAddress(all_accounts[0]),'to': web3.toChecksumAddress(all_accounts[1]),  'value': web3.toWei(23456,'ether'), 'gas': '0x5208', 'gasPrice': web3.toWei(5, 'gwei')},"password")


IsMining:True
0x78b52838bf7391bEb4B6Cd887E79c8C013145606: 904625697166532776746648320380374280103671755200316882100262165061821325312
0x495bA5b85DC5c95634B28e5c690a5c105ad1116a: 904625697166532776746648320380374280103671755200316931016262480061821325312
0xc427bDFAA22e29a8E05c0Ac7Df903606b9383128: 904625697166532776746648320380374280103671755200316906558262480061821325312
IsMining:True
0x78b52838bf7391bEb4B6Cd887E79c8C013145606: 904625697166532776746648320380374280103671755200316882100262165061821325312
0x495bA5b85DC5c95634B28e5c690a5c105ad1116a: 904625697166532776746648320380374280103671755200316931016262480061821325312
0xc427bDFAA22e29a8E05c0Ac7Df903606b9383128: 904625697166532776746648320380374280103671755200316906558262480061821325312
IsMining:True
0x78b52838bf7391bEb4B6Cd887E79c8C013145606: 904625697166532776746648320380374280103671755200316882100262165061821325312
0x495bA5b85DC5c95634B28e5c690a5c105ad1116a: 90462569716653277674664832038037428010367175520031693101626248006182132531

HexBytes('0xea372ccc983654818e313ddc39c654cbf8d7775bdf9d690622d7d08d3e3af36b')

In [154]:
for index, i in enumerate(ec2_instances):
    #web3 = Web3(Web3.HTTPProvider(f"http://{i.private_ip_address}:8545"))
    for acc in all_accounts:
        print(str(web3_clients[index].toChecksumAddress(acc)) + ": "+ str(web3_clients[index].eth.getBalance(web3_clients[index].toChecksumAddress(acc))))
    print("---------------------------")


0x78b52838bf7391bEb4B6Cd887E79c8C013145606: 904625697166532776746648320380374280103671755200316882100262165061821325312
0x495bA5b85DC5c95634B28e5c690a5c105ad1116a: 904625697166532776746648320380374280103671755200316931016262480061821325312
0xc427bDFAA22e29a8E05c0Ac7Df903606b9383128: 904625697166532776746648320380374280103671755200316906558262480061821325312
---------------------------
0x78b52838bf7391bEb4B6Cd887E79c8C013145606: 904625697166532776746648320380374280103671755200316882100262165061821325312
0x495bA5b85DC5c95634B28e5c690a5c105ad1116a: 904625697166532776746648320380374280103671755200316931016262480061821325312
0xc427bDFAA22e29a8E05c0Ac7Df903606b9383128: 904625697166532776746648320380374280103671755200316906558262480061821325312
---------------------------
0x78b52838bf7391bEb4B6Cd887E79c8C013145606: 904625697166532776746648320380374280103671755200316882100262165061821325312
0x495bA5b85DC5c95634B28e5c690a5c105ad1116a: 904625697166532776746648320380374280103671755200316931016262

## Stop all instances

In [64]:
for i in ec2_instances:
    i.stop()

## Calculate Costs of the VM instances
* Get launch and stop time of each VM &rarr; get uptime for all Vms
* Get price per VM 
* Get storage price per VM
<br />
&rarr; Use this information to calculate the total costs

## ToDo: Discuss Timezones!!!

## Calculate uptime for all launched VMs


In [65]:
def calculate_transition_time(instance, new_state ="stopped"):
    """Calculate the  stop time of a given VM instance"""

    
    #get stop time for all stopped instances
    #https://stackoverflow.com/questions/41231630/checking-stop-time-of-ec2-instance-with-boto3
    client = session.client('ec2', region_name='eu-central-1')
    rsp = client.describe_instances(InstanceIds=[instance.id])
    if rsp:
        status = rsp['Reservations'][0]['Instances'][0]
        if status['State']['Name'] == new_state:
            stopped_reason = status['StateTransitionReason']
            transition_time = re.findall('.*\((.*)\)', stopped_reason)[0]
            #print (f"Stop Time of {instance.id}:{stop_time}")
        
            return transition_time
        

stop_times = []
print("Waiting for all instances to reach stopped status")
for i in ec2_instances:

    i.wait_until_stopped()
    stop_time = calculate_transition_time(i)  
    stop_times.append(datetime.datetime.strptime(stop_time, '%Y-%m-%d %H:%M:%S %Z'))

print("All instances have now reached stopped status")
print("Launch Times:" + str(launch_times))
print("Stop Times:" + str(stop_times))


time_differences = np.subtract(stop_times, launch_times)

def diff_in_hours(x):
    return float(x.total_seconds() / 3600)

time_diff_in_hours = list(map(diff_in_hours, time_differences))

print(time_diff_in_hours)

Waiting for all instances to reach stopped status
All instances have now reached stopped status
Launch Times:[datetime.datetime(2019, 4, 26, 14, 54, 46), datetime.datetime(2019, 4, 26, 14, 54, 46)]
Stop Times:[datetime.datetime(2019, 4, 26, 15, 25, 51), datetime.datetime(2019, 4, 26, 15, 25, 51)]
[0.5180555555555556, 0.5180555555555556]


## Use aws pricing API to pull ec2 instance and ebs storage costs

### TODO: How to handle months with more or less than 30 days?

In [66]:
#https://stackoverflow.com/questions/51673667/use-boto3-to-get-current-price-for-given-ec2-instance-type
#TODO CHECK IF PER HOUR OR PER DAY

# Get current AWS price for an on-demand instance
def get_instance_price(region, instance, osys):
    data = pricing_client.get_products(ServiceCode='AmazonEC2',
                                       Filters=[{"Field": "tenancy", "Value": "shared", "Type": "TERM_MATCH"},
                                                {"Field": "operatingSystem", "Value": osys, "Type": "TERM_MATCH"},
                                                {"Field": "preInstalledSw", "Value": "NA", "Type": "TERM_MATCH"},
                                                {"Field": "instanceType", "Value": instance, "Type": "TERM_MATCH"},
                                                {"Field": "location", "Value": region, "Type": "TERM_MATCH"}])
                                       
    od = json.loads(data['PriceList'][0])['terms']['OnDemand']
    id1 = list(od)[0]
    id2 = list(od[id1]['priceDimensions'])[0]
    return od[id1]['priceDimensions'][id2]['pricePerUnit']['USD']

def get_storage_price(region, volume_type):
    ebs_name_map = {
    'standard': 'Magnetic',
    'gp2': 'General Purpose',
    'io1': 'Provisioned IOPS',
    'st1': 'Throughput Optimized HDD',
    'sc1': 'Cold HDD'
    }
    data = pricing_client.get_products(ServiceCode='AmazonEC2', 
                                       Filters=[
                                                {'Type': 'TERM_MATCH', 'Field': 'volumeType', 'Value': ebs_name_map[volume_type]}, 
                                                {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': region}])
    od = json.loads(data['PriceList'][0])['terms']['OnDemand']
    id1 = list(od)[0]
    id2 = list(od[id1]['priceDimensions'])[0]
    return od[id1]['priceDimensions'][id2]['pricePerUnit']['USD']

# Translate region code to region name
def get_region_name(region_code):
    default_region = 'EU (Frankfurt)'
    endpoint_file = resource_filename('botocore', 'data/endpoints.json')
    try:
        with open(endpoint_file, 'r') as f:
            data = json.load(f)
        return data['partitions'][0]['regions'][region_code]['description']
    except IOError:
        return default_region
    
def extract_ebs_storage_from_blockdevicemapping(b_d_mapping):
    """Extracts all ebs storage from a blockdevicemapping and stores them in storage_dict"""
    for device in b_d_mapping:
        if "Ebs" in device:
            storage_dict[device["Ebs"]["VolumeType"]] += device["Ebs"]["VolumeSize"]
     
    
#dict for all storage 
storage_dict = {
    'standard': 0,
    'gp2': 0,
    'io1': 0,
    'st1': 0,
    'sc1': 0  
}    

extract_ebs_storage_from_blockdevicemapping(storage_settings)
extract_ebs_storage_from_blockdevicemapping(root_storage_mapping)
print(storage_dict)
# Use AWS Pricing API at eu-central-1
#'eu-central-1' not working -> Pricing the same ? 
pricing_client = session.client('pricing', region_name='us-east-1')

# Get current price for a given instance, region and os
# make operation system not hardcoded
instance_price_per_hour = float(get_instance_price(get_region_name("eu-central-1"), instance_type, 'Linux'))

#For example, let's say that you provision a 2000 GB volume for 12 hours (43,200 seconds) in a 30 day month. In a region that charges $0.10 per GB-month, you would be charged $3.33 for the volume ($0.10 per GB-month * 2000 GB * 43,200 seconds / (86,400 seconds/day * 30 day-month)).
#source: https://aws.amazon.com/ebs/pricing/?nc1=h_ls

#get price of used storage
storage_price_per_hour = sum([float(get_storage_price(get_region_name("eu-central-1"), volume_type)) * float(volume_size)/ 30 / 24 for volume_type, volume_size in storage_dict.items()])

print("Instance cost per hour: "  + str(instance_price_per_hour))
print("Storage cost per hour: "  + str(storage_price_per_hour))

{'standard': 0, 'gp2': 40, 'io1': 0, 'st1': 0, 'sc1': 0}
Instance cost per hour: 0.0
Storage cost per hour: 0.00661111111111111


## Calculate total costs by using calculated uptimes and (instance/storage) prices


In [68]:
#calculate price for each instance and then sum up the prices of all instances up to once total price
total_instance_cost = sum(map(lambda x: x * instance_price_per_hour, time_diff_in_hours)) 
total_storage_cost =  sum(map(lambda x: x * storage_price_per_hour, time_diff_in_hours))       

print(f"The total instance cost of {VM_count} {instance_type} instances running for averagely {np.round(np.mean(time_diff_in_hours),4)} hours was: {total_instance_cost} USD.")
print(f"The total storage  cost of {VM_count} {storage_dict} storage units running for averagely {np.round(np.mean(time_diff_in_hours),4)} hours was: {total_storage_cost} USD.")
total_cost = total_instance_cost + total_storage_cost
print(f"Total Cost: {total_cost} USD")

The total instance cost of 2 t2.micro instances running for averagely 0.5181 hours was: 0.0 USD.
The total storage  cost of 2 {'standard': 0, 'gp2': 40, 'io1': 0, 'st1': 0, 'sc1': 0} storage units running for averagely 0.5181 hours was: 0.006849845679012346 USD.
Total Cost: 0.006849845679012346 USD


## Terminate the stopped instances for good

### CAREFUL: Storage costs money if instance is stopped  and not terminated

In [124]:
termination_times = []
for i in ec2_instances:
    i.terminate()
    #Note this termination is only an approximation
    termination_times.append(datetime.datetime.utcnow())
    
    
#pull termination time term_time - stop_time = stopped_uptime -> calc storage costs

In [70]:
#DOES NOT WORK: AWS API does not return correct termination time
#Get termination time ## have to wait until termination is reached
#Wait 30 second for instances to reach terminated status
#time.sleep(30)
#termination_times = []
#for i in ec2_instances:
#
##    termination_time = calculate_transition_time(i, new_state="terminated")  
#    termination_times.append(datetime.datetime.strptime(termination_time, '%Y-%m-%d %H:%M:%S %Z'))
#
#            
time_differences_termination = np.subtract(termination_times, stop_times)
time_diff_in_hours_termination = list(map(diff_in_hours, time_differences_termination))

print(stop_times)
print(termination_times)
print(time_diff_in_hours_termination)

total_storage_cost_termination =  sum(map(lambda x: x * storage_price_per_hour, time_diff_in_hours_termination))  
print(total_storage_cost_termination)

print(f"The total storage  cost of {VM_count} {storage_dict} storage units idling on stopped status for averagely {np.round(np.mean(time_diff_in_hours),4)} hours was: {total_storage_cost_termination} USD.")

[datetime.datetime(2019, 4, 26, 15, 25, 51), datetime.datetime(2019, 4, 26, 15, 25, 51)]
[datetime.datetime(2019, 4, 26, 15, 26, 42, 466492), datetime.datetime(2019, 4, 26, 15, 26, 42, 622245)]
[0.014296247777777778, 0.0143395125]
0.00018931419294753083
The total storage  cost of 2 {'standard': 0, 'gp2': 40, 'io1': 0, 'st1': 0, 'sc1': 0} storage units idling on stopped status for averagely 0.5181 hours was: 0.00018931419294753083 USD.
