# AWS Blockchain Automation Script

## Multiple Steps needed:
1. Launch the VM according to settings
2. Configure and Install everything on launched VMs (network settings, packages, ...) 
3. Run Experiments on VM
4. After finishing the experiments, send metrics to database
5. Terminate VMs and calculate aws costs of VMs and storage

## TODO: Introduce Logging

### Ensure that aws config and credentials are configured on the machine where the script is executed

In [1]:
import sys, os, pprint
import json
import botocore, boto3
import getpass
import re
import datetime, pytz, time
utc = pytz.utc
from dateutil import tz
import json
import numpy as np
from pkg_resources import resource_filename
from dateutil import parser

## logging
import logging
# Configure logging via basicConfig; the DEBUG level requested here is
# overridden for the root logger by the setLevel(ERROR) call on the next line.
logging.basicConfig(level=logging.DEBUG)
# Root logger: only ERROR and above.
logging.getLogger().setLevel(logging.ERROR)
# The "py4j" logger is limited to ERROR and above as well.
logging.getLogger("py4j").setLevel(logging.ERROR)


#os.environ["HTTPS_PROXY"]="https://proxy.ccc.eu-central-1.aws.cloud.bmw:8080"
#os.environ["HTTP_PROXY"]="http://proxy.ccc.eu-central-1.aws.cloud.bmw:8080"


#Can we do this?
# The proxy password is requested interactively so it is never stored in the notebook.
from urllib.parse import quote  # local import: only needed to build the proxy URL

print("Enter proxy password:")
password = getpass.getpass()

#Set proxy
#technical user
##do you need proxy? @emil
# Percent-encode the password before embedding it in the URL: characters such
# as '@', ':', '/' or '%' would otherwise be parsed as URL delimiters and
# silently produce a wrong host or wrong credentials.
proxy_url = f"http://qqdpoc0:{quote(password, safe='')}@proxy.muc:8080"
os.environ["HTTPS_PROXY"] = proxy_url
os.environ["HTTP_PROXY"] = proxy_url


#print(os.environ)

Enter proxy password:


 ········


## Experiment Settings (#VMs, storage, network_settings, aws profile, ...)

### Keep in mind: if the UserData script mounts a drive, that drive must first be provided via the BlockDeviceMappings setting; otherwise there is nothing to mount

In [4]:
#VM variables (Changes to argpass CLI later?)
# Number of VMs requested at launch and the EC2 instance type used for each of them.
VM_count = 1
instance_type = "t2.nano" #use t2.nano for test purposes


#image id for the VM 
#if image_id = None, pull newest linux image according to settings
#image_id =  "ami-de8fb135"
image_id =  None

#settings for image (right now only ubuntu is supported)
# "os" and "version" are interpolated into the AMI name filter of the image search.
# NOTE(review): "permissions" does not appear to be read by the image search — confirm.
image = {"os": "ubuntu",
         "version": 18,
         "permissions": "default"
        }


# Network placement (subnet, security groups), login user, AWS profile used for
# the boto3 session, EC2 key pair name and the tag value attached to the instances.
subnet_id = "subnet-0ac7aeeec87150dd7"
security_group_id = ["sg-0db312b6f84d66889"]
user = "ubuntu"
profile = "block_exp"
key_name = "blockchain"
tag_name = "blockchain_philipp"

#The UserData parameter is a string, the contents of which becomes the User Data.
#While the AWS Command-Line Interface (CLI) allows you to specify a file as input, boto3 does not.
#source: https://stackoverflow.com/a/45863733
# Path of the bootstrap shell script, relative to the working directory of the notebook.
user_data_script =  "EC2_instance_bootstrap.sh"

#read contents of shell script
# The whole script is read into memory and later passed as the UserData launch parameter.
with open(user_data_script, 'r') as content_file:
    user_data = content_file.read()
    
#print(repr(user_data))

#settings for the additional storage drive (Change volume size to your needs)
# Passed as BlockDeviceMappings at launch and also read by the cost calculation.
storage_settings = [
        {
            'DeviceName': "/dev/sdb",
            # NOTE(review): 'string' looks like the placeholder value from the
            # boto3 documentation — confirm that it is intended here.
            'VirtualName': 'string',
            'Ebs': {
                'DeleteOnTermination': True,
                'VolumeSize': 32,
                'VolumeType': 'gp2',
                'Encrypted': True,
                # Account-specific KMS key; must be adapted when running in another account.
                'KmsKeyId': 'arn:aws:kms:eu-central-1:731899578576:key/a808826d-e460-4271-a23b-29e1e0807c1d'
            },
        },
    ]

## Search for the newest stable ubuntu image ID

* Owner? -> 099720109477
* https://askubuntu.com/a/53586

You can select an AMI to use based on the following characteristics: (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ComponentsAMIs.html)

 * Region (see Regions and Availability Zones) -> flexible (default: Frankfurt)

 * Operating system -> Ubuntu (version?) (make it flexible)

 * Architecture (32-bit or 64-bit) -> 64bit (x86_64)

 * Launch Permissions

 * Storage for the Root Device

In [6]:
#print(os.environ)
# (The proxy is configured in the first cell; never paste a proxy URL containing credentials here.)
 
# Pretty-printer (indent=1) for inspecting AWS responses while debugging.
pprnt = pprint.PrettyPrinter(indent=1)

def newest_image(list_of_images):
    """Pick the most recently created image from a list of image descriptions.

    Each entry is expected to be a mapping with a 'CreationDate' string that
    dateutil's parser can read. Returns the entry with the latest creation
    date (the earliest such entry on ties) or None for an empty list.
    """
    newest = None
    for candidate in list_of_images:
        # The first usable candidate is taken as-is; dates are only compared
        # once there is a current best to compare against.
        is_newer = (not newest) or (
            parser.parse(candidate['CreationDate']) > parser.parse(newest['CreationDate'])
        )
        if is_newer:
            newest = candidate

    return newest
 
# One session serves both the AMI search and the image lookup further down.
# It is created unconditionally: when image_id is preset in the settings cell
# the search branch is skipped, but the lookup still needs a session.
session = boto3.Session(profile_name=profile)

if image_id is None:
    ec2_client = session.client('ec2', region_name='eu-central-1')
    #pprnt.pprint(ec2_client.describe_instances())

    # Find the latest official Ubuntu image from Canonical(owner = 099720109477)
    #aws ec2 describe-images --owners 099720109477 --filters 'Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-*-18*-amd64-server-????????' 'Name=state,Values=available' --output json | jq -r '.Images | sort_by(.CreationDate) | last(.[])'

    amis = ec2_client.describe_images(
        Filters=[
            {
                # `image` is still the settings dict here ("os" / "version").
                'Name': 'name',
                'Values': [f"{image['os']}/images/hvm-ssd/{image['os']}-*-{image['version']}*-amd64-server-????????"]
            },
            {
                'Name': 'architecture',
                'Values': ['x86_64']
            },
            {
                'Name': 'state',
                'Values': ['available']
            },
            {
                'Name': 'root-device-type',
                'Values': ['ebs']
            }
        ],
        Owners=[
            '099720109477',
        ]
    )
    #pprnt.pprint(amis["Images"])
    newest_ami = newest_image(amis['Images'])
    if newest_ami is None:
        # Fail with a clear message instead of a TypeError on None["ImageId"].
        raise RuntimeError(
            f"No available {image['os']} {image['version']} AMI matched the search filters"
        )
    image_id = newest_ami["ImageId"]

# Explicit region, consistent with every other EC2 call in this notebook, so the
# lookup does not depend on a default region being configured for the profile.
ec2 = session.resource('ec2', region_name='eu-central-1')
# NOTE: from here on `image` is the EC2 Image resource, no longer the settings dict.
image = ec2.Image(image_id)
# Root volume mapping of the AMI; needed later for the storage cost calculation.
root_storage_mapping = image.block_device_mappings

# f-string instead of concatenation: an AMI without description prints "None"
# instead of raising a TypeError.
print(f"Selected Image: {image.description}")
    

Selected Image: Canonical, Ubuntu, 18.04 LTS, amd64 bionic image build on 2019-04-03


## Start x VMs according to settings, configure the launched VMs according to given shell script

In [7]:
#Jupyter online, but my profile/credential stuff is offline
##added config and credentials via terminal /home/q481264/.aws
#To ensure faster instance launches, break up large requests into smaller batches. 
#For example, create five separate launch requests for 100 instances each instead of one launch request for 500 instances.
#TODO add creating script
session = boto3.Session(profile_name=profile)
ec2 = session.resource('ec2', region_name='eu-central-1')
ec2_instances = ec2.create_instances(
    ImageId=image_id,
    # MinCount == MaxCount: either exactly VM_count instances are launched or
    # the request fails. With MinCount=1 AWS may silently launch fewer VMs than
    # configured, while the experiment and the cost report assume VM_count.
    MinCount=VM_count,
    MaxCount=VM_count,
    InstanceType=instance_type,
    KeyName=key_name,
    SubnetId=subnet_id,
    # Additional EBS volume(s); the UserData script can only mount drives listed here.
    BlockDeviceMappings=storage_settings,
    # Contents of the bootstrap shell script.
    UserData=user_data,
    TagSpecifications=[
        {
            'ResourceType': "instance",
            'Tags': [
                {
                    # NOTE(review): the key 'string' looks like the placeholder from
                    # the boto3 documentation — confirm that this tag is wanted.
                    'Key': 'string',
                    'Value': tag_name
                },
                {
                    'Key': 'Name',
                    'Value': tag_name
                },
            ]
        },
    ],
    SecurityGroupIds=security_group_id
)


#UserData = user_data,
#BlockDeviceMappings = storage_settings

In [8]:
# Wait until every launched VM is running and collect the private IP addresses.
ips = []
for instance in ec2_instances:
    instance.wait_until_running()
    # Refresh the cached attributes so that state and IP reflect the running instance.
    instance.load()
    print(f"ID: {instance.id}, State: {instance.state['Name']}, IP: {instance.private_ip_address}")
    ips.append(instance.private_ip_address)

# One ready-to-copy command per machine, built from the configured login user.
# The previous message interpolated the whole list (ubuntu@['10.3.2.82']) and
# showed a hard-coded example IP that did not belong to this launch.
print("You can now access the machines via:")
for ip in ips:
    print(f"ssh -i \"path to {key_name} key\" {user}@{ip}")

ID: i-0c659e497445a5711, State: running, IP: 10.3.2.82
You can now access machines via: ssh -i "path to blockchain key" ubuntu@['10.3.2.82'] (if user is ubuntu) 
e.g. ssh -i ~/.ssh/blockchain ubuntu@10.3.2.77


## Record launch times of all VMs (later needed for calculating aws costs)

In [9]:
# Launch time of every VM, stored without tzinfo so it can later be subtracted
# from the (naive) stop times.
launch_times = []
for instance in ec2_instances:
    launched_at = instance.launch_time
    print("Launch Time: " + str(launched_at))
    # Drop the timezone info but keep the wall-clock value as reported by AWS.
    launch_times.append(launched_at.replace(tzinfo=None))
    

Launch Time: 2019-04-17 11:52:15+00:00


## Stop all instances

In [10]:
# Request a stop for every launched VM; the call returns without waiting for
# the instance to actually reach the stopped state.
for instance in ec2_instances:
    instance.stop()

## Calculate Costs of the VM instances
* Get launch and stop time of each VM &rarr; get uptime for all VMs
* Get price per VM 
* Get storage price per VM
<br />
&rarr; Use this information to calculate the total costs

## ToDo: Discuss Timezones!!!

## Calculate uptime for all launched VMs


In [11]:
def calculate_stop_time(instance, client=None):
    """Return the stop time AWS reports for a stopped VM instance.

    The time is taken from the parenthesised part of the instance's
    'StateTransitionReason' as returned by describe_instances.
    https://stackoverflow.com/questions/41231630/checking-stop-time-of-ec2-instance-with-boto3

    Args:
        instance: object with an ``id`` attribute holding the EC2 instance id.
        client: optional EC2 client to query. When omitted, a client for
            eu-central-1 is created from the notebook-level ``session``.
            Passing one avoids creating a new client for every instance.

    Returns:
        The text between the last pair of parentheses of the transition reason
        (the stop timestamp), or None when the instance is not in the 'stopped'
        state, is not part of the response, or the reason has no such part.
    """
    if client is None:
        client = session.client('ec2', region_name='eu-central-1')

    rsp = client.describe_instances(InstanceIds=[instance.id])
    reservations = rsp.get('Reservations') if rsp else None
    if not reservations or not reservations[0].get('Instances'):
        return None

    status = reservations[0]['Instances'][0]
    if status['State']['Name'] != 'stopped':
        return None

    # Raw string: '\(' in a normal string literal is an invalid escape sequence.
    # re.search + a None check replaces findall(...)[0], which raised IndexError
    # when the reason text did not contain parentheses.
    match = re.search(r'.*\((.*)\)', status.get('StateTransitionReason', ''))
    return match.group(1) if match else None
        

# First wait for *all* instances, then read the stop times. Previously both
# status messages were printed once per instance, so "All instances have now
# reached stopped status" appeared while later instances were still stopping.
print("Waiting for all instances to reach stopped status")
for i in ec2_instances:
    i.wait_until_stopped()
print("All instances have now reached stopped status")

stop_times = []
for i in ec2_instances:
    stop_time = calculate_stop_time(i)
    if stop_time is None:
        # Fail with a clear message instead of a TypeError inside strptime.
        raise RuntimeError(f"Could not determine the stop time of instance {i.id}")
    # The parsed value is a naive datetime, matching the naive launch times.
    stop_times.append(datetime.datetime.strptime(stop_time, '%Y-%m-%d %H:%M:%S %Z'))

print("Launch Times:" + str(launch_times))
print("Stop Times:" + str(stop_times))


# Element-wise stop - launch; yields one timedelta per instance.
time_differences = np.subtract(stop_times, launch_times)

def diff_in_hours(x):
    """Convert a timedelta ``x`` into a float number of hours."""
    seconds_per_hour = 3600
    hours = x.total_seconds() / seconds_per_hour
    return float(hours)

# Uptime of every VM in hours, in the same order as ec2_instances.
time_diff_in_hours = [diff_in_hours(delta) for delta in time_differences]

print(time_diff_in_hours)

Waiting for all instances to reach stopped status
All instances have now reached stopped status
Launch Times:[datetime.datetime(2019, 4, 17, 11, 52, 15)]
Stop Times:[datetime.datetime(2019, 4, 17, 12, 38, 51)]
[0.7766666666666666]


## Use aws pricing API to pull ec2 instance and ebs storage costs

### TODO: How to handle months with more or less than 30 days?

In [9]:
#https://stackoverflow.com/questions/51673667/use-boto3-to-get-current-price-for-given-ec2-instance-type
#TODO CHECK IF PER HOUR OR PER DAY

# Get current AWS price for an on-demand instance
def _first_on_demand_usd(price_list, description):
    """Return the USD price of the first on-demand price dimension of the first product.

    Raises IndexError (the exception the unguarded ``PriceList[0]`` access
    raised before) with a readable message when the query matched no product.
    """
    if not price_list:
        raise IndexError(f"AWS Pricing API returned no product for {description}")
    od = json.loads(price_list[0])['terms']['OnDemand']
    id1 = next(iter(od))
    dimensions = od[id1]['priceDimensions']
    id2 = next(iter(dimensions))
    return dimensions[id2]['pricePerUnit']['USD']


def get_instance_price(region, instance, osys, client=None):
    """Look up the current on-demand price of an EC2 instance type.

    Args:
        region: region name as used by the Pricing API 'location' attribute
            (e.g. 'EU (Frankfurt)').
        instance: instance type, e.g. 't2.nano'.
        osys: value of the 'operatingSystem' attribute, e.g. 'Linux'.
        client: optional Pricing API client; defaults to the notebook-level
            ``pricing_client``.

    Returns:
        The USD price as the string delivered by the API. The price unit is not
        checked here — presumably hourly for on-demand instances; confirm
        against the 'unit' field of the price dimension if in doubt.

    Raises:
        IndexError: if no product matches the filters.
    """
    if client is None:
        client = pricing_client
    data = client.get_products(ServiceCode='AmazonEC2',
                               Filters=[{"Field": "tenancy", "Value": "shared", "Type": "TERM_MATCH"},
                                        {"Field": "operatingSystem", "Value": osys, "Type": "TERM_MATCH"},
                                        {"Field": "preInstalledSw", "Value": "NA", "Type": "TERM_MATCH"},
                                        {"Field": "instanceType", "Value": instance, "Type": "TERM_MATCH"},
                                        {"Field": "location", "Value": region, "Type": "TERM_MATCH"},
                                        # Without this filter the query also matches the capacity
                                        # reservation variants of the same instance type, so the
                                        # first result is not guaranteed to be the regular price.
                                        {"Field": "capacitystatus", "Value": "Used", "Type": "TERM_MATCH"}])
    return _first_on_demand_usd(data['PriceList'], f"{osys} {instance} in {region}")


def get_storage_price(region, volume_type, client=None):
    """Look up the current price of an EBS volume type.

    Args:
        region: region name as used by the Pricing API 'location' attribute.
        volume_type: EBS API name of the volume type ('gp2', 'io1', ...).
        client: optional Pricing API client; defaults to the notebook-level
            ``pricing_client``.

    Returns:
        The USD price as the string delivered by the API (the caller treats it
        as a price per GB-month).

    Raises:
        KeyError: if ``volume_type`` is not a known EBS volume type.
        IndexError: if no product matches the filters.
    """
    ebs_name_map = {
        'standard': 'Magnetic',
        'gp2': 'General Purpose',
        'gp3': 'General Purpose',
        'io1': 'Provisioned IOPS',
        'io2': 'Provisioned IOPS',
        'st1': 'Throughput Optimized HDD',
        'sc1': 'Cold HDD'
    }
    if client is None:
        client = pricing_client
    data = client.get_products(ServiceCode='AmazonEC2',
                               Filters=[
                                   {'Type': 'TERM_MATCH', 'Field': 'volumeType', 'Value': ebs_name_map[volume_type]},
                                   # 'volumeType' alone is ambiguous (gp2 and gp3 are both
                                   # 'General Purpose'); the API name pins the exact type.
                                   {'Type': 'TERM_MATCH', 'Field': 'volumeApiName', 'Value': volume_type},
                                   {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': region}])
    return _first_on_demand_usd(data['PriceList'], f"{volume_type} storage in {region}")

# Translate region code to region name
def get_region_name(region_code, endpoint_file=None):
    """Translate a region code (e.g. 'eu-central-1') into a region name.

    The name is read from the 'description' of the region in botocore's
    endpoints.json (first partition).

    Args:
        region_code: AWS region code.
        endpoint_file: optional path of the endpoints JSON file; defaults to
            the file shipped with botocore.

    Returns:
        The region description with 'Europe' shortened to 'EU', or
        'EU (Frankfurt)' if the endpoints file cannot be read.

    Raises:
        KeyError: if the region code is not listed in the file.
    """
    default_region = 'EU (Frankfurt)'
    if endpoint_file is None:
        endpoint_file = resource_filename('botocore', 'data/endpoints.json')
    try:
        with open(endpoint_file, 'r') as f:
            data = json.load(f)
    except IOError:
        return default_region
    description = data['partitions'][0]['regions'][region_code]['description']
    # Newer botocore releases describe European regions as "Europe (...)",
    # while the Pricing API 'location' values (and the default above) use "EU (...)".
    return description.replace('Europe', 'EU')
    
def extract_ebs_storage_from_blockdevicemapping(b_d_mapping, storage_totals=None):
    """Add the size of every EBS volume in a block device mapping to a per-type total.

    Args:
        b_d_mapping: iterable of block device mapping entries (dicts). Entries
            without an "Ebs" key are ignored.
        storage_totals: dict mapping volume type -> accumulated size that is
            updated in place. Defaults to the notebook-level ``storage_dict``.

    Raises:
        KeyError: if an "Ebs" entry lacks "VolumeType" or "VolumeSize".
    """
    if storage_totals is None:
        storage_totals = storage_dict
    for device in b_d_mapping:
        if "Ebs" in device:
            ebs = device["Ebs"]
            volume_type = ebs["VolumeType"]
            # .get(..., 0): volume types that were not pre-seeded in the totals
            # dict (e.g. gp3) are counted instead of raising a KeyError.
            storage_totals[volume_type] = storage_totals.get(volume_type, 0) + ebs["VolumeSize"]
     
    
#dict for all storage 
storage_dict = {
    'standard': 0,
    'gp2': 0,
    'io1': 0,
    'st1': 0,
    'sc1': 0  
}    

# Storage of one VM: the additional drive(s) from the settings plus the root
# volume(s) of the selected AMI.
extract_ebs_storage_from_blockdevicemapping(storage_settings)
extract_ebs_storage_from_blockdevicemapping(root_storage_mapping)
print(storage_dict)
# Use AWS Pricing API at eu-central-1
#'eu-central-1' not working -> Pricing the same ? 
# The region given here only selects the Pricing API endpoint; which region's
# prices are returned is decided by the 'location' filter of each query.
pricing_client = session.client('pricing', region_name='us-east-1')

# Resolve the region name once instead of once per price lookup.
region_name = get_region_name("eu-central-1")

# Get current price for a given instance, region and os
# make operation system not hardcoded
instance_price_per_hour = float(get_instance_price(region_name, instance_type, 'Linux'))

#For example, let's say that you provision a 2000 GB volume for 12 hours (43,200 seconds) in a 30 day month. In a region that charges $0.10 per GB-month, you would be charged $3.33 for the volume ($0.10 per GB-month * 2000 GB * 43,200 seconds / (86,400 seconds/day * 30 day-month)).
#source: https://aws.amazon.com/ebs/pricing/?nc1=h_ls

#get price of used storage
# Only volume types that are actually in use are priced: this avoids one
# Pricing API call per unused type and a failure for types that have no
# product in the region. The GB-month price is converted to an hourly price
# assuming a 30 day month, as in the AWS example above.
storage_price_per_hour = sum(
    float(get_storage_price(region_name, volume_type)) * float(volume_size) / 30 / 24
    for volume_type, volume_size in storage_dict.items()
    if volume_size
)

print("Instance cost per hour: "  + str(instance_price_per_hour))
print("Storage cost per hour: "  + str(storage_price_per_hour))

{'standard': 0, 'gp2': 264, 'io1': 0, 'st1': 0, 'sc1': 0}
Instance cost per hour: 0.0067
Storage cost per hour: 0.04363333333333333


## Calculate total costs by using calculated uptimes and (instance/storage) prices


In [10]:
#calculate price for each instance and then sum up the prices of all instances up to once total price
total_instance_cost = sum(hours * instance_price_per_hour for hours in time_diff_in_hours)
total_storage_cost = sum(hours * storage_price_per_hour for hours in time_diff_in_hours)

# Report the number of instances that were actually measured; the configured
# VM_count can differ from it (e.g. after the settings cell was edited).
instance_count = len(time_diff_in_hours)
# np.mean of an empty list would yield nan plus a RuntimeWarning.
average_hours = np.round(np.mean(time_diff_in_hours), 4) if instance_count else 0.0

print(f"The total instance cost of {instance_count} {instance_type} instances running for averagely {average_hours} hours was: {total_instance_cost} USD.")
print(f"The total storage  cost of {instance_count} {storage_dict} storage units running for averagely {average_hours} hours was: {total_storage_cost} USD.")
total_cost = total_instance_cost + total_storage_cost
print(f"Total Cost: {total_cost} USD")

The total instance cost of 3 t2.nano instances running for averagely 0.0466 hours was: 0.000936138888888889 USD.
The total storage  cost of 3 {'standard': 0, 'gp2': 264, 'io1': 0, 'st1': 0, 'sc1': 0} storage units running for averagely 0.0466 hours was: 0.006096546296296296 USD.
Total Cost: 0.007032685185185185 USD


## Terminate the stopped instances for good

In [11]:
# Terminate every VM launched by this notebook.
for instance in ec2_instances:
    instance.terminate()