In [1]:
import sys
sys.path.append('/home/lorenz/github/cost-optimal-model/')


In [2]:
import random
import math
import pandas as pd
from models.scripts.batch_scheduling import Scheduler
from models.utils import calc_cost

def prepare_tests():
    snowflake_queries = pd.read_csv("../../input/snowflake_queries.csv")
    seed = 0
    queries = snowflake_queries.sample(n=10, random_state=seed)
    queries_dict = queries.to_dict("records")
    tests = [
        {
            "test_id": "Test Snowflake",
            "queries": queries_dict,
            "seed": seed,
            "t0": 6000,
            "iterations": 500,
            "output_file": "test_snowflake.json"
        }
    ]

    return tests

def simulate_annealing(instances, queries, kmax, t0, seed):
    random.seed(a=seed)
    scheduler = Scheduler(instances)

    for query in queries:
        query["total_reads"] = round(query["total_reads"])
        results = scheduler.calc_time(query)
        best = calc_cost([results])[0].iloc[0]
        query["used_mem"] = best['used_mem_caching'] + best['used_mem_spooling']
        query["used_sto"] = best['used_sto_caching'] + best['used_sto_spooling']
        query["used_cores"] = best["used_cores"]
        query["cpu_time"] = best["time_cpu"]
        query["rw_mem"] = best["rw_mem"]
        query["rw_sto"] = best["rw_sto"]
        query["rw_s3"] = best["rw_s3"]

    current_cost = setup_initial_state(queries, scheduler)

    for k in range(0, kmax):
        t = temperature(k, t0)
        query, provision, id = neighbour(queries, scheduler)
        new_cost = neighbour_cost(current_cost, query, provision, id, scheduler)
        if acceptance_probability(current_cost, new_cost, t) >= random.random():
            apply_change(query, provision, id, scheduler)
            current_cost = new_cost
    current_state(queries)
    correctness_check(queries, current_cost)
    return current_cost


def setup_initial_state(queries, scheduler):
    total_cost = 0
    for query in queries:
        index = random.randrange(len(scheduler.suitable_instance_types(query).index))
        type_id = scheduler.instance_types.index[index]
        instance_id = scheduler.schedule_new_instance(type_id)
        query["instance_id"] = instance_id
        query["type_id"] = type_id

        query_cost = scheduler.schedule_query_cost_new_instance(type_id, query)
        scheduler.schedule_query(query, instance_id)

        total_cost += query_cost
    return total_cost

def temperature(k, t0):
    return t0 / (1 + k)

def neighbour(queries, scheduler):
    query = random.choice(queries)
    provisioned_instances = scheduler.suitable_provisioned_instances(query)
    instance_types = scheduler.suitable_instance_types(query)
    index = random.randrange(len(provisioned_instances.index) + 1)
    if index < len(provisioned_instances.index):
        return query, False, provisioned_instances.index[index]
    else:
        index = random.randrange(len(instance_types.index))
        return query, True, instance_types.index[index]

def neighbour_cost(cost, query, provision, id, scheduler):
    if provision:
        cost += scheduler.schedule_query_cost_new_instance(id, query)
        cost += scheduler.unschedule_query_cost(query["instance_id"], query)
    else:
        if id == query["instance_id"]:
            return cost
        print(id)
        display(scheduler.instance_types)
        cost += scheduler.schedule_query_cost_existing_instance(id, query)
        cost += scheduler.unschedule_query_cost(query["instance_id"], query)
    return cost

def acceptance_probability(cost, new_cost, t):
    if new_cost < cost:
        return 1
    return math.exp(-(new_cost - cost)/t)

def apply_change(query, provision, id, scheduler):
    old_instance_id = query["instance_id"]
    old_tpe_id = query["type_id"]
    if provision:
        instance_id = scheduler.schedule_new_instance(id)
        scheduler.schedule_query(query, instance_id)
        query["instance_id"] = instance_id
        query["type_id"] = id
    else:
        type_id = scheduler.provisioned_instances.loc[id, "id"]
        scheduler.schedule_query(query, id)
        query["instance_id"] = id
        query["type_id"] = type_id
    scheduler.unschedule_query(query, old_instance_id)

def current_state(queries):
    for query in queries:
        print("instance type: " + query["type_id"])
        print("instance id: " + query["instance_id"] + "\n")

def correctness_check(queries, current_cost):
    # TODO
    return True 
        

In [3]:
from preprocessing.instances import inst_set_transform


if __name__ == '__main__':
    instances = inst_set_transform()
    for params in prepare_tests():
            print("seed: " + str(params["seed"]))
            print("t0: " + str(params["t0"]))
            simulate_annealing(
                instances.copy(),
                params["queries"],
                params["iterations"],
                params["t0"],
                params["seed"]
            )

    print("Done")

seed: 0
t0: 6000
8


Unnamed: 0,memory_Gib,vcpu_count,clock_ghz,storage_Gib,storage_count,storage_type,network_Gbps,network_is_steady,cost_usdph,loading_comment,id_prefix,id_numstr,id_number,id_slice,id_slice_factor,id_slice_of,id_slice_net,id_slice_sto,calc_net_speed,calc_s3_speed,calc_mem_speed,calc_sto_speed,calc_cpu_real,calc_mem_caching,calc_sto_caching,calc_mem_spooling,calc_sto_spooling,busy_cores,used_mem,used_sto,rw_mem,rw_sto,rw_s3,query_cpu_times,id
0,384.0,96.0,3.1,0.0,0.0,EBS,100,True,5.712,,m5n,24,24.0,24.0,1.0,8767,100.0,0.0,12.5,10.0,50,12.5,48.0,192.0,0.0,192.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,[0.21],m5n.24xlarge
1,384.0,96.0,3.1,0.0,0.0,EBS,100,True,5.712,,m5n,24,24.0,24.0,1.0,8767,100.0,0.0,12.5,10.0,50,12.5,48.0,192.0,0.0,192.0,0.0,8.0,2.0,0.0,0.0,0.0,0.0,[0.31],m5n.24xlarge
2,488.0,64.0,2.3,15200.0,8.0,NVMe,25,True,4.992,,i3,16,16.0,16.0,1.0,8825,25.0,8.0,3.125,2.5,50,16.0,32.0,244.0,7600.0,244.0,7600.0,8.0,2.0,0.0,0.0,0.0,0.0,[0.1275],i3.16xlarge
3,384.0,48.0,4.0,1800.0,2.0,NVMe,25,True,4.464,,z1d,12,12.0,12.0,1.0,8771,25.0,2.0,3.125,2.5,50,4.0,24.0,192.0,900.0,192.0,900.0,4.0,2.0,0.0,0.0,0.0,0.0,[30.05],z1d.12xlarge
4,384.0,96.0,3.1,0.0,0.0,EBS,25,True,4.608,,m5,24,24.0,24.0,1.0,8627,25.0,0.0,3.125,2.5,50,3.125,48.0,192.0,0.0,192.0,0.0,4.0,16.0,38.0,24.0,23.0,0.0,[552.6325],m5.24xlarge
5,768.0,96.0,3.1,0.0,0.0,EBS,25,True,6.048,,r5,24,24.0,24.0,1.0,8712,25.0,0.0,3.125,2.5,50,3.125,48.0,384.0,0.0,384.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0,[1.1575],r5.24xlarge
6,768.0,96.0,3.1,0.0,0.0,EBS,25,True,6.048,,r5,24,24.0,24.0,1.0,8712,25.0,0.0,3.125,2.5,50,3.125,48.0,384.0,0.0,384.0,0.0,8.0,2.0,0.0,0.0,0.0,0.0,[0.21375],r5.24xlarge
7,768.0,96.0,3.1,3600.0,4.0,NVMe,100,True,8.016,,r5dn,24,24.0,24.0,1.0,8837,100.0,4.0,12.5,10.0,50,8.0,48.0,384.0,1800.0,384.0,1800.0,4.0,2.0,0.0,0.0,0.0,0.0,[3.64],r5dn.24xlarge
8,384.0,96.0,3.1,3600.0,4.0,NVMe,25,True,5.424,,m5d,24,24.0,24.0,1.0,8683,25.0,4.0,3.125,2.5,50,8.0,48.0,192.0,1800.0,192.0,1800.0,8.0,2.0,0.0,0.0,0.0,0.0,[0.0125],m5d.24xlarge
9,488.0,64.0,2.3,15200.0,8.0,NVMe,25,True,4.992,,i3,16,16.0,16.0,1.0,8825,25.0,8.0,3.125,2.5,50,16.0,32.0,244.0,7600.0,244.0,7600.0,8.0,2.0,0.0,0.0,0.0,0.0,[0.08625],i3.16xlarge


KeyError: '8'