# Policy Generator

+ Tag the traffic trace with the selected version
+ Compare real traffic against predictions
+ Generate predictions with variable accuracy
+ Group time periods based on the policy settings
+ Tag each group with the decided version


## 1. Get the Policy Descriptor (PD)

In [1]:
import requests
import yaml
# Download the multi-version policy descriptor and parse it into `PD`.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were YAML.
r = requests.get('https://raw.githubusercontent.com/CN-UPB/Pishahang/mvp-thesis/pish-examples/pwm-scripts/descriptors/multiversion/transcoder_mv_policy.yml', timeout=30)
r.raise_for_status()
# print(r.text)
# NOTE(review): FullLoader can build more than plain data types and this document
# comes from the network; consider yaml.safe_load if the descriptor only uses
# standard YAML tags.
PD = yaml.load(r.text, Loader=yaml.FullLoader)

# PD["versions"]

# PD["versions"] = { 
#     'virtual_deployment_units_gpu': {'transcoder-image-1-gpu': {'cost_per_min': 10,
#    'max_data_rate': 3000,
#    'management_overhead': 6}},
#  'virtual_deployment_units_con': {'transcoder-image-1-con': {'cost_per_min': 3,
#    'max_data_rate': 1200,
#    'management_overhead': 6}}}

# for _vm_type_key, _vm_type_value in PD["versions"].items():
#     print(_vm_type_key)    
#     for _vm_version_key, _vm_version_value in _vm_type_value.items():
#         print(_vm_version_key)
#         print(_vm_version_value)
#         print("\n")

# Policy Decision Steps

---



----
### Decision Matrix    

|                    	| Weights 	| Version1 	| Version2 	| Version3 	| Score 	|
|--------------------	|:-------:	|:--------:	|:--------:	|:--------:	|:--------:	|
| Cost (-)           	|    -4    	|     x1   	|    x2    	|    x3    	|    s    	|
| Over Provision (-) 	|    -3   	|     x1   	|    x2    	|    x3    	|    s    	|
| Overhead (-)       	|    -4   	|     x1   	|    x2    	|    x3    	|    s    	|
| Support deviation (+) |    3    	|     x1   	|    x2    	|    x3    	|    s    	|
| Same Version (+)   	|    3    	|     x1   	|    x2    	|    x3    	|    s    	|

In [2]:
import pandas as pd
import numpy as np

from sklearn import preprocessing
_SCORE_MIN, _SCORE_MAX = 1, 5

def find_max_datarate_version(versions):
    """
    Find the version with the max supported datarate.

    Parameters
    ----------
    versions : dict
        Nested mapping ``{vm_type: {version_name: spec}}`` in which every
        ``spec`` has a ``"max_data_rate"`` entry.

    Returns
    -------
    dict
        ``{vm_type: {version_name: spec}}`` containing only the version with
        the highest ``max_data_rate``. On ties the first one encountered wins.

    Raises
    ------
    ValueError
        If ``versions`` does not contain any version.
    """
    _best = None  # (vm_type, version_name, spec) of the best candidate so far

    for _vm_type_key, _vm_type_value in versions.items():
        for _vm_version_key, _vm_version_value in _vm_type_value.items():
            # Seed from the first candidate instead of from 0 so the result is
            # still defined when every max_data_rate is 0 or negative.
            if _best is None or _vm_version_value["max_data_rate"] > _best[2]["max_data_rate"]:
                _best = (_vm_type_key, _vm_version_key, _vm_version_value)

    if _best is None:
        raise ValueError("versions does not contain any version to choose from")

    _best_type, _best_name, _best_spec = _best
    return {_best_type: {_best_name: _best_spec}}

def get_supported_versions(prediction, versions):
    """
    Get all the versions that can support the datarate demand.

    A version qualifies when its ``"max_data_rate"`` is greater than or equal
    to ``prediction["mean"]``. The result keeps the nested
    ``{vm_type: {version_name: spec}}`` layout of ``versions``. When no version
    qualifies, the result of ``find_max_datarate_version(versions)`` is
    returned instead.
    """
    supported = {}

    for type_name, type_versions in versions.items():
        for version_name, spec in type_versions.items():
            if spec["max_data_rate"] >= prediction["mean"]:
                # Create the per-type bucket on first use, then register the version.
                supported.setdefault(type_name, {})[version_name] = spec

    if not supported:
        return find_max_datarate_version(versions)
    return supported

def interpolate_array(values, min=_SCORE_MIN, max=_SCORE_MAX):
    """
    Interpolate data points to a certain range.

    The smallest entry of ``values`` maps to ``min``, the largest to ``max``
    and everything in between is placed linearly.

    Parameters
    ----------
    values : array-like of numbers
        Data points to rescale (list, numpy array or pandas Series).
    min, max : number
        Bounds of the target range.

    Returns
    -------
    numpy.ndarray
        Rescaled values. Empty input gives an empty array. If all entries are
        equal, every entry maps to ``max``.
    """
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        return values

    lowest, highest = values.min(), values.max()
    if lowest == highest:
        # np.interp needs increasing x-coordinates; with a zero-width source
        # range it has nothing to interpolate over, so assign one common score.
        return np.full(values.shape, float(max))

    return np.interp(values, (lowest, highest), (min, max))


def build_decision_matrix(prediction, meta, versions):
    """
    Build the decision matrix for a given traffic prediction values.

    One row is produced per version found in the nested ``versions`` mapping,
    indexed by ``(vm_type, version_name)``. Columns, in order: ``cost``,
    ``support_deviation``, ``over_provision``, ``same_version``, ``overhead``,
    ``support_max`` and ``support_recent_history``.

    ``prediction`` is read through its ``mean``, ``std`` and ``max`` entries,
    ``meta`` through ``current_version`` and ``recent_history["mean"]``.
    The ``over_provision``, ``cost`` and ``overhead`` columns are rescaled
    with ``interpolate_array`` before the frame is returned.
    """
    def _binary_score(condition):
        # Yes/no criteria get the top score when satisfied, the bottom one otherwise.
        return 5 if condition else 1

    rows = {}
    for type_name, type_versions in versions.items():
        for version_name, spec in type_versions.items():
            rows[(type_name, version_name)] = {
                # Cost
                "cost": spec['cost_per_min'],
                # Support deviation: capacity strictly above mean + one std
                "support_deviation": _binary_score(
                    spec['max_data_rate'] > (prediction['mean'] + prediction['std'])),
                # Over Provision: capacity left unused at the predicted mean
                "over_provision": int(spec['max_data_rate']) - int(prediction['mean']),
                # Same Version: candidate is the version named in meta
                "same_version": _binary_score(meta["current_version"] == version_name),
                # Overhead
                "overhead": spec['management_overhead'],
                # Support max datarate
                "support_max": _binary_score(
                    spec['max_data_rate'] >= (prediction['max'])),
                # Support recent history
                "support_recent_history": _binary_score(
                    spec['max_data_rate'] >= (meta["recent_history"]["mean"])),
            }

    decision_matrix_df = pd.DataFrame.from_dict(rows, orient='index')

    # Bring the raw-valued criteria onto the common score range.
    for column in ("over_provision", "cost", "overhead"):
        decision_matrix_df[column] = interpolate_array(decision_matrix_df[column])

    return decision_matrix_df

def get_policy_decision(decision_matrix, weights):
    """
    Get policy decision given decision matrix and weights.

    Every row of ``decision_matrix`` is scored as the weighted sum of its
    ``cost``, ``over_provision``, ``overhead``, ``support_deviation`` and
    ``same_version`` values. Entries under ``weights["negative"]`` are applied
    with a minus sign, entries under ``weights["positive"]`` as given.

    The score is written into a ``score`` column of ``decision_matrix`` (the
    frame is modified in place). The index label of the first row holding the
    highest score is returned.
    """
    criteria = ("cost", "over_provision", "overhead", "support_deviation", "same_version")

    # Signed weights, in the same order as `criteria`.
    signed_weights = np.array([
        -1 * weights["negative"]["cost"],
        -1 * weights["negative"]["over_provision"],
        -1 * weights["negative"]["overhead"],
        weights["positive"]['support_deviation'],
        weights["positive"]['same_version'],
    ])

    for label, row in decision_matrix.iterrows():
        criterion_values = np.array([getattr(row, name) for name in criteria])
        decision_matrix.at[label, 'score'] = np.dot(signed_weights, criterion_values)

    best_score = decision_matrix.score.max()
    winners = decision_matrix[decision_matrix.score == best_score]
    return winners.index[0]

def find_cheapest_version(versions):
    """
    Find the version with least cost.

    Parameters
    ----------
    versions : dict
        Nested mapping ``{vm_type: {version_name: spec}}`` in which every
        ``spec`` has a ``"cost_per_min"`` entry.

    Returns
    -------
    tuple
        ``(vm_type, version_name)`` of the cheapest version. On ties the first
        one encountered wins.

    Raises
    ------
    ValueError
        If ``versions`` does not contain any version.
    """
    # FIXME: cost_per_min should be int
    # Until then the value is coerced for comparison. float() is used instead
    # of int() so fractional costs are not truncated before being compared.
    _candidates = [
        (_vm_type_key, _vm_version_key, float(_vm_version_value["cost_per_min"]))
        for _vm_type_key, _vm_type_value in versions.items()
        for _vm_version_key, _vm_version_value in _vm_type_value.items()
    ]

    if not _candidates:
        raise ValueError("versions does not contain any version to choose from")

    # min() returns the first minimal item, matching the strict '<' tie rule.
    _vm_type_key, _vm_version_key, _ = min(_candidates, key=lambda _candidate: _candidate[2])
    return (_vm_type_key, _vm_version_key)


# Run Policy on Dataset
---


In [3]:
import pandas as pd
import numpy as np

LOOK_AHEAD = 5 # Mins

# NOTE(review): absolute, environment-specific path; the notebook only runs where
# this file exists at exactly this location.
traffic_training_complete = pd.read_csv(r'/plugins/son-mano-traffic-forecast/notebooks/data/dataset_six_traffic.csv', index_col=0)
print(traffic_training_complete.shape)

# traffic_training_complete = traffic_training_complete[:720]

# Collapse every LOOK_AHEAD consecutive samples into one window described by its
# mean / std / min / max.
traffic_grouped = traffic_training_complete.groupby(
                    np.arange(len(traffic_training_complete))//LOOK_AHEAD).agg(['mean', 'std', 'min', 'max'])

# Per-sample copy with a fresh 0..n-1 index. drop=True discards the old index
# directly, whatever it is called.
traffic_history = traffic_training_complete.reset_index(drop=True)

# traffic_history = traffic_history[:1440]
print(traffic_grouped.shape)
print(traffic_history.shape)

traffic_history.head(5)

(11000, 1)
(2200, 4)
(11000, 1)


Unnamed: 0,sent
0,1143.842358
1,1181.595847
2,1158.08052
3,1189.863609
4,1190.384826


In [9]:
# Run Policy on Dataset
# Every row of the grouped traffic (one LOOK_AHEAD-sized window) is scored with the
# decision matrix, and the winning version is stored in the 'policy' column.
# WEIGHTS --> [cost, over_provision, overhead, support_deviation, same_version]
WEIGHTS = {
    "negative": {"cost": 5, "over_provision": 2, "overhead": 3},
    "positive": {
        "support_deviation": 1,
        "same_version": 1,
        "support_max": 1,
        "support_recent_history": 1,
    },
}

# Not read by the loop below, which uses each window's own statistics instead.
prediction = {"mean": 400, "std": 100, "min": 800, "max": 1800}

traffic_policy_test = traffic_grouped['sent'].copy()
# traffic_policy_test.plot()

# State carried from one window to the next.
meta = dict(
    current_version="transcoder-image-1-vm",
    current_version_history="transcoder-image-1-vm",
    recent_history=None,
)

switch_counter = dict(history=0, policy=0, notsame=0)

with open("output_debug.log", "w") as f:
    for index_label, row_series in traffic_policy_test.iterrows():
        # The first window has no predecessor, so it doubles as its own history.
        if meta["recent_history"] is None:
            meta["recent_history"] = row_series

        supported_versions = get_supported_versions(prediction=row_series, versions=PD["versions"])
        decision_matrix_df = build_decision_matrix(prediction=row_series, meta=meta, versions=supported_versions)

        _selected_version = ":".join(get_policy_decision(decision_matrix_df, WEIGHTS))
        traffic_policy_test.at[index_label, 'policy'] = _selected_version

        # "<vm_type>:<version_name>" -> version_name
        _selected_name = _selected_version.split(":")[1]
        if _selected_name != meta["current_version"]:
            switch_counter["policy"] += 1

        # One debug record per window, written before meta moves on to this window.
        f.write(
            "\nrecent_history\n" + str(meta["recent_history"])
            + "\nForecast\n" + str(row_series)
            + "\n\n_selected_version\n" + _selected_version
            + "\n\n" + str(decision_matrix_df)
            + "\n\n"
        )

        meta = {
            "current_version": _selected_name,
            "recent_history": row_series,
        }

In [5]:
# History baseline: for every sample pick the cheapest version that supports the
# previous sample's rate (the first sample is compared with itself).

# Start from zero so that re-running this cell does not add to an earlier count.
switch_counter["history"] = 0

meta = {
    "current_version_history": "transcoder-image-1-vm",
    "recent_history": None
}

row_counter = 0

for index_label, row_series in traffic_history.iterrows():
    if meta["recent_history"] is None:
        meta["recent_history"] = row_series['sent']

    supported_versions_history = get_supported_versions(prediction={"mean": meta['recent_history']}, versions=PD["versions"])

    _selected_version_history = ":".join(find_cheapest_version(versions=supported_versions_history))
    traffic_history.at[index_label, 'history'] = _selected_version_history

    # "<vm_type>:<version_name>" -> version_name
    _history_version_name = _selected_version_history.split(":")[1]
    if _history_version_name != meta["current_version_history"]:
        switch_counter["history"] += 1

    meta = {
        "current_version_history": _history_version_name,
        "recent_history": row_series['sent']
    }

    row_counter += 1

# print(switch_counter)


## Merge Data

In [6]:
# Expand every window decision back to one row per sample so it lines up with the
# per-sample history decisions. The repeat factor has to be the window size
# (LOOK_AHEAD). The result is cut to the number of samples in case the last
# window was only partially filled.
final_decision_dataset = (
    traffic_policy_test
    .iloc[np.repeat(np.arange(len(traffic_policy_test)), LOOK_AHEAD)]
    .iloc[:len(traffic_history)]
    .reset_index(drop=True)
)
final_decision_dataset['history'] = traffic_history['history']

print("Switch Stats")
print(switch_counter)

print("\nPolicy")
print(final_decision_dataset['policy'].value_counts())

print("\nHistory")
print(final_decision_dataset['history'].value_counts())

final_decision_dataset.to_csv('./data/{}m_policy_decisions_dataset_six_traffic.csv'.format(LOOK_AHEAD))
print(final_decision_dataset.shape)
final_decision_dataset.head(10)


Switch Stats
{'history': 625, 'policy': 45, 'notsame': 0}

Policy
virtual_deployment_units_con:transcoder-image-1-con    6350
virtual_deployment_units_gpu:transcoder-image-1-gpu    4650
Name: policy, dtype: int64

History
virtual_deployment_units_gpu:transcoder-image-1-gpu    4595
virtual_deployment_units_con:transcoder-image-1-con    3692
virtual_deployment_units_vm:transcoder-image-1-vm      2713
Name: history, dtype: int64
(11000, 6)


Unnamed: 0,mean,std,min,max,policy,history
0,1172.753432,20.8144,1143.842358,1190.384826,virtual_deployment_units_con:transcoder-image-...,virtual_deployment_units_con:transcoder-image-...
1,1172.753432,20.8144,1143.842358,1190.384826,virtual_deployment_units_con:transcoder-image-...,virtual_deployment_units_con:transcoder-image-...
2,1172.753432,20.8144,1143.842358,1190.384826,virtual_deployment_units_con:transcoder-image-...,virtual_deployment_units_con:transcoder-image-...
3,1172.753432,20.8144,1143.842358,1190.384826,virtual_deployment_units_con:transcoder-image-...,virtual_deployment_units_con:transcoder-image-...
4,1172.753432,20.8144,1143.842358,1190.384826,virtual_deployment_units_con:transcoder-image-...,virtual_deployment_units_con:transcoder-image-...
5,1218.337667,8.049923,1210.799511,1230.664978,virtual_deployment_units_gpu:transcoder-image-...,virtual_deployment_units_con:transcoder-image-...
6,1218.337667,8.049923,1210.799511,1230.664978,virtual_deployment_units_gpu:transcoder-image-...,virtual_deployment_units_gpu:transcoder-image-...
7,1218.337667,8.049923,1210.799511,1230.664978,virtual_deployment_units_gpu:transcoder-image-...,virtual_deployment_units_gpu:transcoder-image-...
8,1218.337667,8.049923,1210.799511,1230.664978,virtual_deployment_units_gpu:transcoder-image-...,virtual_deployment_units_gpu:transcoder-image-...
9,1218.337667,8.049923,1210.799511,1230.664978,virtual_deployment_units_gpu:transcoder-image-...,virtual_deployment_units_gpu:transcoder-image-...


In [7]:
# https://stackoverflow.com/questions/53766397/how-to-center-the-grid-of-a-plot-on-scatter-points
# https://stackoverflow.com/questions/47684652/how-to-customize-marker-colors-and-shapes-in-scatter-plot
# Optional scatter plot of the policy and history decisions per sample. Off by default.
DRAW_SCATTER = False
if DRAW_SCATTER:
    from matplotlib import pyplot as plt

    markers = ["s" , "s" , "o" , "v" , "^" , "<", ">"]
    colors = ['r','g','b','c','m', 'y', 'k']

    x = final_decision_dataset.index
    y = [final_decision_dataset.policy, final_decision_dataset.history]
    labels = ['policy', 'history']

    fig, ax = plt.subplots(figsize=(200,2))
    # zip stops after the two plotted series, so only the first two markers and
    # colours are used.
    for series, label, marker, color in zip(y, labels, markers, colors):
        ax.scatter(x, series, marker=marker, color=color, s=49, label=label)

    # Major ticks on the points, minor ticks half a step off so the grid
    # encloses each point.
    n_samples = len(y[0])
    ax.set_yticks(np.arange(3))
    ax.set_yticks(np.arange(3+1)-0.5, minor=True)

    ax.set_xticks(np.arange(n_samples))
    ax.set_xticks(np.arange(n_samples+1)-0.5, minor=True)

    ax.grid(True, which="minor")
    ax.set_aspect("equal")
    plt.legend(loc="upper left")
    plt.show()

