# Policy Generator

+ Tag the traffic trace with the version
+ Ability to compare real vs prediction
+ Ability to generate predictions with variable accuracies
+ Group time periods based on Policy settings
+ Tag each group by decided version 


## 1. Get PD

In [8]:
import requests
import yaml
r = requests.get('https://raw.githubusercontent.com/CN-UPB/Pishahang/mvp-thesis/pish-examples/pwm-scripts/descriptors/multiversion/cirros1_mv_policy.yml')
# print(r.text)
PD = yaml.load(r.text, Loader=yaml.FullLoader)

# PD["versions"]

for _vm_type_key, _vm_type_value in PD["versions"].items():
    print(_vm_type_key)    
    for _vm_version_key, _vm_version_value in _vm_type_value.items():
        print(_vm_version_key)
        print(_vm_version_value)
        print("\n")

virtual_deployment_units_vm
cirros-image-1-vm
{'cost_per_min': 2, 'max_data_rate': 600, 'management_overhead': 60}


cirros-image-2-vm
{'cost_per_min': 2, 'max_data_rate': 600, 'management_overhead': 60}


virtual_deployment_units_gpu
cirros-image-1-gpu
{'cost_per_min': 10, 'max_data_rate': 3000, 'management_overhead': 6}


cirros-image-2-gpu
{'cost_per_min': 10, 'max_data_rate': 3000, 'management_overhead': 6}


virtual_deployment_units_con
cirros-image-1-con
{'cost_per_min': 2, 'max_data_rate': 1200, 'management_overhead': 6}


cirros-image-2-con
{'cost_per_min': 2, 'max_data_rate': 1200, 'management_overhead': 6}




# Policy Decision Steps

---



----
### Decision Matrix    

|                    	| Weights 	| Version1 	| Version2 	| Version3 	| Score 	|
|--------------------	|:-------:	|:--------:	|:--------:	|:--------:	|:--------:	|
| Cost (-)           	|    -4    	|     x1   	|    x2    	|    x3    	|    s    	|
| Over Provision (-) 	|    -3   	|     x1   	|    x2    	|    x3    	|    s    	|
| Overhead (-)       	|    -4   	|     x1   	|    x2    	|    x3    	|    s    	|
| Support deviation (+) |    3    	|     x1   	|    x2    	|    x3    	|    s    	|
| Same Version (+)   	|    3    	|     x1   	|    x2    	|    x3    	|    s    	|

In [9]:
import pandas as pd
import numpy as np

from sklearn import preprocessing
_SCORE_MIN, _SCORE_MAX = 1, 5

'''
Find the version with the max supported datarate
'''
def find_max_datarate_version(versions):
    _max_datarate = 0

    for _vm_type_key, _vm_type_value in versions.items():
        # print(_vm_type_key)    

        for _vm_version_key, _vm_version_value in _vm_type_value.items():
            # print(_vm_version_key)
            # print(_vm_version_value["max_data_rate"])

            if _vm_version_value["max_data_rate"] > _max_datarate:
                _max_datarate = _vm_version_value["max_data_rate"]
                _max_datarate_version = { _vm_type_key: { _vm_version_key : _vm_version_value } }

    return _max_datarate_version

'''
Get all the versions that can support the datarate demand
'''
def get_supported_versions(prediction, versions):
    # Iterate versions
    datarate_supported_versions = {}

    for _vm_type_key, _vm_type_value in versions.items():
        # print(_vm_type_key)    

        for _vm_version_key, _vm_version_value in _vm_type_value.items():
            # print(_vm_version_key)
            # print(_vm_version_value["max_data_rate"])
            # print(prediction["mean"])
            if _vm_version_value["max_data_rate"] >= prediction["mean"]:
                # check if key present else add
                if _vm_type_key in datarate_supported_versions:
                    datarate_supported_versions[_vm_type_key][_vm_version_key] = _vm_version_value
                else:
                    datarate_supported_versions[_vm_type_key] = {}
                    datarate_supported_versions[_vm_type_key][_vm_version_key] = _vm_version_value

    if len(datarate_supported_versions) == 0:
        return find_max_datarate_version(versions)
    return datarate_supported_versions

'''
Interpolate data points to a certain range
'''
def interpolate_array(values, min=_SCORE_MIN, max=_SCORE_MAX):
    return np.interp(values, (values.min(), values.max()), (min, max))


'''
Build the decision matrix for a given traffic prediction values 
'''
def build_decision_matrix(prediction, meta, versions):
    _decision_matrix = {}
    for _vm_type_key, _vm_type_value in versions.items():
        for _vm_version_key, _vm_version_value in _vm_type_value.items():
                if _vm_type_key not in _decision_matrix:
                    _decision_matrix[_vm_type_key] = {}
                if _vm_version_key not in _decision_matrix[_vm_type_key]:
                    _decision_matrix[_vm_type_key][_vm_version_key] = {}

                # Cost
                _decision_matrix[_vm_type_key][_vm_version_key]["cost"] = _vm_version_value['cost_per_min']

                # Support deviation
                if _vm_version_value['max_data_rate'] > (prediction['mean'] + prediction['std']):
                    _decision_matrix[_vm_type_key][_vm_version_key]["support_deviation"] = 5
                else:
                    _decision_matrix[_vm_type_key][_vm_version_key]["support_deviation"] = 1

                # Over Provision
                _decision_matrix[_vm_type_key][_vm_version_key]["over_provision"] = int(_vm_version_value['max_data_rate']) - int(prediction['mean'])

                # Same Version
                if meta["current_version"] == _vm_version_key:
                    _decision_matrix[_vm_type_key][_vm_version_key]["same_version"] = 5
                else:
                    _decision_matrix[_vm_type_key][_vm_version_key]["same_version"] = 1

                # Overhead
                _decision_matrix[_vm_type_key][_vm_version_key]["overhead"] = _vm_version_value['management_overhead']


    decision_matrix_df = pd.DataFrame.from_dict({(i,j): _decision_matrix[i][j] 
                                for i in _decision_matrix.keys() 
                                for j in _decision_matrix[i].keys()},
                                orient='index')

    decision_matrix_df["over_provision"] = interpolate_array(decision_matrix_df["over_provision"])
    decision_matrix_df["cost"] = interpolate_array(decision_matrix_df["cost"])
    decision_matrix_df["overhead"] = interpolate_array(decision_matrix_df["overhead"])

    return decision_matrix_df

'''
Get policy decision given decision matrix and weights
'''
def get_policy_decision(decision_matrix, weights):

    # Negative
    cost = -1 * weights["negative"]["cost"]
    over_provision = -1 * weights["negative"]["over_provision"]
    overhead = -1 * weights["negative"]["overhead"]

    # Positive
    support_deviation = weights["positive"]['support_deviation']
    same_version = weights["positive"]['same_version']

    # WEIGHTS --> [cost, over_provision, overhead, support_deviation, same_version]
    weights_row = [cost, over_provision, overhead, support_deviation, same_version]

    for index_label, row_series in decision_matrix.iterrows():
        _row = np.array([row_series.cost, row_series.over_provision, row_series.overhead, row_series.support_deviation, row_series.same_version])

        decision_matrix.at[index_label , 'score'] = np.dot(np.array(weights_row), _row)

    _version = decision_matrix[decision_matrix.score == decision_matrix.score.max()].index[0]
    return _version

'''
Find the version with least cost
'''
def find_cheapest_version(versions):
    _cost = 999999

    for _vm_type_key, _vm_type_value in versions.items():
        # print(_vm_type_key)    

        for _vm_version_key, _vm_version_value in _vm_type_value.items():
            # print(_vm_version_key)
            # print(_vm_version_value["max_data_rate"])
            # FIXME: cost_per_min should be int
            if int(_vm_version_value["cost_per_min"]) < int(_cost):
                _cost = _vm_version_value["cost_per_min"]
                # _cost_version = { _vm_type_key: { _vm_version_key : _vm_version_value } }
                _cost_version = (_vm_type_key, _vm_version_key )

    return _cost_version

# Testing on sample prediction data

### PD
 
+ VM 
    - 'cost_per_min': '2'
    - 'max_data_rate': 1200
    - 'management_overhead': 60
+ GPU
    - 'cost_per_min': '5'
    - 'max_data_rate': 3000
    - 'management_overhead': 6
+ CON
    - 'cost_per_min': '1'
    - 'max_data_rate': 600
    - 'management_overhead': 4

---

# TEST 1
---

### Weights

`WEIGHTS = [-4, -3, -4, 2, 3]`

+ Cost (-4)
+ Over Provision (-3)
+ Overhead (-4)
+ Support deviation (2)
+ Same Version (3)

### Prediction

`prediction = { "mean": 800, "std": 100, "min": 800, "max": 1800 }`

+ Mean: 800
+ Standard Deviation: 100
+ Min: 800
+ Max: 1200

### Meta

`meta = { "current_version": "cirros-image-1-gpu" }`

+ Current Version: GPU

In [20]:
# Test 1
# WEIGHTS --> [cost, over_provision, overhead, support_deviation, same_version]
# WEIGHTS = PD["weights"]

WEIGHTS = {
    "negative": {
      "cost": 4,
      "over_provision": 3,
      "overhead": 4
    },
    "positive": {
      "support_deviation": 2,
      "same_version": 3
    }
  }

prediction = { "mean": 400, "std": 100, "min": 800, "max": 1800 }
meta = { "current_version": "cirros-image-1-gpu" }

supported_versions = get_supported_versions(prediction=prediction, versions=PD["versions"])
decision_matrix_df = build_decision_matrix(prediction=prediction, meta=meta, versions=supported_versions)

selected_type, selected_version = get_policy_decision(decision_matrix_df, WEIGHTS)

print("\nSelected version to deploy - ", selected_type, " : ", selected_version, "\n")

decision_matrix_df


Selected version to deploy -  virtual_deployment_units_con  :  cirros-image-1-con 



Unnamed: 0,Unnamed: 1,cost,support_deviation,over_provision,same_version,overhead,score
virtual_deployment_units_vm,cirros-image-1-vm,1.0,5,1.0,1,5.0,-4.0
virtual_deployment_units_vm,cirros-image-2-vm,1.0,5,1.0,1,5.0,-4.0
virtual_deployment_units_gpu,cirros-image-1-gpu,5.0,5,5.0,5,1.0,-12.0
virtual_deployment_units_gpu,cirros-image-2-gpu,5.0,5,5.0,1,1.0,-24.0
virtual_deployment_units_con,cirros-image-1-con,1.0,5,2.0,1,1.0,1.0
virtual_deployment_units_con,cirros-image-2-con,1.0,5,2.0,1,1.0,1.0


# TEST 2
---

### Weights

`WEIGHTS = [-4, -3, -4, 2, 3]`

+ Cost (-4)
+ Over Provision (-3)
+ Overhead (-4)
+ Support deviation (2)
+ Same Version (3)

### Prediction

`prediction = { "mean": 1800, "std": 100, "min": 1600, "max": 2000 }`

+ Mean: 1800
+ Standard Deviation: 100
+ Min: 1600
+ Max: 2000

### Meta

`meta = { "current_version": "cirros-image-1-gpu" }`

+ Current Version: GPU

In [17]:
# Test 2
# WEIGHTS --> [cost, over_provision, overhead, support_deviation, same_version]
WEIGHTS = {
    "negative": {
      "cost": 4,
      "over_provision": 3,
      "overhead": 4
    },
    "positive": {
      "support_deviation": 2,
      "same_version": 3
    }
  }

prediction = { "mean": 1800, "std": 100, "min": 1600, "max": 2000 }

meta = { "current_version": "cirros-image-1-gpu" }
supported_versions = get_supported_versions(prediction=prediction, versions=PD["versions"])
decision_matrix_df = build_decision_matrix(prediction=prediction, meta=meta, versions=supported_versions)

selected_type, selected_version = get_policy_decision(decision_matrix_df, WEIGHTS)

print("\nSelected version to deploy - ", selected_type, " : ", selected_version, "\n")

decision_matrix_df


Selected version to deploy -  virtual_deployment_units_gpu  :  cirros-image-1-gpu 



Unnamed: 0,Unnamed: 1,cost,support_deviation,over_provision,same_version,overhead,score
virtual_deployment_units_gpu,cirros-image-1-gpu,5.0,5,5.0,5,5.0,-30.0
virtual_deployment_units_gpu,cirros-image-2-gpu,5.0,5,5.0,1,5.0,-42.0


# TEST 3
---

### Weights

`WEIGHTS = [-4, -3, -4, 2, 3]`

+ Cost (-4)
+ Over Provision (-3)
+ Overhead (-4)
+ Support deviation (2)
+ Same Version (3)

### Prediction

`prediction = { "mean": 600, "std": 100, "min": 500, "max": 800 }`

+ Mean: 600
+ Standard Deviation: 100
+ Min: 500
+ Max: 800

### Meta

`meta = { "current_version": "cirros-image-1-gpu" }`

+ Current Version: GPU

In [12]:
# Test 3
# WEIGHTS --> [cost, over_provision, overhead, support_deviation, same_version]
WEIGHTS = [-4, -3, -4, 2, 3]

prediction = { "mean": 600, "std": 100, "min": 500, "max": 800 }

meta = { "current_version": "cirros-image-1-gpu" }
supported_versions = get_supported_versions(prediction=prediction, versions=PD["versions"])
decision_matrix_df = build_decision_matrix(prediction=prediction, meta=meta, versions=supported_versions)

selected_type, selected_version = get_policy_decision(decision_matrix_df, PD["weights"])

print("\nSelected version to deploy - ", selected_type, " : ", selected_version, "\n")

decision_matrix_df


Selected version to deploy -  virtual_deployment_units_con  :  cirros-image-1-con 



Unnamed: 0,Unnamed: 1,cost,support_deviation,over_provision,same_version,overhead,score
virtual_deployment_units_vm,cirros-image-1-vm,1.0,1,1.0,1,5.0,-22.0
virtual_deployment_units_vm,cirros-image-2-vm,1.0,1,1.0,1,5.0,-22.0
virtual_deployment_units_gpu,cirros-image-1-gpu,5.0,5,5.0,5,1.0,-14.0
virtual_deployment_units_gpu,cirros-image-2-gpu,5.0,5,5.0,1,1.0,-26.0
virtual_deployment_units_con,cirros-image-1-con,1.0,5,2.0,1,1.0,-1.0
virtual_deployment_units_con,cirros-image-2-con,1.0,5,2.0,1,1.0,-1.0


# Run Policy on Dataset
---


In [13]:
import pandas as pd
import numpy as np

LOOK_AHEAD = 15 # Mins

traffic_training_complete = pd.read_csv(r'/plugins/son-mano-traffic-forecast/notebooks/data/dataset_1_day_traffic.csv', index_col=0)
print(traffic_training_complete.shape)
traffic_training_complete.head(5)

traffic_grouped = traffic_training_complete.groupby(
                    np.arange(len(traffic_training_complete))//LOOK_AHEAD).agg(['mean', 'std', 'min', 'max'])
# result.index = df.loc[1::2, 'Idx']
print(traffic_grouped.shape)
traffic_grouped.head(5)

(2430, 2)
(162, 8)


Unnamed: 0_level_0,sent,sent,sent,sent,received,received,received,received
Unnamed: 0_level_1,mean,std,min,max,mean,std,min,max
0,235.870341,87.827097,109.694024,368.379084,0.473819,0.257188,0.074325,0.817899
1,202.526292,81.613763,108.613811,396.220143,0.478222,0.29859,0.007725,0.892331
2,275.178908,74.691772,121.822538,365.928442,0.570156,0.306537,0.163706,0.985429
3,274.926827,78.07445,144.065484,393.991136,0.423187,0.273238,0.024922,0.968573
4,247.821193,86.69057,123.862868,355.326101,0.524129,0.326362,0.007531,0.931334


In [14]:
# Run Policy on Dataset
# WEIGHTS --> [cost, over_provision, overhead, support_deviation, same_version]
WEIGHTS = [-4, -3, -3, 2, 3]

traffic_policy_test = traffic_grouped['sent'].copy()
# traffic_policy_test.plot()

# iterate over the dataframe row by row and set version
meta = { 
   "current_version": "cirros-image-1-con",
   "current_version_baseline": "cirros-image-1-con"
}

switch_counter = {
   "baseline": 0,
   "policy": 0
}

with open("output_debug.log", "w") as f:
   for index_label, row_series in traffic_policy_test.iterrows():
      supported_versions = get_supported_versions(prediction=row_series, versions=PD["versions"])
      decision_matrix_df = build_decision_matrix(prediction=row_series, meta=meta, versions=supported_versions)

      _selected_version = ":".join(get_policy_decision(decision_matrix_df, WEIGHTS))
      traffic_policy_test.at[index_label , 'policy'] = _selected_version

      _selected_version_baseline = ":".join(find_cheapest_version(versions=supported_versions))
      traffic_policy_test.at[index_label , 'baseline'] = _selected_version_baseline

      if not _selected_version.split(":")[1] == meta["current_version"]:
         switch_counter["policy"] += 1

      if not _selected_version_baseline.split(":")[1] == meta["current_version_baseline"]:
         switch_counter["baseline"] += 1

      meta = {
         "current_version": _selected_version.split(":")[1],
         "current_version_baseline": _selected_version_baseline.split(":")[1]
      } 

      f.write(str(row_series))
      f.write(_selected_version)
      f.write("\n")
      f.write(str(decision_matrix_df))
      f.write("\n\n")
      # print(row_series)
      # print(decision_matrix_df)
      # print("\n\n")
   
print(switch_counter)
traffic_policy_test.head(10)

TypeError: list indices must be integers or slices, not str