In [None]:
#@title Dependencies install
# Installs the Python and system packages this notebook relies on.
# Every command redirects its output to /dev/null, so a failed step is silent
# here and will only show up later (e.g. as an ImportError).

# pykalman — presumably required by cloudplanner's KalmanAlgorithm; TODO confirm.
!pip install pykalman &> /dev/null
# cloudplanner is pinned to 0.0.53 and resolved against TestPyPI, with the
# regular PyPI index added as an extra index; -U upgrades an existing install.
!pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple cloudplanner==0.0.53 -U &> /dev/null
# Earlier plotly version constraints, kept for reference (currently unpinned).
# !pip install plotly==4.8.0 >> /dev/null
# !pip install plotly>=4.0.0 >> /dev/null
!pip install plotly &> /dev/null
# Download the orca 1.2.1 AppImage to /usr/local/bin/orca and mark it executable.
# Presumably used by plotly for static image export — TODO confirm it is still needed.
!wget https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage -O /usr/local/bin/orca &> /dev/null
!chmod +x /usr/local/bin/orca &> /dev/null
# System packages, presumably needed to run orca without a display — TODO confirm.
# NOTE(review): there is no `-y` flag and the output is hidden; verify that this
# step does not abort at a confirmation prompt.
!apt-get install xvfb libgtk2.0-0 libgconf-2-4 &> /dev/null

from tqdm import tqdm
from sklearn.preprocessing import minmax_scale
import pandas as pd
import traceback
from os import listdir, mkdir
from os.path import isfile, join, isdir

In [None]:
#@title Data Provider Setup
class ExperimentDataProvider:
    """Serve the VM usage files found in ``BASE_PATH`` as experiment data.

    Every regular file located directly inside ``BASE_PATH`` is treated as one
    VM trace. All traces are grouped under a single experiment named ``'DEMO'``.
    """

    # Folder holding the per-VM CSV files (exposed as a Colab form parameter).
    BASE_PATH = 'vm_data' #@param {type:"string"}

    def __init__(self):
        """Index the files available in ``BASE_PATH``.

        Raises:
            FileNotFoundError: if ``BASE_PATH`` is not an existing directory.
        """
        if not isdir(self.BASE_PATH):
            raise FileNotFoundError(
                'please create folder {} with vm csvs'.format(self.BASE_PATH))

        # List the *configured* folder (not a hard-coded one) and sort the names,
        # because listdir() returns entries in arbitrary order.
        self.csv_names = sorted(
            f for f in listdir(self.BASE_PATH) if isfile(join(self.BASE_PATH, f)))
        # Maps experiment name -> number of data files belonging to it.
        self._experiments_data_count = {'DEMO': len(self.csv_names)}

    def get_experiment_count(self):
        """Return the number of known experiments."""
        return len(self._experiments_data_count)

    def get_experiment_len(self, experiment):
        """Return the number of data files of ``experiment``.

        Raises:
            KeyError: if ``experiment`` is unknown.
        """
        return self._experiments_data_count[experiment]

    def get_experiment_names(self):
        """Return a view of the known experiment names."""
        return self._experiments_data_count.keys()

    def get_experiment_generator(self, experiment, metric):
        """Return a generator of data frames for ``experiment``.

        Args:
            experiment: name of the experiment to read.
            metric: column that every yielded frame must contain.

        Returns:
            A generator of pandas data frames, or ``None`` when ``experiment``
            is unknown.
        """
        if experiment not in self._experiments_data_count:
            return None
        return self._experiment_gen(
            experiment, self._experiments_data_count[experiment], metric)

    def _experiment_gen(self, experiment, exp_num, metric):
        """Yield one cleaned frame per data file that contains ``metric``.

        At most the first ``exp_num`` files are read. Rows with missing values
        are dropped and ``metric`` is rescaled to the range 0..100. Files that
        lack the ``metric`` column, or that have no rows left after cleaning,
        are skipped. A file that fails to load or process is reported and
        skipped so that one bad file does not stop the whole batch.
        """
        for csv_name in self.csv_names[:exp_num]:
            try:
                df = pd.read_csv(join(self.BASE_PATH, csv_name),
                                 parse_dates=['timestamp'])
                if metric not in df:
                    continue
                # reset_index() without drop=True keeps the pre-dropna row
                # labels in an 'index' column, as the original code did.
                df = df.dropna().reset_index()
                if df.empty:
                    # Nothing left to scale; minmax_scale would raise on it.
                    continue
                df[metric] = minmax_scale(df[metric], feature_range=(0, 100))
            except Exception:
                # Deliberate best-effort: report the failing file and move on.
                print('Failed to load', csv_name)
                print(traceback.format_exc())
            else:
                yield df

In [None]:
#@title Launch the experiments
from cloudplanner.usage_prediction.utils import run_batch_experiment, analyze_batch_result
from cloudplanner.anomaly_detection.algorithms import LimitAlgorithm, MedianAlgorithm, KalmanAlgorithm, SavgolAlgorithm, WeightedHybridAlgorithm
import json

# Folder that receives one JSON result file per experiment.
RESULTS_PATH = 'experiment_results' #@param {type: "string"}
if not isdir(RESULTS_PATH):
    mkdir(RESULTS_PATH)

dp = ExperimentDataProvider()

# Names of the experiments whose results were written to disk successfully.
completed_experiments = []

for exp_name in dp.get_experiment_names():
    # Reuse the provider created above instead of re-scanning the data folder.
    dfs = dp.get_experiment_generator(exp_name, 'cpu.usage.average')

    print('Experiment', exp_name, 'Length:', dp.get_experiment_len(exp_name))
    # Hybrid detector built from (algorithm, weight) pairs.
    # NOTE(review): the weights look hand-tuned — confirm their meaning against
    # the cloudplanner documentation.
    hybrid = WeightedHybridAlgorithm([(MedianAlgorithm(), 0.9),
                                    (LimitAlgorithm(upper_treshold=20, lower_treshold=0), 0.3),
                                    (KalmanAlgorithm(tolerance_multiplier=1), 0.5),
                                    (SavgolAlgorithm(), 0.7)])

    # `resb` is left defined after the loop; the analysis cell reads it.
    resb = run_batch_experiment(dfs, [LimitAlgorithm(upper_treshold=50, lower_treshold=0),
                                    MedianAlgorithm(),
                                    hybrid])

    try:
        # Write to RESULTS_PATH (the folder created above), once per experiment.
        with open(join(RESULTS_PATH, exp_name + '.json'), 'w') as fp:
            json.dump(resb, fp)
        completed_experiments.append(exp_name)
        print('Successfully completed experiment', exp_name)
    except Exception:
        # Best-effort save: report the failure and continue with the next experiment.
        print(traceback.format_exc())
        print('Failed to save data for experiment', exp_name)


In [None]:
#@title (Optional) Reload the experiments results
import json
from os.path import isfile, join

RESULTS_PATH = 'experiment_results' #@param {type: "string"}
# Read the saved DEMO results back into `resb`, so the analysis cell can run
# without launching the experiments again.
demo_results_file = join(RESULTS_PATH, 'DEMO.json')
with open(demo_results_file, 'r') as results_fp:
    resb = json.load(results_fp)

In [None]:
#@title heatmap and results analysis
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import json
from cloudplanner.usage_prediction.utils import analyze_batch_result


# Summarise the raw batch results; `resba` is keyed by the labels that become
# the rows of the tables below.
resba = analyze_batch_result(resb)

# Build one single-row frame per key and stack them into one table.
frames = [pd.DataFrame(resba[key], index=[key]) for key in resba.keys()]
resdf = pd.concat(frames)

# Standardise every column so metrics on different scales share one colour
# scale in the heatmap. The result is a plain array, so labels are passed
# to the plot separately.
scaler = StandardScaler()
resdf2 = scaler.fit_transform(resdf)

column_labels = list(resdf.columns)
row_labels = list(resdf.index)

# Unscaled variant of the heatmap, kept for reference:
# fig1 = px.imshow(resdf,
#                 x=list(resdf.columns),
#                 y=list(resdf.index),
#                 color_continuous_scale='RdBu_r')
# fig1.show()

fig2 = px.imshow(
    resdf2,
    x=column_labels,
    y=row_labels,
    color_continuous_scale='RdBu_r',
)
fig2.show()

# Raw (unscaled) summary table, displayed as the cell output.
asd = pd.DataFrame.from_dict(resba, orient='index')
asd

Unnamed: 0,avg_total_diff,avg_overestimate_diff,avg_underestimate_diff,highest_overestimate,lowest_underestimate,total_overestimates,total_underestimates,correlation,RMSE,avg_total_diff_stdev,avg_overestimate_diff_stdev,avg_underestimate_diff_stdev,highest_overestimate_stdev,lowest_underestimate_stdev,total_overestimates_stdev,total_underestimates_stdev,correlation_stdev,RMSE_stdev
,15.403983,10.527028,-23.07806,21.763276,-65.200798,89.816327,64.806122,154423.576239,22.18016,14.674059,13.64705,23.361859,19.392771,26.404281,53.729054,37.004223,173791.940333,15.742418
LimitAlgorithm,19.661015,1.831432,-30.802388,5.40257,-90.272716,67.520408,87.102041,34446.181409,34.075852,16.179826,3.285063,22.21549,7.724979,12.161294,48.75739,49.310397,77388.526057,20.712594
MedianAlgorithm,13.811583,9.718816,-20.152489,22.435607,-61.815448,88.928571,65.693878,176777.768502,20.131882,13.701765,12.952208,20.966655,20.983719,27.757117,54.526878,42.408035,193521.989936,14.618407
WeightedHybridAlgorithm,14.440712,9.800536,-21.498648,21.95119,-63.464306,89.377551,65.244898,162482.491511,21.171026,14.014855,13.059844,22.208681,19.748934,27.005469,55.80055,39.902216,178613.718272,15.290164
