In [1]:
import os
import tensorflow as tf
from model import Model
from input_pipe import InputPipe
from feeder import VarFeeder
from tqdm import trange
import matplotlib.pyplot as plt
import collections
import pandas as pd
import numpy as np
from trainer import predict
from hparams import build_hparams
import hparams
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


In [2]:
def smape(true, pred):
    """Element-wise symmetric absolute percentage error.

    Computes ``|true - pred| / (|true| + |pred|)`` for every element and
    returns 0 wherever the denominator is exactly 0. NaN inputs propagate
    to NaN outputs (the callers mask them out before averaging).

    Returns a numpy array (``np.where`` always yields an ndarray).
    """
    denominator = np.abs(true) + np.abs(pred)
    # The division is evaluated for every element, including those where the
    # denominator is 0; np.where then replaces those positions with 0.
    ratio = np.abs(true - pred) / denominator
    return np.where(denominator == 0, 0, ratio)

def mae(true, pred):
    """Element-wise absolute error ``|true - pred|``.

    The difference is taken *before* the absolute value so that values of
    opposite sign are penalised by their real distance (e.g. true=-1, pred=1
    gives 2, not 0). This matches the error computed in ``predict_loss``.
    For non-negative inputs the result is the same as
    ``| |true| - |pred| |``.

    Accepts scalars, numpy arrays or pandas objects; NaNs propagate.
    """
    return np.abs(true - pred)

def mean_smape(true, pred):
    """Mean of the element-wise SMAPE, ignoring NaN entries.

    NaN positions in the raw SMAPE are masked so they do not contribute to
    the average.
    """
    per_point = smape(true, pred)
    nan_mask = np.isnan(per_point)
    return np.ma.array(per_point, mask=nan_mask).mean()

def mean_mae(true, pred):
    """Mean of the element-wise absolute error, ignoring NaN entries.

    NaN positions in the raw error are masked so they do not contribute to
    the average.
    """
    per_point = mae(true, pred)
    nan_mask = np.isnan(per_point)
    return np.ma.array(per_point, mask=nan_mask).mean()

def predict_loss(prev, paths, split_df):
    """Run the 3-model ensemble and score it against the true values.

    Args:
        prev: frame of true values, indexed by the same row names as the
            predictions (after ASCII decoding).
        paths: paths to the model weights (checkpoints) handed to ``predict``.
        split_df: forwarded unchanged to ``predict`` as ``split_df``.

    Returns:
        ``(preds, loss)`` where ``preds`` is the average of the three
        per-model forecasts and ``loss`` is the summed absolute difference
        divided by ``len(preds.index) * 288``.
    """
    forecasts = []
    for target in range(3):
        # Each target model is predicted in a freshly reset default graph.
        tf.reset_default_graph()
        forecast = predict(paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=288,
                           n_models=3, target_model=target, seed=2, batch_size=50, asgd=True, split_df=split_df)
        forecasts.append(forecast)
    preds = sum(forecasts) / 3
    # The prediction index holds bytes; decode so it can be used to look up rows in `prev`.
    preds.index = [raw.decode('ascii') for raw in preds.index]
    # NOTE(review): `.loc` slices by label, so `-288:` only means "last 288
    # columns" if the column labels make it so — confirm against `prev`.
    total = sum(
        np.abs(preds.loc[name, :] - prev.loc[name, -288:]).sum()
        for name in preds.index
    )
    loss = total / (len(preds.index) * 288)
    return preds, loss

def split_data(df):
    bad_path = os.path.join('data/badcase', 'single_rnn_mae_beyond_1000_vm_uuids')
    bad_df = pd.DataFrame()
    normal_df = df.copy()
    with open(bad_path, 'r') as f:
        line = f.readline()
        while(line):
            line = line[:-1] + ".hdf5"
            if line in df.index:
                bad_df = bad_df.append(df.loc[line])
                normal_df = normal_df.drop(line)
            line = f.readline()
    return bad_df.sort_index(), normal_df.sort_index()

def show_single(preds, prev, vm, scope=288, bad_case=True):
    """Plot one VM's true series against its forecast and print the mean MAE.

    Args:
        preds: forecast frame indexed by VM name.
        prev: frame of true values indexed by VM name.
        vm: integer position. With ``bad_case=False`` it selects
            ``preds.index[vm]``; with ``bad_case=True`` it selects the
            ``vm``-th entry of the bad-case list that is present in ``preds``.
        scope: width of the true-value slice that is plotted.
        bad_case: whether to pick the VM from the bad-case uuid file.

    Relies on the module-level ``ends`` sequence being defined before the call.
    """
    name = preds.index[vm]
    if bad_case:
        uuid_file = os.path.join('data/badcase', 'single_rnn_mae_beyond_1000_vm_uuids')
        with open(uuid_file, 'r') as f:
            # Drop the final character of each line (the newline) and add the suffix.
            candidates = [raw[:-1] + ".hdf5" for raw in f]
        bad_list = [key for key in candidates if key in preds.index]
        name = bad_list[vm]

    print(f'vm name: {name}')
    # NOTE(review): `ends` is indexed by `vm`, which with bad_case=True is a
    # position in the bad-case list rather than a row position — confirm that
    # ends[vm] really belongs to `name`.
    end = ends[vm]
    prev.loc[name, end - scope : end].plot(logy=True)
    preds.loc[name, :].plot(logy=True)
    # The loss is always taken over a fixed 288-wide slice, independent of `scope`.
    print(mean_mae(prev.loc[name, end - 288 : end], preds.loc[name, :]))

In [3]:
from make_features import read_all, read_pickle, find_start_end

# Load the full feature frame and locate the start/end position of every row.
df_all = read_all()
starts, ends = find_start_end(df_all.values)

# Apply exp(x) - 1 column by column; the result is what the rest of the
# notebook treats as the true values.
# NOTE(review): presumably this undoes a log1p transform applied in make_features — confirm.
prev = df_all.apply(lambda column: np.exp(column) - 1)

be ready to see PyTables asking for *lots* of memory and possibly slow
I/O.  You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leave.  Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it.
Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  columns=columns)


In [5]:
# Value forwarded to predict_loss (and from there to trainer.predict) as `split_df`
# for the run over `prev`. The abnormal/normal runs in this notebook pass 2 and 1
# instead; the exact meaning of each value is defined by trainer.predict.
split_df = 0

In [32]:
# Score abnormal ("bad case") and normal VMs separately, then combine both
# losses into a row-count-weighted overall MAE.
bad_prev, normal_prev = split_data(prev)

# NOTE(review): the abnormal run loads checkpoints from 'data/normal_cpt', the
# same directory as the normal run below — confirm this is intended and not a
# copy-paste slip.
bad_paths = list(tf.train.get_checkpoint_state(os.path.join('data/normal_cpt', 's32')).all_model_checkpoint_paths)
print(f'Abnormal vm model weight path : [{bad_paths}]')
bad_preds, bad_loss = predict_loss(bad_prev, bad_paths, 2)

normal_paths = list(tf.train.get_checkpoint_state(os.path.join('data/normal_cpt', 's32')).all_model_checkpoint_paths)
print(f'Normal vm model weight path : [{normal_paths}]')
normal_preds, normal_loss = predict_loss(normal_prev, normal_paths, 1)

# Stored as `all_mae`, not `mae`: assigning to `mae` would replace the mae()
# function that mean_mae() looks up at call time, breaking every later
# mean_mae()/show_single() call in this kernel.
all_mae = (bad_loss * len(bad_preds.index) + normal_loss * len(normal_preds.index)) / (len(bad_preds.index) + len(normal_preds.index))
print(f'Abnormal vm mean MAE = {bad_loss},\nnormal vm mean MAE = {normal_loss},\nall vm mean MAE = {all_mae}\n')

Abnormal vm model weight path : [['data/normal_cpt/s32/cpt-395']]
INFO:tensorflow:Restoring parameters from data/bad_vars/feeder.cpt
INFO:tensorflow:Restoring parameters from data/normal_cpt/s32/cpt-395
0..............🎉
INFO:tensorflow:Restoring parameters from data/bad_vars/feeder.cpt
INFO:tensorflow:Restoring parameters from data/normal_cpt/s32/cpt-395
0..............🎉
INFO:tensorflow:Restoring parameters from data/bad_vars/feeder.cpt
INFO:tensorflow:Restoring parameters from data/normal_cpt/s32/cpt-395
0..............🎉


In [7]:
# Evaluate the checkpoints stored under data/cpt/s32 against `prev`.
paths = list(tf.train.get_checkpoint_state(os.path.join('data/cpt', 's32')).all_model_checkpoint_paths)
preds, loss = predict_loss(prev, paths, split_df)
print(f'Mean MAE = {loss}\n........Generate csv for each csv..........')

INFO:tensorflow:Restoring parameters from data/vars/feeder.cpt
INFO:tensorflow:Restoring parameters from data/cpt/s32/cpt-133
0....................................................................🎉
INFO:tensorflow:Restoring parameters from data/vars/feeder.cpt
INFO:tensorflow:Restoring parameters from data/cpt/s32/cpt-133
0....................................................................🎉
INFO:tensorflow:Restoring parameters from data/vars/feeder.cpt
INFO:tensorflow:Restoring parameters from data/cpt/s32/cpt-133
0....................................................................🎉
Mean MAE = 554.8188417782551
........Generate csv for each csv..........


Visual sanity check

In [None]:
# Plot entry 110 with a 1288-wide slice of true values (bad_case keeps its default of True).
show_single(preds, prev, vm=110, scope=1288)