In [2]:
from pandas import DataFrame, concat, read_csv
from keras.models import model_from_json
from sklearn.preprocessing import StandardScaler
import os

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Config Vars

In [3]:
# Number of shifts; one set of models is loaded per shift (models/shift-1 ... models/shift-<shift_num>).
shift_num = 5
# Model tag that appears in the model file names and in the output CSV names.
model_type = 'lstm'
# Folder the prediction CSVs are written to.
directory_to_save = "data/shift_predictions"
# NOTE(review): despite "train" in the name, this path points at the test dataset CSV.
conf_train_dataset_path = 'data/test_dataset.csv'
# Columns dropped from the dataset right after it is loaded.
conf_dataset_columns_to_drop = ["@timestamp", "anomaly"]

## Loading Test Dataset

In [4]:
# Read the CSV (first column becomes the index) and discard the configured columns.
test_dataset = read_csv(conf_train_dataset_path, header=0, index_col=0).drop(
    conf_dataset_columns_to_drop, axis=1
)
# Standardise every remaining column in place.
# NOTE(review): the scaler is fitted on this dataset itself; confirm that is
# intended rather than reusing a scaler fitted at training time.
test_dataset[test_dataset.columns] = StandardScaler().fit_transform(test_dataset)
test_dataset.head()

Unnamed: 0,system.cpu.idle.pct,system.cpu.iowait.pct,system.cpu.softirq.pct,system.cpu.system.pct,system.cpu.total.pct,system.cpu.user.pct,system.diskio_sda.iostat.await,system.diskio_sda.iostat.busy,system.diskio_sda.iostat.queue.avg_size,system.diskio_sda.iostat.read.request.merges_per_sec,...,jolokia.metrics.threading.thread_count,system.load.1,system.load.15,system.load.5,system.load.norm.1,system.load.norm.15,system.load.norm.5,system.memory.actual.used.pct,system.memory.swap.used.pct,system.memory.used.pct
40000,0.208742,-0.119609,0.234631,-0.51077,-0.208742,-0.166243,-0.151698,-0.183571,-0.118315,-0.042714,...,-0.316444,-0.17505,-0.162466,-0.156147,-0.175009,-0.162529,-0.156101,-0.006905,-0.84174,0.422987
40001,0.360382,-0.211161,0.234631,0.572227,-0.360382,-0.30803,-0.151698,-0.183571,-0.118315,-0.042714,...,-0.316444,-0.182763,-0.16375,-0.159437,-0.182722,-0.163813,-0.159485,0.00314,-0.84174,0.466005
40002,0.135836,-0.0249,-0.005913,-0.015891,-0.135836,-0.127667,-0.151698,-0.183571,-0.118315,-0.042714,...,-0.316444,-0.14334,-0.159899,-0.151447,-0.143298,-0.159962,-0.151401,-0.006905,-0.84174,0.491816
40003,0.269992,-0.125923,0.365836,0.256652,-0.269992,-0.238388,-0.151698,-0.183571,-0.118315,-0.042714,...,-0.316444,-0.148482,-0.160541,-0.152387,-0.14844,-0.160476,-0.152341,0.00314,-0.84174,0.509023
40004,0.311016,-0.184327,0.409572,0.801736,-0.311016,-0.271274,-0.151698,-0.183571,-0.118315,-0.042714,...,-0.316444,-0.153624,-0.161183,-0.153797,-0.153583,-0.161246,-0.153845,0.00314,-0.84174,0.526231


In [5]:
# Keep the dataset's column order so the predictions can be rearranged to match it later.
column_order = test_dataset.columns

## Clustered Columns

In [6]:
# Column groups, one inner list per model: the j-th list provides the column
# names assigned to the output of the j-th model of every shift.
clustered_columns = [
    # Group 1: CPU iowait and disk I/O (sda, sda2) columns.
    [
    'system.cpu.iowait.pct',
    'system.diskio_sda.iostat.await',
    'system.diskio_sda.iostat.busy',
    'system.diskio_sda.iostat.queue.avg_size',
    'system.diskio_sda.iostat.read.request.merges_per_sec',
    'system.diskio_sda.iostat.read.request.per_sec',
    'system.diskio_sda.iostat.request.avg_size',
    'system.diskio_sda.iostat.service_time',
    'system.diskio_sda.iostat.write.request.merges_per_sec',
    'system.diskio_sda.iostat.write.request.per_sec',
    'system.diskio_sda2.iostat.await',
    'system.diskio_sda2.iostat.busy',
    'system.diskio_sda2.iostat.queue.avg_size',
    'system.diskio_sda2.iostat.read.request.merges_per_sec',
    'system.diskio_sda2.iostat.read.request.per_sec',
    'system.diskio_sda2.iostat.request.avg_size',
    'system.diskio_sda2.iostat.service_time',
    'system.diskio_sda2.iostat.write.request.merges_per_sec',
    'system.diskio_sda2.iostat.write.request.per_sec'
    ],
    # Group 2: total and user CPU columns.
    [
    'system.cpu.total.pct',
    'system.cpu.user.pct'
    ],
    # Group 3: heap memory committed / max columns.
    [
    'jolokia.metrics.memory.heap_memory_usage.committed',
    'jolokia.metrics.memory.heap_memory_usage.max'
    ],
    # Group 4: system load columns.
    [
    'system.load.1',
    'system.load.15',
    'system.load.5',
    'system.load.norm.1',
    'system.load.norm.15',
    'system.load.norm.5'
    ],
    # Group 5: remaining CPU, memory and thread-count columns.
    [
    'system.cpu.idle.pct',
    'system.cpu.softirq.pct',
    'system.cpu.system.pct',
    'jolokia.metrics.memory.heap_memory_usage.used',
    'jolokia.metrics.memory.non_heap_memory_usage.used',
    'jolokia.metrics.threading.daemon_thread_count',
    'jolokia.metrics.threading.thread_count',
    'system.memory.actual.used.pct',
    'system.memory.swap.used.pct',
    'system.memory.used.pct'
    ],
    
]

## Loading Models

In [7]:
def loadModel(model_name):
    """Rebuild a model from ``<model_name>.json`` and ``<model_name>.h5``.

    Parameters
    ----------
    model_name : str
        Path prefix without extension. The architecture is read from
        ``model_name + '.json'`` and the weights from ``model_name + '.h5'``.

    Returns
    -------
    The object returned by ``model_from_json`` after its weights were loaded.

    Raises
    ------
    Whatever ``open``, ``model_from_json`` or ``load_weights`` raise, e.g. when
    one of the two files is missing.
    """
    # The context manager closes the file even if reading it fails.
    with open(model_name + '.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(model_name + ".h5")
    print("Loaded " + model_name + " from disk")
    return loaded_model

# shifts[s][c] holds the model of shift s+1 for clustered_columns[c].
shifts = []
for i in range(1, shift_num + 1):
    models = []
    # The number of models per shift follows the cluster list instead of a
    # hard-coded 5, because model j is later paired with clustered_columns[j-1].
    for j in range(1, len(clustered_columns) + 1):
        model = loadModel("models/shift-" + str(i) + "/model_" + model_type + "_" + str(j))
        models.append(model)
    shifts.append(models)

Loaded models/shift-1/model_lstm_1 from disk
Loaded models/shift-1/model_lstm_2 from disk
Loaded models/shift-1/model_lstm_3 from disk
Loaded models/shift-1/model_lstm_4 from disk
Loaded models/shift-1/model_lstm_5 from disk
Loaded models/shift-2/model_lstm_1 from disk
Loaded models/shift-2/model_lstm_2 from disk
Loaded models/shift-2/model_lstm_3 from disk
Loaded models/shift-2/model_lstm_4 from disk
Loaded models/shift-2/model_lstm_5 from disk
Loaded models/shift-3/model_lstm_1 from disk
Loaded models/shift-3/model_lstm_2 from disk
Loaded models/shift-3/model_lstm_3 from disk
Loaded models/shift-3/model_lstm_4 from disk
Loaded models/shift-3/model_lstm_5 from disk
Loaded models/shift-4/model_lstm_1 from disk
Loaded models/shift-4/model_lstm_2 from disk
Loaded models/shift-4/model_lstm_3 from disk
Loaded models/shift-4/model_lstm_4 from disk
Loaded models/shift-4/model_lstm_5 from disk
Loaded models/shift-5/model_lstm_1 from disk
Loaded models/shift-5/model_lstm_2 from disk
Loaded mod

## Predicting 

In [8]:
# Build the 3-D (rows, 1, columns) input once; it is the same for every model.
n_rows, n_columns = test_dataset.shape
model_input = test_dataset.values.reshape((n_rows, 1, n_columns))

shift_predictions = []
for i in range(1, shift_num + 1):
    models = shifts[i - 1]
    # zip() would silently truncate on a mismatch, so fail loudly instead.
    if len(models) != len(clustered_columns):
        raise ValueError(
            "shift %d has %d models but there are %d column clusters"
            % (i, len(models), len(clustered_columns))
        )
    # Each model's output is labelled with the columns of its cluster.
    predictions = [
        DataFrame(model.predict(model_input), columns=cluster)
        for model, cluster in zip(models, clustered_columns)
    ]

    # Put the per-cluster frames side by side and restore the dataset's index.
    shift_prediction = concat(predictions, axis=1)
    shift_prediction = shift_prediction.set_index(test_dataset.index)
    shift_predictions.append(shift_prediction)

## Reordering Columns

In [9]:
def order_columns(dataset, column_arr):
    """Return a new frame holding ``dataset``'s columns in ``column_arr`` order.

    Parameters
    ----------
    dataset : DataFrame
        Frame whose columns should be rearranged; it is not modified.
    column_arr : iterable of column labels
        Desired column order. Only the listed columns are kept.

    Returns
    -------
    DataFrame
        Copy of the selected columns with ``dataset``'s index.

    Raises
    ------
    KeyError
        If a label in ``column_arr`` is not a column of ``dataset``.
    """
    # Selecting with a list reorders in one step; copy() keeps the result
    # independent of the input frame.
    return dataset[list(column_arr)].copy()

In [10]:
# First rows of the first shift's predictions, before the columns are reordered.
shift_predictions[0].head()

Unnamed: 0,system.cpu.iowait.pct,system.diskio_sda.iostat.await,system.diskio_sda.iostat.busy,system.diskio_sda.iostat.queue.avg_size,system.diskio_sda.iostat.read.request.merges_per_sec,system.diskio_sda.iostat.read.request.per_sec,system.diskio_sda.iostat.request.avg_size,system.diskio_sda.iostat.service_time,system.diskio_sda.iostat.write.request.merges_per_sec,system.diskio_sda.iostat.write.request.per_sec,...,system.cpu.idle.pct,system.cpu.softirq.pct,system.cpu.system.pct,jolokia.metrics.memory.heap_memory_usage.used,jolokia.metrics.memory.non_heap_memory_usage.used,jolokia.metrics.threading.daemon_thread_count,jolokia.metrics.threading.thread_count,system.memory.actual.used.pct,system.memory.swap.used.pct,system.memory.used.pct
40000,-0.361517,-0.495771,-0.488421,-0.509432,-0.500998,-0.589186,-0.678541,-0.379326,-0.401345,-0.033801,...,0.076566,0.12209,-0.034811,-0.215506,0.707943,0.441846,0.335626,-0.359577,-0.795136,0.387304
40001,-0.370022,-0.556739,-0.465269,-0.562885,-0.459286,-0.564216,-0.763645,-0.397039,-0.430806,0.205845,...,0.118582,0.141133,0.092217,-0.221396,0.685618,0.361812,0.154472,-0.370244,-0.80265,0.414132
40002,-0.368359,-0.506494,-0.498423,-0.51938,-0.494396,-0.580752,-0.686337,-0.387486,-0.41378,-0.044198,...,0.031737,0.103829,0.123076,-0.125514,0.732386,0.50509,0.374021,-0.350802,-0.872432,0.174703
40003,-0.384864,-0.554604,-0.495888,-0.561864,-0.484434,-0.583757,-0.750241,-0.408671,-0.438167,0.121418,...,0.081801,0.130099,0.089975,-0.184945,0.713387,0.431222,0.270579,-0.362271,-0.828267,0.33062
40004,-0.391702,-0.580176,-0.493683,-0.584204,-0.471601,-0.57776,-0.78132,-0.418692,-0.450568,0.191431,...,0.093579,0.135047,0.196174,-0.152649,0.711237,0.411132,0.228427,-0.373698,-0.849464,0.302044


In [11]:
# Bring every shift's prediction frame into the dataset's column order.
for shift_idx in range(shift_num):
    unordered = shift_predictions[shift_idx]
    shift_predictions[shift_idx] = order_columns(unordered, column_order)

In [12]:
# First rows of the first shift's predictions, now in column_order.
shift_predictions[0].head()

Unnamed: 0,system.cpu.idle.pct,system.cpu.iowait.pct,system.cpu.softirq.pct,system.cpu.system.pct,system.cpu.total.pct,system.cpu.user.pct,system.diskio_sda.iostat.await,system.diskio_sda.iostat.busy,system.diskio_sda.iostat.queue.avg_size,system.diskio_sda.iostat.read.request.merges_per_sec,...,jolokia.metrics.threading.thread_count,system.load.1,system.load.15,system.load.5,system.load.norm.1,system.load.norm.15,system.load.norm.5,system.memory.actual.used.pct,system.memory.swap.used.pct,system.memory.used.pct
40000,0.076566,-0.361517,0.12209,-0.034811,-0.416165,-0.352231,-0.495771,-0.488421,-0.509432,-0.500998,...,0.335626,-0.164842,-0.126321,-0.157648,-0.166058,-0.126266,-0.158939,-0.359577,-0.795136,0.387304
40001,0.118582,-0.370022,0.141133,0.092217,-0.436169,-0.377877,-0.556739,-0.465269,-0.562885,-0.459286,...,0.154472,-0.165528,-0.1252,-0.157787,-0.167715,-0.128227,-0.160892,-0.370244,-0.80265,0.414132
40002,0.031737,-0.368359,0.103829,0.123076,-0.392527,-0.340832,-0.506494,-0.498423,-0.51938,-0.494396,...,0.374021,-0.15246,-0.134078,-0.157397,-0.151779,-0.130855,-0.156217,-0.350802,-0.872432,0.174703
40003,0.081801,-0.384864,0.130099,0.089975,-0.419147,-0.364509,-0.554604,-0.495888,-0.561864,-0.484434,...,0.270579,-0.154946,-0.126663,-0.151122,-0.156765,-0.128618,-0.153404,-0.362271,-0.828267,0.33062
40004,0.093579,-0.391702,0.135047,0.196174,-0.423454,-0.368456,-0.580176,-0.493683,-0.584204,-0.471601,...,0.228427,-0.154773,-0.125196,-0.149665,-0.157118,-0.128785,-0.152865,-0.373698,-0.849464,0.302044


In [13]:
# First rows of the second shift's predictions.
shift_predictions[1].head()

Unnamed: 0,system.cpu.idle.pct,system.cpu.iowait.pct,system.cpu.softirq.pct,system.cpu.system.pct,system.cpu.total.pct,system.cpu.user.pct,system.diskio_sda.iostat.await,system.diskio_sda.iostat.busy,system.diskio_sda.iostat.queue.avg_size,system.diskio_sda.iostat.read.request.merges_per_sec,...,jolokia.metrics.threading.thread_count,system.load.1,system.load.15,system.load.5,system.load.norm.1,system.load.norm.15,system.load.norm.5,system.memory.actual.used.pct,system.memory.swap.used.pct,system.memory.used.pct
40000,0.181044,-0.231889,0.118502,0.039701,-0.256571,-0.206117,-0.156909,-0.220135,-0.059778,0.105538,...,-0.191893,-0.20752,-0.237975,-0.242148,-0.206104,-0.24137,-0.242459,0.087741,-0.833121,0.140597
40001,0.259552,-0.268387,0.205137,0.236606,-0.254009,-0.201967,-0.153754,-0.230749,-0.024946,0.144275,...,-0.364191,-0.219645,-0.217521,-0.241217,-0.217796,-0.221828,-0.241625,0.170708,-0.865036,0.097214
40002,0.141409,-0.206431,0.110979,0.168448,-0.258658,-0.212698,-0.146228,-0.203364,-0.062566,0.097138,...,-0.21261,-0.193988,-0.24542,-0.240812,-0.191679,-0.251026,-0.241063,0.061907,-0.822,0.175012
40003,0.209116,-0.263988,0.158426,0.176215,-0.254712,-0.205879,-0.173307,-0.240035,-0.056481,0.121024,...,-0.292581,-0.208331,-0.229976,-0.241492,-0.20606,-0.235365,-0.241827,0.145051,-0.850924,0.124434
40004,0.230868,-0.264009,0.193967,0.298155,-0.253033,-0.20261,-0.157534,-0.230474,-0.033071,0.136243,...,-0.363628,-0.214154,-0.220281,-0.241166,-0.211634,-0.226193,-0.24155,0.164534,-0.858882,0.110402


## Saving Prediction

In [14]:
# Make sure the output folder is there before writing into it.
output_dir_missing = not os.path.exists(directory_to_save)
if output_dir_missing:
    os.makedirs(directory_to_save)

# One CSV per shift; file names are numbered from 1.
for shift_idx in range(shift_num):
    csv_path = "".join(
        [directory_to_save, "/predicted_data_", model_type, "_model_", str(shift_idx + 1), ".csv"]
    )
    shift_predictions[shift_idx].to_csv(csv_path)

In [22]:
# Show the predicted 'system.cpu.idle.pct' column of the first shift.
# NOTE(review): this displays the whole Series; .head() would keep the output short.
shift_predictions[0]['system.cpu.idle.pct']

40000    0.076566
40001    0.118582
40002    0.031737
40003    0.081801
40004    0.093579
40005   -0.004750
40006   -0.021615
40007   -0.013902
40008   -0.060670
40009   -0.035225
40010    0.065840
40011    0.130112
40012    0.113380
40013    0.132003
40014    0.125537
40015    0.115628
40016    0.095056
40017    0.149737
40018    0.164206
40019    0.151868
40020    0.133819
40021    0.143273
40022    0.130575
40023    0.142172
40024    0.029930
40025    0.184334
40026    0.185862
40027    0.172432
40028    0.165134
40029    0.150191
           ...   
49970    0.460944
49971    0.357132
49972    0.438193
49973    0.480937
49974    0.147332
49975    0.389225
49976    0.418849
49977    0.211010
49978    0.203686
49979    0.224342
49980    0.260829
49981    0.252455
49982    0.258782
49983    0.255348
49984    0.261571
49985    0.243628
49986    0.216391
49987    0.233638
49988    0.228023
49989    0.226298
49990    0.228726
49991    0.221549
49992    0.303969
49993    0.322212
49994    0