In [59]:
from pandas import DataFrame, concat, read_csv
from keras.models import model_from_json
from sklearn.preprocessing import StandardScaler
import os

## Config Vars

In [60]:
shift_num = 3
model_type = 'gru'
directory_to_save = "data/shift_predictions"
conf_train_dataset_path = 'data/test_dataset.csv'
conf_dataset_columns_to_drop = ["@timestamp", "label"]


## Loading Test Dataset

In [61]:
test_dataset = read_csv(conf_train_dataset_path, header=0, index_col=0)
test_dataset.drop(conf_dataset_columns_to_drop, axis=1, inplace=True)
test_dataset[test_dataset.columns] = StandardScaler().fit_transform(test_dataset)
test_dataset.head()

Unnamed: 0,system.cpu.user.pct,system.cpu.system.pct,system.cpu.idle.pct,system.cpu.iowait.pct,system.cpu.softirq.pct,system.cpu.total.pct,system.memory.used.pct,system.network.in.bytes,system.network.in.packets,system.network.in.dropped,...,system.diskio.iostat.read.per_sec.bytes,system.diskio.iostat.write.per_sec.bytes,jvm.metrics.memory.heap_memory_usage.committed,jvm.metrics.memory.heap_memory_usage.max,jvm.metrics.memory.heap_memory_usage.used,jvm.metrics.threading.thread_count,jvm.metrics.gc.psms.collection_count,jvm.metrics.gc.psms.collection_time,jvm.metrics.gc.pss.collection_count,jvm.metrics.gc.pss.collection_time
57572,-1.048614,0.547912,0.84073,0.150908,0.178635,-0.84073,0.518097,0.428102,0.572126,1.45986,...,-0.023243,-0.029943,0.03597,0.031253,-0.077524,-0.658071,-0.12856,-0.126507,-0.32202,-0.359868
57573,-0.133157,0.494376,-0.363016,1.50433,0.921987,0.363016,0.529051,0.428102,0.392517,0.261668,...,-0.023243,-0.029943,0.03597,0.031253,-0.077524,-0.658071,-0.12856,-0.126507,-0.32202,-0.359868
57574,-0.786836,-1.754148,0.92622,-0.26912,0.959155,-0.92622,0.529051,0.428102,0.500171,-0.936525,...,-0.023243,-0.029943,0.03597,0.031253,-0.077524,-0.658071,-0.12856,-0.126507,-0.32202,-0.359868
57575,-0.817454,-0.93772,0.505667,1.255424,-0.193041,-0.505667,0.529051,0.428102,0.259204,-0.936525,...,-0.023243,-0.029943,0.03597,0.031253,-0.077524,-0.658071,-0.12856,-0.126507,-0.32202,-0.359868
57576,0.361312,1.846167,-0.532615,-0.217265,2.037015,0.532615,0.534528,0.428102,0.666951,1.45986,...,-0.033575,0.01835,-0.081219,-0.091357,0.452194,-0.345626,-0.12856,-0.126507,-0.32202,-0.387949


In [62]:
column_order = test_dataset.columns

## Clustered Columns

In [63]:
clustered_columns = [
    ['system.cpu.user.pct',
    'system.cpu.system.pct',
    'system.cpu.idle.pct',
    'system.cpu.iowait.pct',
    'system.cpu.softirq.pct',
    'system.cpu.total.pct'],
    
    ['system.network.in.bytes',
    'system.network.out.bytes',
    'system.network.in.dropped',
    'system.network.out.errors'],
    
    ['system.network.in.packets',
    'system.network.out.packets'],
    
    ['system.diskio.iostat.await',
    'system.diskio.iostat.queue.avg_size',
    'system.diskio.iostat.read.per_sec.bytes',
    'system.diskio.iostat.write.per_sec.bytes'],
    
    ['jvm.metrics.memory.heap_memory_usage.committed',
    'jvm.metrics.memory.heap_memory_usage.max'],
    
    ['jvm.metrics.memory.heap_memory_usage.used',
    'jvm.metrics.gc.psms.collection_count',
    'jvm.metrics.gc.psms.collection_time',
    'system.memory.used.pct'],
    
    ['jvm.metrics.gc.pss.collection_count',
    'jvm.metrics.gc.pss.collection_time',
    'jvm.metrics.threading.thread_count'],   
]

cols = ['system.cpu.user.pct', 'system.cpu.system.pct',
       'system.cpu.idle.pct', 'system.cpu.iowait.pct',
       'system.cpu.softirq.pct', 'system.cpu.total.pct',
       'system.memory.used.pct',
       'system.network.in.bytes', 'system.network.in.packets',
       'system.network.in.dropped', 'system.network.out.bytes',
       'system.network.out.packets', 'system.network.out.errors',
       'system.diskio.iostat.await',
       'system.diskio.iostat.queue.avg_size',
       'system.diskio.iostat.read.per_sec.bytes',
       'system.diskio.iostat.write.per_sec.bytes',
       'jvm.metrics.memory.heap_memory_usage.committed',
       'jvm.metrics.memory.heap_memory_usage.max',
       'jvm.metrics.memory.heap_memory_usage.used',
       'jvm.metrics.threading.thread_count',
       'jvm.metrics.gc.psms.collection_count',
       'jvm.metrics.gc.psms.collection_time',
       'jvm.metrics.gc.pss.collection_count',
       'jvm.metrics.gc.pss.collection_time']

## Loading Models

In [64]:
def loadModel(model_name):
    json_file = open(model_name+'.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(model_name+".h5")
    print("Loaded " + model_name + " from disk")
    return loaded_model

shifts = []
for i in range(1, shift_num+1):
    models = []
    for j in range(1,8):
        model = loadModel("data/models/shift-"+str(shift_num)+"/model_" + model_type + "_" + str(j) )
        models.append(model)
    shifts.append(models)

Loaded data/models/shift-3/model_gru_1 from disk
Loaded data/models/shift-3/model_gru_2 from disk
Loaded data/models/shift-3/model_gru_3 from disk
Loaded data/models/shift-3/model_gru_4 from disk
Loaded data/models/shift-3/model_gru_5 from disk
Loaded data/models/shift-3/model_gru_6 from disk
Loaded data/models/shift-3/model_gru_7 from disk
Loaded data/models/shift-3/model_gru_1 from disk
Loaded data/models/shift-3/model_gru_2 from disk
Loaded data/models/shift-3/model_gru_3 from disk
Loaded data/models/shift-3/model_gru_4 from disk
Loaded data/models/shift-3/model_gru_5 from disk
Loaded data/models/shift-3/model_gru_6 from disk
Loaded data/models/shift-3/model_gru_7 from disk
Loaded data/models/shift-3/model_gru_1 from disk
Loaded data/models/shift-3/model_gru_2 from disk
Loaded data/models/shift-3/model_gru_3 from disk
Loaded data/models/shift-3/model_gru_4 from disk
Loaded data/models/shift-3/model_gru_5 from disk
Loaded data/models/shift-3/model_gru_6 from disk
Loaded data/models/s

## Predicting 

In [65]:
shift_predictions = []
for i in range(1, shift_num+1):
    models = shifts[i-1]
    predictions = []
    for j in range(1,8):
        model = models[j-1]
        prediction_arr = model.predict(test_dataset.values.reshape((test_dataset.shape[0], 1, test_dataset.shape[1])))
        prediction = DataFrame(prediction_arr)
        prediction.columns = clustered_columns[j-1]
        predictions.append(prediction)
    
    shift_prediction = concat(predictions, axis=1)
    shift_prediction = shift_prediction.set_index(test_dataset.index)
    shift_predictions.append(shift_prediction)

# Reordering Columns

In [66]:
def order_columns(dataset, column_arr):
    out = DataFrame(index = dataset.index, columns = column_arr )
    for col in column_arr:
        out[col] = dataset[col]
    return out

In [67]:
shift_predictions[0].head()

Unnamed: 0,system.cpu.user.pct,system.cpu.system.pct,system.cpu.idle.pct,system.cpu.iowait.pct,system.cpu.softirq.pct,system.cpu.total.pct,system.network.in.bytes,system.network.out.bytes,system.network.in.dropped,system.network.out.errors,...,system.diskio.iostat.write.per_sec.bytes,jvm.metrics.memory.heap_memory_usage.committed,jvm.metrics.memory.heap_memory_usage.max,jvm.metrics.memory.heap_memory_usage.used,jvm.metrics.gc.psms.collection_count,jvm.metrics.gc.psms.collection_time,system.memory.used.pct,jvm.metrics.gc.pss.collection_count,jvm.metrics.gc.pss.collection_time,jvm.metrics.threading.thread_count
57572,-0.19964,-0.02142,0.200402,0.052221,-0.168605,-0.200402,-0.000409,0.055003,-0.003062,0.004555,...,-0.010089,0.580242,0.555622,-0.323343,-0.166466,-0.186098,0.156639,0.074925,0.126197,-0.515989
57573,-0.122694,0.142948,0.104582,0.059933,-0.033281,-0.104582,0.093191,0.074217,0.004657,0.009054,...,0.25555,0.647214,0.625094,-0.498548,-0.146959,-0.14952,0.24278,-0.04425,-0.056041,-0.41049
57574,0.011314,0.2734,-0.032295,-0.180021,-0.736654,0.032295,0.068327,0.136038,0.017077,0.188282,...,0.133264,0.232579,0.239563,-0.250962,0.181329,0.186205,0.467932,-0.01747,0.056827,-0.527458
57575,-0.174402,-0.04855,0.182432,-0.038395,-0.459343,-0.182431,0.067815,0.130894,0.010848,0.09037,...,0.109019,0.385738,0.383321,-0.311221,0.013509,0.01237,0.30509,-0.064892,-0.01307,-0.461789
57576,0.104728,2.150459,-0.333867,-0.556295,0.066176,0.333874,0.246016,0.16154,0.010644,0.004055,...,0.623621,1.585696,1.563429,-0.239836,-0.346283,-0.376704,0.227837,0.271708,0.227079,-0.215988


In [68]:
for i in range(shift_num):
    shift_predictions[i] = order_columns(shift_predictions[i], cols)

In [69]:
shift_predictions[0].head()

Unnamed: 0,system.cpu.user.pct,system.cpu.system.pct,system.cpu.idle.pct,system.cpu.iowait.pct,system.cpu.softirq.pct,system.cpu.total.pct,system.memory.used.pct,system.network.in.bytes,system.network.in.packets,system.network.in.dropped,...,system.diskio.iostat.read.per_sec.bytes,system.diskio.iostat.write.per_sec.bytes,jvm.metrics.memory.heap_memory_usage.committed,jvm.metrics.memory.heap_memory_usage.max,jvm.metrics.memory.heap_memory_usage.used,jvm.metrics.threading.thread_count,jvm.metrics.gc.psms.collection_count,jvm.metrics.gc.psms.collection_time,jvm.metrics.gc.pss.collection_count,jvm.metrics.gc.pss.collection_time
57572,-0.19964,-0.02142,0.200402,0.052221,-0.168605,-0.200402,0.156639,-0.000409,-0.109839,-0.003062,...,-0.348678,-0.010089,0.580242,0.555622,-0.323343,-0.515989,-0.166466,-0.186098,0.074925,0.126197
57573,-0.122694,0.142948,0.104582,0.059933,-0.033281,-0.104582,0.24278,0.093191,0.061639,0.004657,...,-0.10917,0.25555,0.647214,0.625094,-0.498548,-0.41049,-0.146959,-0.14952,-0.04425,-0.056041
57574,0.011314,0.2734,-0.032295,-0.180021,-0.736654,0.032295,0.467932,0.068327,-0.453084,0.017077,...,0.0154,0.133264,0.232579,0.239563,-0.250962,-0.527458,0.181329,0.186205,-0.01747,0.056827
57575,-0.174402,-0.04855,0.182432,-0.038395,-0.459343,-0.182431,0.30509,0.067815,-0.182479,0.010848,...,0.007189,0.109019,0.385738,0.383321,-0.311221,-0.461789,0.013509,0.01237,-0.064892,-0.01307
57576,0.104728,2.150459,-0.333867,-0.556295,0.066176,0.333874,0.227837,0.246016,-0.128361,0.010644,...,-0.096316,0.623621,1.585696,1.563429,-0.239836,-0.215988,-0.346283,-0.376704,0.271708,0.227079


## Saving Prediction

In [70]:
if not os.path.exists(directory_to_save):
    os.makedirs(directory_to_save)
    
for i in range(shift_num):
    shift_predictions[i].to_csv(directory_to_save + "/predicted_data_" + model_type + "_model_" + str(i+1) + ".csv")

In [71]:
shift_predictions[0]['system.cpu.idle.pct']

57572    0.200402
57573    0.104582
57574   -0.032295
57575    0.182432
57576   -0.333867
57577    0.124429
57578    0.176998
57579   -0.884609
57580    0.154197
57581    0.229494
57582   -0.828471
57583    0.197853
57584    0.139903
57585    0.123248
57586    0.147787
57587    0.161681
57588   -0.028564
57589    0.192080
57590    0.173420
57591   -0.344802
57592    0.053178
57593    0.178394
57594   -0.266643
57595    0.085543
57596    0.196321
57597   -2.613375
57598    0.186319
57599    0.210383
57600   -0.412513
57601    0.256214
           ...   
71934   -0.526770
71935    0.173430
71936   -0.527607
71937   -0.215865
71938    0.168014
71939    0.227393
71940   -1.000556
71941   -0.063143
71942    0.059818
71943   -0.249361
71944    0.163809
71945   -0.917758
71946   -0.026036
71947    0.719802
71948   -0.272486
71949   -0.437127
71950    0.132968
71951    0.165388
71952   -1.008409
71953    0.116370
71954    0.156002
71955    0.217347
71956    0.239421
71957    0.187664
71958   -1