In [39]:
from pandas import DataFrame, concat, read_csv
from keras.models import model_from_json
import os

## Config Vars

In [33]:
# Number of shifts: one "shift-<n>" model directory is loaded and one
# prediction CSV is written per shift.
shift_num = 5
# Model family tag embedded in the model file names ("model_<type>_<j>")
# and in the names of the saved prediction CSVs.
model_type = 'lstm'
# Directory the prediction CSVs are written to (created if it does not exist).
directory_to_save = "shift_predictions"
# Columns removed from the test dataset right after it is loaded.
conf_dataset_columns_to_drop = ["@timestamp", "anomaly"]

## Loading Test Dataset

In [34]:
# Read the test set (the first CSV column becomes the index) and strip the
# configured columns in one non-mutating chain.
test_dataset = (
    read_csv('test_data.csv', header=0, index_col=0)
    .drop(conf_dataset_columns_to_drop, axis=1)
)
test_dataset.head()

Unnamed: 0,system.cpu.idle.pct,system.cpu.iowait.pct,system.cpu.softirq.pct,system.cpu.system.pct,system.cpu.total.pct,system.cpu.user.pct,system.diskio_sda.iostat.await,system.diskio_sda.iostat.busy,system.diskio_sda.iostat.queue.avg_size,system.diskio_sda.iostat.read.request.merges_per_sec,...,jolokia.metrics.threading.thread_count,system.load.1,system.load.15,system.load.5,system.load.norm.1,system.load.norm.15,system.load.norm.5,system.memory.actual.used.pct,system.memory.swap.used.pct,system.memory.used.pct
40002,-0.006816,0.042121,-0.130664,-0.108046,0.006816,-0.003021,-0.117234,-0.131404,-0.108468,-0.036651,...,-0.324488,-0.093839,-0.110122,-0.102609,-0.096184,-0.112999,-0.105119,-0.134875,-0.078425,0.356877
40003,0.176608,-0.089197,0.290568,0.219087,-0.176608,-0.155743,-0.117234,-0.131404,-0.108468,-0.036651,...,-0.324488,-0.100457,-0.110946,-0.103817,-0.1028,-0.113659,-0.106327,-0.125934,-0.078425,0.371657
40004,0.232698,-0.165114,0.340124,0.873352,-0.232698,-0.201104,-0.117234,-0.131404,-0.108468,-0.036651,...,-0.324488,-0.107075,-0.111771,-0.10563,-0.109415,-0.114648,-0.10826,-0.125934,-0.078425,0.386437
40005,0.251916,-0.164088,-0.130664,-0.280222,-0.251916,-0.196866,-0.113205,-0.194419,-0.110453,-0.036651,...,-0.084343,-0.118655,-0.112595,-0.108047,-0.121103,-0.115307,-0.110676,-0.130405,-0.078425,0.393827
40006,0.196919,-0.099456,0.563129,0.425697,-0.196919,-0.178502,-0.113205,-0.194419,-0.110453,-0.036651,...,-0.084343,-0.128582,-0.113419,-0.110463,-0.131026,-0.116297,-0.113093,-0.130405,-0.078425,0.430778


## Clustered Columns

In [35]:
# Column names grouped into clusters. In the prediction cell, the list at
# position j-1 supplies the column labels for the output of model number j,
# so the order of the clusters (and of the names inside each cluster) must
# match what each model emits. The clusters are concatenated in this order
# to form the saved prediction frames.
clustered_columns = [
    # Cluster 1: CPU iowait plus the sda / sda2 disk iostat metrics.
    [
    'system.cpu.iowait.pct',
    'system.diskio_sda.iostat.await',
    'system.diskio_sda.iostat.busy',
    'system.diskio_sda.iostat.queue.avg_size',
    'system.diskio_sda.iostat.read.request.merges_per_sec',
    'system.diskio_sda.iostat.read.request.per_sec',
    'system.diskio_sda.iostat.request.avg_size',
    'system.diskio_sda.iostat.service_time',
    'system.diskio_sda.iostat.write.request.merges_per_sec',
    'system.diskio_sda.iostat.write.request.per_sec',
    'system.diskio_sda2.iostat.await',
    'system.diskio_sda2.iostat.busy',
    'system.diskio_sda2.iostat.queue.avg_size',
    'system.diskio_sda2.iostat.read.request.merges_per_sec',
    'system.diskio_sda2.iostat.read.request.per_sec',
    'system.diskio_sda2.iostat.request.avg_size',
    'system.diskio_sda2.iostat.service_time',
    'system.diskio_sda2.iostat.write.request.merges_per_sec',
    'system.diskio_sda2.iostat.write.request.per_sec'
    ],
    # Cluster 2: total and user CPU percentages.
    [
    'system.cpu.total.pct',
    'system.cpu.user.pct'
    ],
    # Cluster 3: Jolokia heap memory committed / max.
    [
    'jolokia.metrics.memory.heap_memory_usage.committed',
    'jolokia.metrics.memory.heap_memory_usage.max'
    ],
    # Cluster 4: system.load.* and system.load.norm.* metrics.
    [
    'system.load.1',
    'system.load.15',
    'system.load.5',
    'system.load.norm.1',
    'system.load.norm.15',
    'system.load.norm.5'
    ],
    # Cluster 5: remaining CPU, Jolokia memory/threading and system memory metrics.
    [
    'system.cpu.idle.pct',
    'system.cpu.softirq.pct',
    'system.cpu.system.pct',
    'jolokia.metrics.memory.heap_memory_usage.used',
    'jolokia.metrics.memory.non_heap_memory_usage.used',
    'jolokia.metrics.threading.daemon_thread_count',
    'jolokia.metrics.threading.thread_count',
    'system.memory.actual.used.pct',
    'system.memory.swap.used.pct',
    'system.memory.used.pct'
    ],
    
]

## Loading Models

In [36]:
def loadModel(model_name):
    """Rebuild a model from ``<model_name>.json`` and ``<model_name>.h5``.

    Parameters
    ----------
    model_name : str
        Path prefix, without extension, shared by the architecture JSON
        file and the weights file.

    Returns
    -------
    The object produced by ``model_from_json`` after ``load_weights`` has
    been called on it with the ``.h5`` path.

    Raises
    ------
    Whatever ``open`` raises if the JSON file cannot be opened; errors from
    ``model_from_json`` / ``load_weights`` propagate unchanged.
    """
    # The context manager closes the file even if reading raises, which the
    # previous manual open()/close() pair did not guarantee.
    with open(model_name + '.json', 'r') as json_file:
        architecture_json = json_file.read()

    loaded_model = model_from_json(architecture_json)
    # Load weights into the freshly built model.
    loaded_model.load_weights(model_name + ".h5")
    print("Loaded " + model_name + " from disk")
    return loaded_model

# shifts[s][c] is the model for shift s+1 and column cluster c+1.
# One model is loaded per entry of `clustered_columns`, so the number of
# models per shift follows the cluster list instead of a hard-coded 5.
# Load order is unchanged: all clusters of shift 1, then shift 2, and so on.
shifts = [
    [
        loadModel("shift-" + str(shift_no) + "/model_" + model_type + "_" + str(cluster_no))
        for cluster_no in range(1, len(clustered_columns) + 1)
    ]
    for shift_no in range(1, shift_num + 1)
]

Loaded shift-1/model_lstm_1 from disk
Loaded shift-1/model_lstm_2 from disk
Loaded shift-1/model_lstm_3 from disk
Loaded shift-1/model_lstm_4 from disk
Loaded shift-1/model_lstm_5 from disk
Loaded shift-2/model_lstm_1 from disk
Loaded shift-2/model_lstm_2 from disk
Loaded shift-2/model_lstm_3 from disk
Loaded shift-2/model_lstm_4 from disk
Loaded shift-2/model_lstm_5 from disk
Loaded shift-3/model_lstm_1 from disk
Loaded shift-3/model_lstm_2 from disk
Loaded shift-3/model_lstm_3 from disk
Loaded shift-3/model_lstm_4 from disk
Loaded shift-3/model_lstm_5 from disk
Loaded shift-4/model_lstm_1 from disk
Loaded shift-4/model_lstm_2 from disk
Loaded shift-4/model_lstm_3 from disk
Loaded shift-4/model_lstm_4 from disk
Loaded shift-4/model_lstm_5 from disk
Loaded shift-5/model_lstm_1 from disk
Loaded shift-5/model_lstm_2 from disk
Loaded shift-5/model_lstm_3 from disk
Loaded shift-5/model_lstm_4 from disk
Loaded shift-5/model_lstm_5 from disk


## Predicting 

In [37]:
# Every model is fed the same input, so reshape the test set once to
# (rows, 1, columns) instead of rebuilding it for each model.
model_input = test_dataset.values.reshape(
    (test_dataset.shape[0], 1, test_dataset.shape[1])
)

shift_predictions = []
for shift_idx in range(shift_num):
    models = shifts[shift_idx]
    # Models and clusters are paired by position; fail loudly rather than
    # silently dropping a cluster if the two lists get out of step.
    assert len(models) == len(clustered_columns), "expected one model per column cluster"

    # One frame per cluster, labelled with that cluster's column names.
    predictions = [
        DataFrame(model.predict(model_input), columns=cluster_columns)
        for model, cluster_columns in zip(models, clustered_columns)
    ]

    # Stitch the clusters side by side and restore the test set's index so
    # each predicted row lines up with its input row.
    shift_prediction = concat(predictions, axis=1).set_index(test_dataset.index)
    shift_predictions.append(shift_prediction)

## Saving Predictions

In [42]:
# Make sure the output directory is there before writing into it.
if not os.path.exists(directory_to_save):
    os.makedirs(directory_to_save)

# One CSV per shift; file names are numbered starting from 1.
for shift_idx in range(shift_num):
    output_path = "{}/predicted_data_{}_model_{}.csv".format(
        directory_to_save, model_type, shift_idx + 1
    )
    shift_predictions[shift_idx].to_csv(output_path)