In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import math
from estimators.NeuralFlow import *
from utils.SlidingWindowUtil import SlidingWindow

from sklearn import datasets, metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
from utils.GraphUtil import *
from estimators.GAEstimator import GAEstimator
from estimators.OptimizerNNEstimator import OptimizerNNEstimator
from io_utils.NumLoad import *
from sklearn.cross_validation import train_test_split
from estimators.FuzzyFlow import FuzzyFlow
from utils.TrainingTestMaker import TrainingTestMaker
from scaling.ProactiveSLA import ProactiveSLA

Mapped name None to device cuda: GeForce GT 630M
Using cuDNN version 5103 on context None


# Import Data

In [2]:
dataset_holder = []
trainee_holder = {}
metrics = ["cpu_rate","mem_usage","disk_io_time"]
arr_desk = ['X_train','y_train','X_test']
sliding_number = 3
data = pd.read_csv('sampling_617685_metric_10min_datetime_origin.csv',parse_dates=True,index_col=0)[:3000]
data.interpolate(inplace=True)
for metric in metrics:
    dat = pd.Series(data[metric].round(5))
    fuzzy_engine = FuzzyFlow()
    data_maker = TrainingTestMaker()
    fuzzy_engine.fit_transform(dat)
    sliding = np.array(list(SlidingWindow(fuzzy_engine.u_class_transform, sliding_number)))
    X_train, y_train, X_test, y_test = data_maker.make_fuzzy_test(sliding, fuzzy_engine.u_class_transform, dat)
    dataset_holder.append(fuzzy_engine)
#     if(metric=="cpu_rate"):
    trainee_holder[metric]={
        'X_train':X_train,
        "y_train":y_train,
        "X_test":X_test,
        "y_test":y_test
    }
#     else:
#         trainee_holder[metric]={
#             'X_train':np.array(X_train)*100+1,
#             "y_train":np.array(y_train)*100+1,
#             "X_test":np.array(X_test)*100+1,
#             "y_test":np.array(y_test)
#         }

In [3]:
# X_train = zip(trainee_holder['cpu_rate']['X_train'],trainee_holder['mem_usage']['X_train'])
y_train = np.asarray(zip(*[trainee_holder[metric]['y_train'] for metric in metrics]))
# X_test = zip(trainee_holder['cpu_rate']['X_test'],trainee_holder['mem_usage']['X_test'])
X_train = []
X_test = []
# y_train = []
for i in np.arange(len(trainee_holder['cpu_rate']['X_train'])):
#     tmp = zip(trainee_holder['cpu_rate']['X_train'][i],trainee_holder['mem_usage']['X_train'][i])
    tmp = zip(*[trainee_holder[metric]['X_train'][i] for metric in metrics])
    X_train.append(np.ravel(tmp))
for i in np.arange(len(trainee_holder['cpu_rate']['X_test'])):
    tmp = zip(*[trainee_holder[metric]['X_test'][i] for metric in metrics])
    X_test.append(np.ravel(tmp))
X_train = np.array(X_train)
X_test = np.array(X_test)
#     y_train.append(np.ravel(zip(trainee_holder['cpu_rate']['y_train'][i],trainee_holder['mem_usage']['y_train'][i])))

In [6]:
from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from estimators.KerasRegressor import KerasRegressor

Using Theano backend.


In [41]:
model = Sequential(
    [
        Dense(128,input_dim=X_train.shape[1],activation='relu'),
        Dense(12,activation='relu'),
        Dense(y_train.shape[1],activation='relu')
    ])
earlyStopping = EarlyStopping(monitor='val_loss',patience=30)

In [42]:
%matplotlib
pd.Series(dataset_holder[0].u_class_transform).plot()

Using matplotlib backend: Qt4Agg


<matplotlib.axes._subplots.AxesSubplot at 0x7f4b40022ed0>

In [20]:
model = KerasRegressor(hidden_nodes=[64,16],steps=15000,batch_size=32, activation='sigmoid',verbose=2, learning_rate=0.001)

Initialization


In [None]:
model.fit(X_train,y_train)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_5 (Dense)                  (None, 64)            640         dense_input_3[0][0]              
____________________________________________________________________________________________________
dense_6 (Dense)                  (None, 16)            1040        dense_5[0][0]                    
____________________________________________________________________________________________________
dense_7 (Dense)                  (None, 3)             51          dense_6[0][0]                    
Total params: 1731
____________________________________________________________________________________________________
None
Train on 1890 samples, validate on 210 samples
Epoch 1/15000
0s - loss: 6.0376 - val_loss: 64.3686
Epoch 2/15000
0s - loss: 4.8844 - val_loss: 62.1669
Epoch 3/15000
0s 

In [None]:
classifier = model

In [None]:
y_pred = np.round(abs(classifier.predict(X_test)))
y_cpu = dataset_holder[0].inverse_transform(abs(y_pred[:,0]))
y_ram = dataset_holder[1].inverse_transform(abs(y_pred[:,1]))
score_mae_CPU = mean_absolute_error(y_cpu, trainee_holder['cpu_rate']['y_test'])
print score_mae_CPU
score_mae_RAM = mean_absolute_error(y_ram, trainee_holder['mem_usage']['y_test'])
print score_mae_RAM

In [18]:
%matplotlib
plot_figure_with_label(y_pred=y_cpu[100:200], y_test=trainee_holder['cpu_rate']['y_test'][100:200], 
                       title='Fuzzy BPNN Multi Dimension (Sliding Window Size = 3)', metric='CPU Utilization (%)')
# plot_figure_with_label(y_pred=y_ram[100:200], y_test=trainee_holder['mem_usage']['y_test'][100:200], 
#                        title='Fuzzy BPNN Multi Dimension (Sliding Window Size = 3)', metric='Memory Usage (%)')

Using matplotlib backend: Qt4Agg


<matplotlib.axes._subplots.AxesSubplot at 0x7fe6e4911e10>

In [17]:
np.savez('model_saved/CPU_BPNNM_%s_%s' % (sliding_number, score_mae_CPU), y_pred=y_cpu, 
         y_true=trainee_holder['cpu_rate']['y_test'])
np.savez('model_saved/RAM_BPNNM_%s_%s' % (sliding_number, score_mae_RAM), 
         y_pred=y_ram, y_test=trainee_holder['mem_usage']['y_test'])

# Fuzzy GABPNN

In [5]:
n_hidden = 7
fit_params = {
    'neural_shape': [len(X_train[0]), n_hidden, 2]
}
ga_estimator = GAEstimator(cross_rate=0.5, mutation_rate=0.02, gen_size=100, pop_size=30)
nn = NeuralFlowRegressor(hidden_nodes=[n_hidden], optimize='Adam',activation='sigmoid'
                         , steps=7000, learning_rate=1E-02)
classifier = OptimizerNNEstimator(ga_estimator, nn)
classifier.fit(X_train,y_train, **fit_params)

In [20]:
y_pred = np.round(abs(classifier.predict(X_test)))
y_cpu = dataset_holder[0].inverse_transform(abs(y_pred[:,0]))
y_ram = dataset_holder[1].inverse_transform(abs(y_pred[:,1]))
score_mae_CPU = mean_absolute_error(y_cpu, trainee_holder['cpu_rate']['y_test'])
print score_mae_CPU
score_mae_RAM = mean_absolute_error(y_ram, trainee_holder['mem_usage']['y_test'])
print score_mae_RAM

0.785109242762
0.11264298441


In [27]:
%matplotlib
# plot_figure_with_label(y_pred=y_cpu[100:200], y_test=trainee_holder['cpu_rate']['y_test'][100:200], 
#                        title='Fuzzy GABPNN Multi Dimension (Sliding Window Size = 3)', metric='CPU Utilization (%)')
plot_figure_with_label(y_pred=y_ram[100:200], y_test=trainee_holder['mem_usage']['y_test'][100:200], 
                       title='Fuzzy GABPNN Multi Dimension (Sliding Window Size = 3)', metric='Memory Usage (%)')

Using matplotlib backend: Qt4Agg


<matplotlib.axes._subplots.AxesSubplot at 0x7faea01a5450>

In [21]:
np.savez('model_saved/CPU_FGABPNNM_%s_%s' % (sliding_number, score_mae_CPU), y_pred=y_cpu, 
         y_true=trainee_holder['cpu_rate']['y_test'])
np.savez('model_saved/RAM_FGABPNNM_%s_%s' % (sliding_number, score_mae_RAM), 
         y_pred=y_ram, y_test=trainee_holder['mem_usage']['y_test'])

# Keras Multilayers

In [4]:
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Merge
from keras.utils.visualize_util import plot
from keras.callbacks import EarlyStopping
from IPython.display import SVG
from keras.utils.visualize_util import model_to_dot

Using Theano backend.


In [5]:
cpu_train = X_train[:,:3]
ram_train = X_train[:,3:]
cpu_test = X_test[:,:3]
ram_test = X_test[:,3:]

In [6]:
cpu_target = trainee_holder['cpu_rate']['y_test']
ram_target = trainee_holder['mem_usage']['y_test']

In [61]:
model_cpu = Sequential([Dense(3, input_dim=cpu_train.shape[1],activation='relu')])
model_ram = Sequential([Dense(3, input_dim=ram_train.shape[1],activation='relu')])

In [62]:
merged_model = Sequential()
merged_model.add(Merge([model_cpu, model_ram], mode='ave', concat_axis=1))
infer_model = Sequential([
        merged_model,
        Dense(1024, activation='relu'),
        Dropout(0.5),
        Dense(512,activation='relu'),
        Dense(256,activation='relu'),
        Dense(32,activation='relu'),
        Dense(output_dim=2,activation='relu')
    ])

In [63]:
infer_model.compile(loss='mean_squared_error',optimizer='adam')
earlyStop = EarlyStopping(monitor='val_loss',patience=20)

In [64]:
history = infer_model.fit([cpu_train,ram_train],y_train,nb_epoch=5000,batch_size=64,verbose=2,validation_split=0.1,
                callbacks=[earlyStop])

Train on 1890 samples, validate on 210 samples
Epoch 1/5000
0s - loss: 10.6999 - val_loss: 64.7213
Epoch 2/5000
0s - loss: 7.2567 - val_loss: 63.8412
Epoch 3/5000
0s - loss: 7.1655 - val_loss: 63.4482
Epoch 4/5000
0s - loss: 7.1745 - val_loss: 63.4283
Epoch 5/5000
0s - loss: 7.1396 - val_loss: 63.7168
Epoch 6/5000
0s - loss: 7.1425 - val_loss: 63.7338
Epoch 7/5000
0s - loss: 7.1477 - val_loss: 65.2760
Epoch 8/5000
0s - loss: 7.1754 - val_loss: 63.7664
Epoch 9/5000
0s - loss: 7.1487 - val_loss: 64.4669
Epoch 10/5000
0s - loss: 7.1682 - val_loss: 64.3312
Epoch 11/5000
0s - loss: 7.1363 - val_loss: 63.5353
Epoch 12/5000
0s - loss: 7.1573 - val_loss: 64.8231
Epoch 13/5000
0s - loss: 7.1606 - val_loss: 64.3240
Epoch 14/5000
0s - loss: 7.1400 - val_loss: 64.1241
Epoch 15/5000
0s - loss: 7.1415 - val_loss: 63.8108
Epoch 16/5000
0s - loss: 7.2563 - val_loss: 64.2214
Epoch 17/5000
0s - loss: 7.2512 - val_loss: 64.3820
Epoch 18/5000
0s - loss: 7.1624 - val_loss: 63.4711
Epoch 19/5000
0s - loss: 

In [65]:
log = history.history
df = pd.DataFrame.from_dict(log)
%matplotlib
df.plot(kind='line')

Using matplotlib backend: Qt4Agg


<matplotlib.axes._subplots.AxesSubplot at 0x7f1ac5ef13d0>

In [66]:
y_pred = np.round(abs(infer_model.predict([cpu_test,ram_test])))
y_cpu = dataset_holder[0].inverse_transform(abs(y_pred[:,0]))
y_ram = dataset_holder[1].inverse_transform(abs(y_pred[:,1]))
score_mae_CPU = mean_absolute_error(y_cpu, trainee_holder['cpu_rate']['y_test'])
print score_mae_CPU
score_mae_RAM = mean_absolute_error(y_ram, trainee_holder['mem_usage']['y_test'])
print score_mae_RAM

0.955596659243
0.21644766147


In [67]:
%matplotlib
# plot_figure_with_label(y_pred=y_cpu[100:200], y_test=trainee_holder['cpu_rate']['y_test'][100:200], 
#                        title='Fuzzy GABPNN Multi Dimension (Sliding Window Size = 3)', metric='CPU Utilization (%)')
plot_figure_with_label(y_pred=y_ram[100:200], y_test=trainee_holder['mem_usage']['y_test'][100:200], 
                       title='Fuzzy GABPNN Multi Dimension (Sliding Window Size = 3)', metric='Memory Usage (%)')

Using matplotlib backend: Qt4Agg




<matplotlib.axes._subplots.AxesSubplot at 0x7f1af8fed4d0>