In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import math
from estimators.NeuralFlow import *
from utils.SlidingWindowUtil import SlidingWindow

from sklearn import datasets, metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
from utils.GraphUtil import *
from estimators.GAEstimator import GAEstimator
from io_utils.NumLoad import *
from estimators.OptimizerNNEstimator import OptimizerNNEstimator
from scaling.ProactiveSLA import ProactiveSLA

Mapped name None to device cuda: GeForce GT 630M
Using cuDNN version 5103 on context None


# Import Data

In [2]:
scaler = MinMaxScaler()
dat = pd.read_csv('sampling_617685_metric_10min_datetime.csv',parse_dates=True,index_col=0)[:3000]
# dat = pd.read_csv('data/gdata/sampling_617685_metric_1min_datetime.csv',parse_dates=True,index_col=0)
dat = pd.Series(dat['cpu_rate'].round(3))
distance = round(dat.max() / (dat.max() / 0.25 + 4), 4)
print distance

0.2457


# Preprocessing Fuzzy Time Series

In [3]:
partition_size = distance
umin = math.floor(min(dat));
umax = math.ceil(max(dat));
# 2: Partition of universe
# Method: Dividing in the half-thousands
def get_midpoint(ptuple):
    return 0.5*(ptuple[0]+ptuple[1])
def get_midpoint_vector(tuple_vector):
    return [get_midpoint(x) for x in tuple_vector];
def get_fuzzy_class(point, partition_size):
    return int(math.floor(point / partition_size))
def get_fuzzy_dataset(data):
    u_class = []
    for item in data:
        u_class.append(get_fuzzy_class(item,partition_size))
    return u_class
def mapping_class(u_class):
    unique_class = np.unique(u_class)
    index = np.arange(unique_class.shape[0])
    inverted = {}
    mapping = {}
    for idx,val in enumerate(unique_class):
        mapping[val] = idx
        inverted[idx] = val
    return mapping, inverted
def defuzzy(index, inverted,midpoints):
    f_class = inverted[index]
    return midpoints[f_class]

nIter = int((umax-umin)/partition_size)
u_vectorized = []

for i in range(nIter) :
    u_vectorized.append((umin + i*partition_size,umin + (i+1)*partition_size));

u_midpoints = get_midpoint_vector(u_vectorized)
u_class = np.array(get_fuzzy_dataset(dat),dtype=np.int32)

u_unique_inverted, u_unique_mapping = mapping_class(u_class)
u_class_transform = [u_unique_inverted[item] for item in u_class]
sliding_number = 3
# result = []
X_train_size = int(len(u_class_transform)*0.7)
sliding = np.array(list(SlidingWindow(u_class_transform, sliding_number)))
sliding = np.array(sliding, dtype=np.int32)
X_train = sliding[:X_train_size]
y_train = u_class_transform[sliding_number:X_train_size+sliding_number]
X_test = sliding[X_train_size:]
y_test = u_class_transform[X_train_size+sliding_number-1:]
y_actual_test = dat[X_train_size+sliding_number-1:].tolist()
# # Define classifier
n_hidden = len(X_train[0]) + sliding_number
# 

In [14]:
# np.savez("fuzzy_train_direct",X_train=X_train,y_train=y_train,X_test=X_test,y_test=y_test)

In [33]:
# classifier = skflow.TensorFlowDNNClassifier(hidden_units=[n_hidden], n_classes=len(u_unique_inverted),steps=10000, 
#                                             learning_rate= 0.0001, optimizer='Adam', verbose=0)
def main_FGABPNN():   
    fit_params = {
        'neural_shape':[len(X_train[0]),n_hidden,1]
        }
    ga_estimator = GAEstimator(cross_rate=0.15, mutation_rate=0.06, gen_size=100, pop_size=30)
    nn = NeuralFlowRegressor(hidden_nodes=[n_hidden],optimize='Adam'
                                     ,steps=7000,learning_rate=1E-02)
    classifier = OptimizerNNEstimator(ga_estimator, nn)
    #     classifier = NeuralFlowRegressor(hidden_nodes=[15],optimize='Adam'
    #                                      ,steps=7000,learning_rate=1E-03)
    a = classifier.fit(X_train, y_train, **fit_params)
    ypred = np.round(abs(classifier.predict(X_test))).flatten()
    ypred_defuzzy = [defuzzy(item%len(u_unique_mapping),u_unique_mapping,u_midpoints) for item in ypred]
    score = mean_absolute_error(ypred_defuzzy,y_actual_test)
    # result.append((sliding_number,score))
    print score

In [34]:
%%timeit
main_FGABPNN()

Initialization
Initilization
Gen. 0 (0.00%): Max/Min/Avg Fitness(Raw) [14.42(25.49)/10.82(5.32)/12.02(12.02)]
Gen. 10 (33.33%): Max/Min/Avg Fitness(Raw) [20.52(37.80)/15.23(5.79)/17.10(17.10)]
Gen. 20 (66.67%): Max/Min/Avg Fitness(Raw) [30.58(59.06)/22.68(7.04)/25.48(25.48)]
Gen. 30 (100.00%): Max/Min/Avg Fitness(Raw) [40.80(67.96)/28.62(7.11)/34.00(34.00)]
Total time elapsed: 1.170 seconds.
Step #100, epoch #4, avg. train loss: 55.80948
Step #200, epoch #9, avg. train loss: 24.20297
Step #300, epoch #14, avg. train loss: 22.52450
Step #400, epoch #19, avg. train loss: 24.28082
Step #500, epoch #23, avg. train loss: 23.87231
Step #600, epoch #28, avg. train loss: 23.33643
Step #700, epoch #33, avg. train loss: 23.89617
Step #800, epoch #38, avg. train loss: 22.97145
Step #900, epoch #42, avg. train loss: 23.74812
Step #1000, epoch #47, avg. train loss: 23.70489
Step #1100, epoch #52, avg. train loss: 23.16350
Step #1200, epoch #57, avg. train loss: 23.82572
Step #1300, epoch #61, avg. 

In [6]:
%matplotlib
plot_figure(y_pred=ypred_defuzzy[:200],y_true=y_actual_test[:200], title='High Order Time Series with order %s, hidden nodes = %s: %s'%(sliding_number,n_hidden,score))

Using matplotlib backend: Qt4Agg




In [32]:
scaler = ProactiveSLA(sla=1.5, past_consecutive_values=3)
predict_allocated = scaler.allocate_VMs(np.array(y_actual_test), ypred_defuzzy)
actual_allocated = np.array([scaler.allocate_VM(item) for item in np.array(y_actual_test)])
delta = actual_allocated - predict_allocated
print float(len(delta[delta>0])) / len(predict_allocated) *100
%matplotlib
ax = plt.subplot()
ax.plot(predict_allocated[100:300],'--',label='Predict Allocated')
ax.plot(actual_allocated[100:300],label='Actual Allocated')
ax.set_xlabel('Time')
ax.set_ylabel('Number of VMs')
ax.set_title("High Order (sliding window 3) Fuzzy by CPU metric")
# ax.plot(trainee_holder['cpu_rate']['y_test'],label='Resource Used')
plt.legend()

5.79064587973
Using matplotlib backend: Qt4Agg


<matplotlib.legend.Legend at 0x7f18777bfb50>

In [66]:
np.savez('fuzzy_GABPNN_%s_%s'%(sliding_number,score),y_pred=ypred_defuzzy,y_true=y_actual_test)

# BPNN Experiment with data

In [5]:
dat_nn = np.asarray(scaler.fit_transform(dat))
X_train_size = int(len(dat_nn)*0.7)
sliding = np.array(list(SlidingWindow(dat_nn, sliding_number)))
X_train_nn = sliding[:X_train_size]
y_train_nn = dat_nn[sliding_number:X_train_size+sliding_number].reshape(-1,1)
X_test_nn = sliding[X_train_size:]
y_test_nn = dat_nn[X_train_size+sliding_number-1:].reshape(-1,1)
y_actual_test = dat[X_train_size+sliding_number-1:].tolist()



In [9]:
# # def main_BPNN():
# estimator = NeuralFlowRegressor(learning_rate=0.001, hidden_nodes=[15], steps=5000,optimize='Adam')
# estimator.fit(X_train_nn,y_train_nn)
# y_pred = scaler.inverse_transform(estimator.predict(X_test_nn))
# score_nn = mean_absolute_error(y_pred,y_actual_test)
# print score_nn

In [28]:
%matplotlib
plot_figure(y_pred=y_pred,y_true=y_actual_test, title='BPNN with sliding window = %s: %s'%(sliding_number,score_nn))

Using matplotlib backend: Qt4Agg


In [31]:
scaler = ProactiveSLA(sla=1.5, past_consecutive_values=3)
predict_allocated = scaler.allocate_VMs(np.array(y_actual_test), y_pred)
actual_allocated = np.array([scaler.allocate_VM(item) for item in np.array(y_actual_test)])
delta = actual_allocated - predict_allocated
print float(len(delta[delta>0])) / len(predict_allocated) *100
%matplotlib
ax = plt.subplot()
ax.plot(predict_allocated[100:300],'--',label='Predict Allocated')
ax.plot(actual_allocated[100:300],label='Actual Allocated')
ax.set_xlabel('Time')
ax.set_ylabel('Number of VMs')
ax.set_title("High Order (sliding window 3) Fuzzy by CPU metric")
# ax.plot(trainee_holder['cpu_rate']['y_test'],label='Resource Used')
plt.legend()

6.01336302895
Using matplotlib backend: Qt4Agg


<matplotlib.legend.Legend at 0x7f18242cc950>

In [166]:
np.savez('neuro_%s_%s'%(sliding_number,score_nn),y_pred=y_pred,y_true=y_actual_test)

# GABPNN

In [43]:
def main_GABPNN():
    fit_params = {
        'neural_shape':[len(X_train_nn[0]),10,1]
        }
    ga_estimator = GAEstimator(cross_rate=0.95, mutation_rate=0.05, gen_size=100, pop_size=30)
    nn = NeuralFlowRegressor(hidden_nodes=[10],optimize='Adam'
                                     ,steps=7000,learning_rate=1E-02)
    classifier = OptimizerNNEstimator(ga_estimator, nn)
    classifier.fit(X_train_nn, y_train_nn,**fit_params)
    y_pred = scaler.inverse_transform(classifier.predict(X_test_nn))
    score_nn = mean_absolute_error(y_pred,y_actual_test)
    print score_nn

In [44]:
%%timeit
main_GABPNN()

Initialization
Initilization
Gen. 0 (0.00%): Max/Min/Avg Fitness(Raw) [1.12(2.63)/0.84(0.05)/0.94(0.94)]
Gen. 10 (33.33%): Max/Min/Avg Fitness(Raw) [1.48(4.32)/1.14(0.07)/1.24(1.24)]
Gen. 20 (66.67%): Max/Min/Avg Fitness(Raw) [4.05(7.51)/3.01(1.16)/3.37(3.37)]
Gen. 30 (100.00%): Max/Min/Avg Fitness(Raw) [5.23(7.68)/3.79(2.20)/4.36(4.36)]
Total time elapsed: 0.765 seconds.
Step #100, epoch #4, avg. train loss: 2.55504
Step #200, epoch #9, avg. train loss: 0.00085
Step #300, epoch #14, avg. train loss: 0.00067
Step #400, epoch #19, avg. train loss: 0.00071
Step #500, epoch #23, avg. train loss: 0.00069
Step #600, epoch #28, avg. train loss: 0.00064
Step #700, epoch #33, avg. train loss: 0.00067
Step #800, epoch #38, avg. train loss: 0.00063
Step #900, epoch #42, avg. train loss: 0.00064
Step #1000, epoch #47, avg. train loss: 0.00065
Step #1100, epoch #52, avg. train loss: 0.00063
Step #1200, epoch #57, avg. train loss: 0.00064
Step #1300, epoch #61, avg. train loss: 0.00064
Step #1400, 

# Keras Multilayer

In [7]:
from keras.layers import Dense,Dropout, Input
from keras.models import Sequential, Model

In [8]:
X_train_nn.shape

(2100, 3)