<a href="https://colab.research.google.com/github/Alsr96/LMMAES/blob/master/LMAES_layerwise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

class structtype:
    """Empty attribute container; fields are attached ad hoc after creation."""

def nw_to_vec(model,layer_idx=None):
    """Flatten the weights and biases of a model's layers into one vector.

    Only layers whose ``get_weights()`` returns exactly two arrays (unpacked
    as ``weights, biases``) contribute; every other layer is skipped.

    Args:
        model: Object exposing ``layers``, each item providing
            ``get_weights()``.
        layer_idx: Iterable of indices into ``model.layers`` to flatten, in
            the given order. ``None`` (default) means every layer.

    Returns:
        tuple: ``(vector, ind)``. ``vector`` is a 1-D float64 array holding,
        for each contributing layer, its flattened weights followed by its
        flattened biases. ``ind`` is a 1-D float64 array of cumulative
        offsets into ``vector``: it starts at 0 and gains two entries per
        contributing layer (end of weights, end of biases).
    """
    # `is None` rather than `== None`: with a NumPy index array `==` is
    # element-wise, and the `if` would raise for more than one index.
    if layer_idx is None:
        layer_idx = range(len(model.layers))

    # Collect the pieces and concatenate once instead of re-allocating the
    # whole vector on every np.append call. The leading empty float64 array
    # keeps the result float64 (and valid) even when nothing contributes.
    chunks = [np.empty((0,))]
    offsets = [0.0]
    total = 0
    for i in layer_idx:
        params = model.layers[i].get_weights()
        if len(params) != 2:
            continue
        for arr in params:  # weights first, then biases
            flat = np.reshape(arr, (np.size(arr),))
            total += flat.size
            offsets.append(total)
            chunks.append(flat)
    return np.concatenate(chunks), np.array(offsets)

def vec_to_nw(vector,ind,model,layer_idx=None):
    """Write a flat parameter vector back into a model's layers, in place.

    Layers are visited in the order given by ``layer_idx``; only layers whose
    ``get_weights()`` returns exactly two arrays (unpacked as
    ``weights, biases``) are updated. Each such layer consumes three
    consecutive entries of ``ind`` (start of weights, end of weights / start
    of biases, end of biases), sharing the last one with the next layer.

    Args:
        vector: 1-D array of parameters.
        ind: Sequence of cumulative offsets into ``vector``; every entry is
            passed through ``int()`` before slicing.
        model: Object exposing ``layers``, each item providing
            ``get_weights()`` and ``set_weights()``.
        layer_idx: Iterable of indices into ``model.layers``. ``None``
            (default) means every layer.

    Returns:
        The same ``model`` object, after its layers have been updated.
    """
    # `is None` rather than `== None`: with a NumPy index array `==` is
    # element-wise, and the `if` would raise for more than one index.
    if layer_idx is None:
        layer_idx = range(len(model.layers))

    k = 0  # position in `ind` of the current layer's first offset
    for i in layer_idx:
        layer = model.layers[i]
        params = layer.get_weights()
        if len(params) != 2:
            continue
        weights, biases = params
        w_start, w_end, b_end = int(ind[k]), int(ind[k + 1]), int(ind[k + 2])
        new_weights = np.reshape(vector[w_start:w_end], np.shape(weights))
        new_biases = np.reshape(vector[w_end:b_end], np.shape(biases))
        layer.set_weights((new_weights, new_biases))
        k += 2
    return model

# Example: split=[15,15] puts half of the samples in each part.
def split_data(input_data,target2,split):
    """Cut inputs and targets into two consecutive parts by a ratio.

    The cut position is ``int(split[0] * n / (split[0] + split[1]))`` where
    ``n`` is ``len(target2)``; no shuffling is performed.

    Args:
        input_data: Sliceable sequence of inputs.
        target2: Sliceable sequence of targets; its length sets the sample
            count.
        split: Two-item sequence of relative sizes ``[first, second]``.

    Returns:
        tuple: ``(s1_input, s1_target, s2_input, s2_target)``.
    """
    total = len(target2)
    first_share, second_share = split[0], split[1]
    cut = int(first_share*total/(first_share+second_share))
    head = (input_data[:cut], target2[:cut])
    tail = (input_data[cut:total], target2[cut:total])
    return head[0], head[1], tail[0], tail[1]

class lmmaes(object):
    """Gradient-free optimiser for a model's weights and biases.

    The update rules (weighted recombination of the best candidates, a step
    size path ``p_sigma``, and ``m`` direction vectors ``m_i`` with
    geometrically decreasing learning rates) follow the Limited-Memory Matrix
    Adaptation Evolution Strategy (LM-MA-ES).

    Attributes set by ``__init__``:
        sigma: Current global step size.
        layer_idx: Layer indices being optimised (``None`` = all layers).
        y, ind: Current parameter vector and its offsets, from ``nw_to_vec``.
        n_layers: Number of layers considered.
        n_dimensions: Length of ``y``.
        n_candidates: Candidates sampled per step.
        mu: Number of best candidates recombined per step.
        w: Recombination weights (sum to 1); mu_w: ``1 / sum(w**2)``.
        m: Number of direction vectors (evolution paths).
        c_sigma, c_d, c_c, const1, const2: Learning rates and derived factors.
        t: Number of completed steps.
        p_sigma: Step-size evolution path.
        m_i: Array of shape ``(m, n_dimensions)`` holding the direction
            vectors.
    """

    def __init__(self,model,n_candidates = None,sigma=1/10,function_budget=10000,function_target=None,layer_idx=None):
        """Read the model's parameters and derive the strategy constants.

        Args:
            model: Model whose layers are read through ``nw_to_vec``.
            n_candidates: Candidates per step; ``None`` selects
                ``4 + floor(3 * ln(n_dimensions))``.
            sigma: Initial step size.
            function_budget: Accepted for interface compatibility; unused.
            function_target: Accepted for interface compatibility; unused.
            layer_idx: Indices of the layers to optimise; ``None`` = all.

        Raises:
            ValueError: If the selected layers hold no parameters, or if
                fewer than two candidates are requested (no parent would be
                selected).
        """
        self.sigma = sigma
        self.layer_idx = layer_idx

        # Weights and biases of the selected layers as one row vector.
        self.y, self.ind = nw_to_vec(model, layer_idx=self.layer_idx)

        # `is None`, not `== None`: layer_idx may be a NumPy array.
        if self.layer_idx is None:
            self.n_layers = len(model.layers)
        else:
            self.n_layers = len(self.layer_idx)

        # nw_to_vec flattens exactly the (weights, biases) layers, so the
        # vector length is the number of search dimensions.
        self.n_dimensions = int(np.size(self.y))
        if self.n_dimensions == 0:
            raise ValueError("no weights/biases found in the selected layers")

        default_size = 4 + np.floor(3*np.log(self.n_dimensions))

        # Number of candidate solutions generated per step.
        self.n_candidates = default_size if n_candidates is None else n_candidates

        # Number of best solutions selected.
        self.mu = np.floor(self.n_candidates/2)
        n_parents = int(self.mu)
        if n_parents < 1:
            raise ValueError("n_candidates must be at least 2")

        # Log-decreasing weights for the selected solutions, normalised to 1.
        raw_w = np.log(self.mu+0.5) - np.log(np.arange(1, n_parents+1))
        self.w = raw_w/np.sum(raw_w)
        self.mu_w = 1/(np.sum(np.square(self.w)))

        # Number of evolution paths.
        self.m = default_size
        n_paths = int(self.m)

        self.c_sigma = 2*self.n_candidates/self.n_dimensions
        self.const1 = np.sqrt(self.mu_w*self.c_sigma*(2-self.c_sigma))

        # Per-path learning rates, decaying geometrically with the path index.
        self.c_d = np.array([1/(self.n_dimensions*(1.5**i)) for i in range(n_paths)])
        self.c_c = np.array([self.n_candidates/(self.n_dimensions*(4**i)) for i in range(n_paths)])
        self.const2 = np.sqrt(self.mu_w*self.c_c*(2-self.c_c))

        self.t = 0

        # Exponentially fading record of the most recent successful steps.
        self.p_sigma = np.zeros((self.n_dimensions,))

        # Vectors modelling the deviation of the transformation matrix from
        # the identity matrix.
        self.m_i = np.zeros((n_paths, self.n_dimensions))

    def train_on_batch(self,model,train_data,divide_data=False):
        """Run one optimisation step and load the new mean into ``model``.

        Every candidate is written into ``model`` and scored with
        ``model.evaluate``; the first returned value is used as the loss to
        minimise. The weights ``model`` holds on entry are not read: the
        search continues from ``self.y``.

        Args:
            model: Model to evaluate and update in place.
            train_data: ``(inputs, targets)`` pair.
            divide_data: If true, candidate ``i`` is scored only on the
                ``i``-th of ``n_candidates`` consecutive slices of the batch
                instead of on the whole batch.

        Returns:
            tuple: ``(model, func_calls, sigma)`` - the updated model, the
            number of ``evaluate`` calls made in this step, and the new step
            size.
        """
        n_cand = int(self.n_candidates)
        n_parents = int(self.mu)
        self.func_calls = 0

        z = np.random.randn(n_cand, self.n_dimensions)
        d = np.copy(z)

        # d <- (1 - c_d[j]) d + c_d[j] m_j (m_j . d) for each active path.
        # The previous form multiplied d by sum(m_j**2), which only rescales
        # d and never rotates it towards m_j.
        for j in range(min(self.t, int(self.m))):
            m_j = self.m_i[j]
            d = (1-self.c_d[j])*d + self.c_d[j]*np.outer(np.dot(d, m_j), m_j)

        train_input = train_data[0]
        train_target = train_data[1]
        # Full length: using len - 1 left the last sample out of every slice.
        num_samples = len(train_target)

        # One fitness value per candidate. Sizing this by the number of
        # paths (m) was only correct when n_candidates happened to equal m.
        f_list = np.empty(n_cand)
        for i in range(n_cand):
            model = vec_to_nw(self.y+self.sigma*d[i], self.ind, model, layer_idx=self.layer_idx)
            if not divide_data:
                res = model.evaluate(x=train_input, y=train_target, verbose=0)
            else:
                lo = int(i*num_samples/n_cand)
                hi = int((i+1)*num_samples/n_cand)
                res = model.evaluate(x=train_input[lo:hi], y=train_target[lo:hi], verbose=0)
            self.func_calls = self.func_calls+1
            # evaluate() may return a bare scalar or a [loss, metrics...] list.
            f_list[i] = np.atleast_1d(res)[0]

        # Indices of the mu best (lowest-loss) candidates, best first.
        order = np.argsort(f_list)[:n_parents]
        w_col = self.w[:, np.newaxis]
        step_d = np.sum(w_col*d[order], 0)
        step_z = np.sum(w_col*z[order], 0)

        y_next = self.y + self.sigma*step_d
        p_sigma_next = (1-self.c_sigma)*self.p_sigma + self.const1*step_z
        mag_p_sigma_next = np.linalg.norm(p_sigma_next)
        self.m_i = (1-self.c_c)[:, np.newaxis]*self.m_i + self.const2[:, np.newaxis]*step_z
        sigma_next = self.sigma*np.exp(self.c_sigma*(((mag_p_sigma_next**2)/self.n_dimensions)-1)/2)

        self.t = self.t+1
        self.sigma = sigma_next
        self.p_sigma = p_sigma_next
        self.y = y_next
        model = vec_to_nw(self.y, self.ind, model, layer_idx=self.layer_idx)

        # Progress line: best candidate loss, step counter, step size.
        print(f_list[order[:1]], self.t, self.sigma)
        return model, self.func_calls, self.sigma

    def validation_check(self,model,best_model,validation_data,best_res):
        """Compare ``model`` with the best result so far on validation data.

        Args:
            model: Model to evaluate.
            best_model: Model returned unchanged when ``model`` is not better.
            validation_data: ``(inputs, targets)`` pair.
            best_res: Best ``evaluate`` result so far; index 1 is compared.

        Returns:
            tuple: ``(validation_fail, best_res, res, best_model)``.
            ``validation_fail`` is ``False`` only when ``res[1]`` is strictly
            greater than ``best_res[1]``.
        """
        validation_input = validation_data[0]
        validation_target = validation_data[1]
        res = model.evaluate(x=validation_input, y=validation_target)
        if res[1] > best_res[1]:
            # NOTE: this keeps a reference to `model`, not a copy of its
            # weights; later in-place updates of `model` show through it.
            best_model = model
            best_res = res
            validation_fail = False
        else:
            validation_fail = True

        return validation_fail, best_res, res, best_model

In [0]:
from keras.datasets import mnist
from keras.layers import Dense, Conv2D, Flatten
from keras.models import Sequential
from keras.utils import to_categorical

# Fetch MNIST, already divided into a training and a test set.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Append a trailing channel axis of size 1 to the 28x28 images.
X_train = X_train.reshape(60000, 28, 28, 1)
X_test = X_test.reshape(10000, 28, 28, 1)

# One-hot encode the label columns.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

In [0]:
# Network: two 3x3 ReLU convolutions (64 then 32 filters), a flatten step,
# and a 10-way softmax output layer.
model = Sequential([
    Conv2D(64, kernel_size=3, activation='relu', input_shape=(28,28,1)),
    Conv2D(32, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(10, activation='softmax'),
])

# Compile with accuracy as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Keep the first 5/6 of the training set for training, the rest for validation.
train_val_ratio = [5,1]
xtrain, ytrain, xval, yval = split_data(X_train, y_train, train_val_ratio)

In [0]:
import numpy as np
# Optimiser over every layer of `model`; the candidate count is left for the
# class to derive, and the initial step size is 0.1.
lm = lmmaes(
    model,
    n_candidates=None,
    sigma=0.1,
    layer_idx=None,
)

def unison_shuffled_copies(a, b):
    """Return copies of ``a`` and ``b`` reordered by one shared permutation.

    Both arguments must have the same length and support indexing with an
    integer index array. The permutation is drawn from NumPy's global random
    state.
    """
    n_items = len(a)
    assert n_items == len(b)
    order = np.random.permutation(n_items)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

# Metrics of the untrained model on the training and validation splits.
res = model.evaluate(x=xtrain, y=ytrain)
# NOTE: `best_model` is a second name for the same object as `model`, not a
# snapshot of its weights.
best_model = model

best_res = model.evaluate(x=xval, y=yval)

for i in range(10):
    # New sample order for every pass over the training split.
    xtrain, ytrain = unison_shuffled_copies(xtrain, ytrain)

    # Use every sample: `len(ytrain) - 1` never reached the last one.
    no_samples = len(ytrain)
    # Roughly 2000 samples per group, and at least one group so that a small
    # training set still gets an optimisation step.
    n_groups = max(1, no_samples // 2000)
    for j in range(n_groups):
        start = j*no_samples // n_groups
        stop = (j+1)*no_samples // n_groups
        model, func_calls, sigma = lm.train_on_batch(
            best_model,
            train_data=(xtrain[start:stop], ytrain[start:stop]),
            divide_data=False,
        )

    validation_fail, best_res, res, best_model = lm.validation_check(model, best_model, (xval, yval), best_res)
    print(validation_fail, best_res[1], res[1])

log_validation
array([[ 0.        , 12.56754179,  0.05312   ,  0.        ],
       [ 1.        ,  4.14396236,  0.7429    ,  0.        ],
       [ 2.        ,  3.79903511,  0.7643    ,  0.        ],
       [ 3.        ,  2.84162023,  0.8237    ,  0.        ],
       [ 4.        ,  2.74491166,  0.8297    ,  0.        ],
       [ 5.        ,  2.52892918,  0.8431    ,  0.        ],
       [ 6.        ,  2.35968918,  0.8536    ,  0.        ],
       [ 7.        ,  2.47251584,  0.8466    ,  1.        ],
       [ 8.        ,  2.50475204,  0.8446    ,  2.        ],
       [ 9.        ,  2.30327584,  0.8571    ,  0.        ],
       [10.        ,  2.39837261,  0.8512    ,  1.        ],
       [11.        ,  2.45962137,  0.8474    ,  2.        ],
       [12.        ,  2.3129467 ,  0.8565    ,  3.        ],
       [13.        ,  2.45156233,  0.8479    ,  4.        ],
       [14.        ,  2.33228841,  0.8553    ,  5.        ],
       [15.        ,  2.42899699,  0.8493    ,  6.        ]])
log_train
array([[ 0.        , 12.56754179,  0.05312   ],
       [ 1.        ,  4.43473279,  0.72486   ],
       [ 2.        ,  4.02855679,  0.75006   ],
       [ 3.        ,  3.28325605,  0.7963    ],
       [ 4.        ,  3.02149818,  0.81254   ],
       [ 5.        ,  2.81293002,  0.82548   ],
       [ 6.        ,  2.61532217,  0.83774   ],
       [ 7.        ,  2.75361543,  0.82916   ],
       [ 8.        ,  2.77811494,  0.82764   ],
       [ 9.        ,  2.59436865,  0.83904   ],
       [10.        ,  2.65303852,  0.8354    ],
       [11.        ,  2.72363577,  0.83102   ],
       [12.        ,  2.61242091,  0.83792   ],
       [13.        ,  2.71718853,  0.83142   ],
       [14.        ,  2.56116537,  0.8411    ],
       [15.        ,  2.61790107,  0.83758   ]])
matrix
array([[ 907,    1,   11,    2,    4,   26,   16,    2,   10,    1],
       [   0, 1085,    9,    8,    6,    2,    4,    1,   20,    0],
       [  15,   18,  830,   14,   45,    3,   26,   26,   47,    8],
       [  25,   13,   46,  822,    4,   29,    1,   13,   40,   17],
       [   2,    3,    0,    2,  868,    0,   35,    5,   15,   52],
       [  59,   48,    6,   56,   38,  595,   18,    7,   50,   15],
       [  28,    6,   12,    7,   31,   11,  837,    5,   20,    1],
       [   1,   11,   41,    2,   10,    0,    2,  927,    6,   28],
       [   5,   12,   13,   14,    9,   17,    8,   20,  861,   15],
       [   8,    4,    7,   17,   81,    9,   12,   48,   25,  798]])
category wise accuracy
[86.38095238 90.34138218 85.12820513 87.07627119 79.19708029 85.98265896
 87.27841502 87.95066414 78.70201097 85.34759358 85.3       ]
