<a href="https://colab.research.google.com/github/ParitoshP702/Bilevel-Optimization/blob/main/Genetic_Algorithm(CIFAR).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import random

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tqdm import tqdm

In [None]:
pip install gurobipy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gurobipy
  Downloading gurobipy-10.0.0-cp38-cp38-manylinux2014_x86_64.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m52.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gurobipy
Successfully installed gurobipy-10.0.0


In [None]:
import gurobipy as gp

In [None]:
# Gurobi Web License Service credentials are read from the environment so that
# no secret is stored in (or leaked through) the notebook. Set GRB_WLSACCESSID,
# GRB_WLSSECRET and GRB_LICENSEID before starting the kernel.
# NOTE(security): credentials that were previously committed in this cell must
# be treated as compromised and revoked in the Gurobi licence portal.
_required_env_vars = ("GRB_WLSACCESSID", "GRB_WLSSECRET", "GRB_LICENSEID")
_missing_env_vars = [name for name in _required_env_vars if not os.environ.get(name)]
if _missing_env_vars:
    raise RuntimeError(
        "Missing Gurobi licence environment variable(s): " + ", ".join(_missing_env_vars)
    )

params = {
    "WLSACCESSID": os.environ["GRB_WLSACCESSID"],
    "WLSSECRET": os.environ["GRB_WLSSECRET"],
    "LICENSEID": int(os.environ["GRB_LICENSEID"]),
}
# Shared Gurobi environment; every LP built later in the notebook is created inside it.
env = gp.Env(params=params)

# Create the model within the Gurobi environment

Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 914249
Academic license - for non-commercial use only - registered to ppankaj21@iitk.ac.in


In [None]:
# Load the CIFAR-10 train/test splits as (images, labels) pairs
# (the archive is downloaded the first time this runs).
(X_train,Y_train),(X_test,Y_test) = tf.keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Number of samples used for fitting, validation (hyperparameter selection)
# and final testing. Kept small because the fine-tuning step builds a full
# Hessian of the training loss.
train_count = 3500
eval_count = 1000
test_count = 1000

# Seed every RNG the notebook relies on (GA sampling, NumPy, Keras weight
# initialisation / batch shuffling) so that Restart & Run All is repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Scale the image arrays by 1/255 (presumably raw 0-255 pixel values -> [0, 1]).
# NOTE: both names are reassigned from themselves, so re-running this cell
# divides by 255 again; re-run the data-loading cell first if you need to repeat it.
X_train = X_train/255.0
X_test  = X_test/255.0

In [None]:
# The first `train_count` samples are used for fitting and the following
# `eval_count` samples form the validation set; images and labels are cut at
# the same position so they stay aligned.
x_train, x_eval = np.split(X_train[:train_count + eval_count], [train_count])
y_train, y_eval = np.split(Y_train[:train_count + eval_count], [train_count])

In [None]:
# Keep only the first `test_count` samples of the official test split.
x_test, y_test = X_test[:test_count], Y_test[:test_count]

In [None]:
# Flatten every image into a single feature vector (one row per sample) so it
# can be fed to the Dense input layer (declared with input_dim = 3072 below).
x_train, x_eval, x_test = (
    split.reshape(split.shape[0], -1) for split in (x_train, x_eval, x_test)
)

In [None]:
# One-hot encode the training labels: row i holds 1.0 in the column equal to
# y_train[i] and 0.0 in the other nine columns (built by broadcasting the
# label column against the class indices 0..9).
y_training = (np.asarray(y_train).reshape(-1, 1) == np.arange(10)).astype(float)

In [None]:
# One-hot encode the validation labels: row i holds 1.0 in the column equal to
# y_eval[i] and 0.0 in the other nine columns.
y_val_array = (np.asarray(y_eval).reshape(-1, 1) == np.arange(10)).astype(float)

In [None]:
# One-hot encode the test labels: row i holds 1.0 in the column equal to
# y_test[i] and 0.0 in the other nine columns.
y_testing = (np.asarray(y_test).reshape(-1, 1) == np.arange(10)).astype(float)

In [None]:
# Module-level categorical cross-entropy loss shared by the evaluation cells.
# Keras losses are called as loss_object(y_true, y_pred).
loss_object = tf.keras.losses.CategoricalCrossentropy()

In [None]:
def complete_weight_array(model):
  """Flatten all weights of ``model`` into a single 1-D NumPy array.

  The tensors in ``model.weights`` are visited in order and each one is
  unrolled in row-major (C) order, so for a stack of Dense layers the result
  is ``[kernel_0, bias_0, kernel_1, bias_1, ...]``.

  Tensors of any rank are supported (the previous element-by-element loops
  only handled 2-D kernels at even positions and 1-D biases at odd positions).

  Args:
    model: object exposing ``weights``, an iterable of tensors/variables that
      provide ``.numpy()``.

  Returns:
    1-D ``np.ndarray`` with every weight value; an empty array if the model
    has no weights.
  """
  flat_parts = [np.asarray(weight.numpy()).reshape(-1) for weight in model.weights]
  if not flat_parts:
    return np.array([])
  return np.concatenate(flat_parts)

In [None]:
def weight_array_for_hessian(model):
  """Flatten the model weights with the first layer's entries replaced by 0.

  The layout is the same as ``complete_weight_array`` (tensors of
  ``model.weights`` in order, each unrolled row-major), but the leading
  entries that belong to ``model.layers[0]`` (its kernel and bias) are set to
  zero. In this notebook the first Dense layer is built without a kernel
  regulariser, which is presumably why it is masked out of the
  regularisation term.

  Args:
    model: object exposing ``layers`` (first layer has ``weights[0]`` and
      ``weights[1]``) and ``weights``; tensors must provide ``.numpy()``.

  Returns:
    1-D float64 ``np.ndarray`` of length equal to the total number of weights.
    (The original loop also produced float64 because it mixed Python ``0``
    with float32 values.)
  """
  first_layer_weights = model.layers[0].weights
  skip_length = first_layer_weights[0].numpy().size + first_layer_weights[1].numpy().size

  # astype() copies, so zeroing the prefix never touches the model's own buffers.
  flat = np.concatenate(
      [np.asarray(weight.numpy()).reshape(-1) for weight in model.weights]
  ).astype(np.float64)
  flat[:skip_length] = 0.0
  return flat

In [None]:
def compute_gradient(x_target,y_target,model,loss_fn=None):
  """Return the gradient of a loss w.r.t. the trainable variables as one flat vector.

  The previous version always differentiated a mean-squared-error loss, while
  every other loss in this notebook (training, Hessian, validation line
  search) is categorical cross-entropy. The default is now categorical
  cross-entropy so the descent direction matches the loss that is actually
  minimised; pass ``loss_fn=tf.keras.losses.MeanSquaredError()`` to reproduce
  the old behaviour.

  Args:
    x_target: inputs fed to ``model``.
    y_target: targets passed to the loss as ``y_true``.
    model: Keras model; gradients are taken w.r.t. ``model.trainable_variables``.
    loss_fn: optional callable ``loss_fn(y_true, y_pred)``.

  Returns:
    1-D ``np.ndarray``: the per-variable gradients, each unrolled row-major
    and concatenated in the order of ``model.trainable_variables``.
  """
  if loss_fn is None:
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

  with tf.GradientTape() as tape:
    y_pred_array = model(x_target,training = True)
    loss = loss_fn(y_target,y_pred_array)
  gradients = tape.gradient(loss,model.trainable_variables)

  # reshape(-1) is row-major, the same ordering the weight-flattening helpers use.
  return np.concatenate([grad.numpy().reshape(-1) for grad in gradients])


In [None]:
def compute_hessian(model, x=None, y=None):
  """Compute the full Hessian of the categorical cross-entropy loss.

  The loss is evaluated on ``(x, y)`` and differentiated twice w.r.t.
  ``model.trainable_variables``. Rows and columns follow the flattening used
  elsewhere in the notebook: variables in order, each unrolled row-major.

  Args:
    model: Keras model.
    x: inputs; defaults to the notebook-level ``x_train``.
    y: one-hot targets; defaults to the notebook-level ``y_training``.

  Returns:
    2-D ``np.ndarray`` of shape ``(P, P)`` where ``P`` is the total number of
    trainable parameters.
  """
  x = x_train if x is None else x
  y = y_training if y is None else y
  variables = model.trainable_variables
  loss_fn = tf.keras.losses.CategoricalCrossentropy()

  # The outer tape records the gradient computation itself so it can be
  # differentiated again. It must be persistent because jacobian() is called
  # once per variable; the inner tape is used a single time.
  with tf.GradientTape(persistent = True) as outer_tape:
    with tf.GradientTape() as inner_tape:
      y_pred_array = model(x,training = True)
      loss = loss_fn(y,y_pred_array)
    gradients = inner_tape.gradient(loss, variables)

  try:
    block_rows = []
    for grad in gradients:
      n_rows = grad.shape.num_elements()
      jacobians = outer_tape.jacobian(grad, variables)
      # Each Jacobian has shape grad.shape + variable.shape; collapsing it to
      # (grad.size, variable.size) covers every kernel/bias combination.
      block_rows.append([jac.numpy().reshape(n_rows, -1) for jac in jacobians])
  finally:
    # Persistent tapes hold on to their resources until they are dropped.
    del outer_tape

  return np.block(block_rows)



In [None]:
def perform_fine_tuning(model,params_model):
  """Jointly adjust the weights and the L2 strength along an LP-derived direction.

  Steps:
    1. Build a bordered matrix ``M``: the training-loss Hessian with an extra
       first row/column holding ``w / len(y_train)`` (``w`` = flattened weights
       with the first layer zeroed, see ``weight_array_for_hessian``); the
       corner entry is 0.
    2. Solve the LP ``min [0, grad_val] . d`` subject to ``-0.1 <= M d <= 0.1``,
       ``d[0] in [1e-5, 10]`` and the remaining entries in ``[-10, 10]``
       (Gurobi minimises by default). ``d[0]`` is the step for the
       regularisation parameter, the rest is the step for the weights.
    3. Normalise ``d`` and evaluate the validation loss at
       ``[reg, weights] + t * d`` for 101 step sizes ``t = -0.1, 0.1, ..., 19.9``.
    4. Return the candidate with the smallest validation loss.

  Args:
    model: trained Keras model built by ``train_model``.
    params_model: individual ``[neurons_per_layer, number_of_layers,
      activation, regularization_parameter]``.

  Returns:
    Tuple ``(regularization_parameter, best_model, validation_loss)`` for the
    best candidate.

  Raises:
    RuntimeError: if Gurobi terminates without a solution.

  NOTE(review): the candidate regularisation value ``reg + t * d[0]`` is not
  clamped, so the ``t = -0.1`` candidate can be negative - confirm whether
  that is acceptable.
  """
  neurons_per_layer = params_model[0]
  number_of_layers = params_model[1]
  activation_fun = params_model[2]
  reg_param = params_model[3]

  ###calculating the hessian for the model and the gradient of the validation loss
  hessian_matrix = compute_hessian(model)
  grad_validation = compute_gradient(x_eval,y_val_array,model)
  final_weights_array_new = weight_array_for_hessian(model)
  l = len(final_weights_array_new)

  ##bordering the hessian with the regularization row and column
  weight_array_vec = final_weights_array_new.reshape(l,1)/len(y_train)
  hessian_col_mat = np.concatenate((weight_array_vec,hessian_matrix),axis = 1)
  weight_array_withreg = np.concatenate(([[0]],final_weights_array_new.reshape(1,l)),axis = 1)/len(y_train)
  hessian_mat_with_reg = np.concatenate((weight_array_withreg,hessian_col_mat),axis = 0)

  ##validation gradient with a leading 0 for the regularization variable
  grad_validation_new = np.concatenate(([[0]],grad_validation.reshape(1,l)),axis = 1)

  ##Solving the linear program
  ub = [10]*(l+1)
  lb = [1e-5] + [-10]*l  # the regularization step is forced to be strictly positive

  m = gp.Model(env=env)
  try:
    x = m.addMVar((l+1,),lb = lb, ub = ub )
    m.setObjective(grad_validation_new@x)
    m.addConstr(hessian_mat_with_reg@x <= 0.1)
    m.addConstr(hessian_mat_with_reg@x >= -0.1)
    x.PStart = np.zeros(l+1)
    m.optimize()
    if m.SolCount == 0:
      raise RuntimeError(
          "Gurobi did not return a solution for the fine-tuning LP (status %s)" % m.Status
      )
    values = np.array(m.getAttr("x",m.getVars()))
  finally:
    # The LP holds tens of millions of non-zeros; free it as soon as we are done.
    m.dispose()

  # lb[0] > 0 guarantees a non-zero vector, so the normalisation is safe.
  values = values/np.linalg.norm(values)

  final_weights_array = complete_weight_array(model)
  weight_array_with_reg = np.concatenate(([[reg_param]],final_weights_array.reshape(1,l)),axis = 1).reshape(-1)
  descent_factors = np.array([i*1e-3 for i in range(-100,20000,200)])

  ##row i is the candidate [reg, weights] + descent_factors[i] * direction
  weight_sample_space_matrix = weight_array_with_reg + np.outer(descent_factors,values)

  ##defining the loss object
  loss_object = tf.keras.losses.CategoricalCrossentropy()

  def validation_loss_computation(weight_and_reg_array):
      """Build a model from a flat [reg, weights] vector; return (val_loss, train_loss, model)."""
      candidate_reg = weight_and_reg_array[0]
      model_demo = Sequential()
      model_demo.add(Dense(units = 2, input_dim = 3072))
      for _ in range(number_of_layers):
          model_demo.add(Dense(units = neurons_per_layer, activation = activation_fun, kernel_regularizer = tf.keras.regularizers.L2(candidate_reg)))
      model_demo.add(Dense(units = 10,activation = "softmax", kernel_regularizer = tf.keras.regularizers.L2(candidate_reg)))
      model_demo.compile(loss = "mean_squared_error", optimizer = "Adam", metrics = ["accuracy"])

      weight_tracker = 1  ##index 0 is the regularization parameter; the weights start at 1
      for i in range(len(model_demo.layers)):
        ##shapes are taken from the already-trained model's layer i
        kernel_shape = model.layers[i].weights[0].numpy().shape
        bias_shape = model.layers[i].weights[1].numpy().shape
        kernel_size = kernel_shape[0]*kernel_shape[1]
        new_kernel = weight_and_reg_array[weight_tracker:weight_tracker+kernel_size].reshape(kernel_shape)
        weight_tracker += kernel_size
        new_bias = weight_and_reg_array[weight_tracker:weight_tracker+bias_shape[0]].reshape(bias_shape)
        weight_tracker += bias_shape[0]
        model_demo.layers[i].set_weights([new_kernel,new_bias])

      y_pred_array = model_demo(np.array(x_eval),training = False)
      y_pred_training = model_demo(np.array(x_train),training = False)
      loss = loss_object(y_val_array,y_pred_array)
      loss_t = loss_object(y_training,y_pred_training)
      return loss,loss_t,model_demo

  loss_array_valid = np.empty(shape = len(descent_factors),dtype = float)##validation loss per candidate
  loss_array_train = np.empty(shape = len(descent_factors),dtype = float)##training loss per candidate

  for i in range(len(descent_factors)):
    loss_array_valid[i],loss_array_train[i],_ = validation_loss_computation(weight_sample_space_matrix[i])

  ideal_weight_array = weight_sample_space_matrix[loss_array_valid.argmin()]
  ideal_regularization_parameter = ideal_weight_array[0]
  ##rebuild only the winning model instead of keeping all candidates alive
  _,_,best_model = validation_loss_computation(ideal_weight_array)

  return ideal_regularization_parameter,best_model,loss_array_valid.min()



In [None]:
def hyperparameters(returnAs='vals'):
    """Return the discrete hyperparameter search space of the genetic algorithm.

    The position of each entry defines the gene order of an individual:
    ``[neurons_per_layer, number_of_layers, activation_function,
    regularization_parameter]``.

    Args:
        returnAs: ``'vals'`` (default) for the list of option lists,
            ``'keys'`` for the list of hyperparameter names, or ``'dict'``
            for the name -> options mapping.

    Returns:
        The search space in the requested form.

    Raises:
        ValueError: if ``returnAs`` is not one of the supported values
            (previously ``None`` was returned silently).
    """
    parameters = {
        "neurons_per_layer": [5, 10, 15],
        "number_of_layers": [1, 2, 3],
        "activation_function": ['relu', 'tanh', 'sigmoid'],
        "regularization_parameter": [1e-10, 1e-9, 1e-8],
    }

    if returnAs == 'dict':
        return parameters
    if returnAs == 'vals':
        return list(parameters.values())
    if returnAs == 'keys':
        return list(parameters.keys())
    raise ValueError("returnAs must be 'dict', 'vals' or 'keys', got %r" % (returnAs,))

# def hyperparameters_old():
#     parameters = []
#     units_per_layer = [5, 10, 15]
#     layers = [1, 2, 3]
#     activation = ['relu', 'tanh', 'sigmoid']
#     optimizer = ['adam', 'rmsprop']
#     parameters.append(units_per_layer)
#     parameters.append(layers)
#     parameters.append(activation)
#     parameters.append(optimizer)

#     return(parameters)

In [None]:
def generate_population(size):
    """Sample ``size`` distinct individuals uniformly from the search space.

    Each individual is a list with one randomly chosen option per
    hyperparameter, in the order returned by ``hyperparameters()``.
    Duplicates are rejected and re-drawn.

    Args:
        size: number of distinct individuals to generate.

    Returns:
        List of ``size`` individuals (empty if ``size <= 0``).

    Raises:
        ValueError: if ``size`` exceeds the number of distinct individuals,
            which previously made the rejection loop run forever.
    """
    parameters = hyperparameters()

    distinct_individuals = 1
    for options in parameters:
        distinct_individuals *= len(set(options))
    if size > distinct_individuals:
        raise ValueError(
            "Cannot draw %d distinct individuals from a search space of %d"
            % (size, distinct_individuals)
        )

    population = []
    while len(population) < size:
        individual = [random.choice(options) for options in parameters]
        if individual not in population:
            population.append(individual)
    return population

In [None]:
# Smoke test: draw and display 10 distinct random individuals.
generate_population(10)

[[5, 2, 'relu', 1e-08],
 [10, 2, 'sigmoid', 1e-10],
 [15, 1, 'tanh', 1e-09],
 [15, 3, 'tanh', 1e-09],
 [10, 1, 'tanh', 1e-09],
 [15, 3, 'relu', 1e-08],
 [5, 1, 'sigmoid', 1e-09],
 [5, 3, 'relu', 1e-10],
 [10, 2, 'tanh', 1e-10],
 [15, 2, 'sigmoid', 1e-10]]

In [None]:
def new_child(parent1, parent2):
    """Single-point crossover returning one randomly chosen offspring.

    A cut position is drawn uniformly from ``0..len(parent1)`` (both ends
    included, so a cut at either end reproduces one parent unchanged). The two
    complementary recombinations are formed and one of them is returned.
    The parents are not modified; the child is a new list.
    """
    cut = random.randint(0, len(parent1))
    candidates = [
        parent1[:cut] + parent2[cut:],
        parent2[:cut] + parent1[cut:],
    ]
    return random.choice(candidates)

In [None]:
def mutation(population):
    """Randomly mutate individuals in place and return the same list.

    Each chromosome is mutated with probability 0.1: one gene position is
    picked at random and overwritten with a random option for that
    hyperparameter (which may coincide with the current value).
    """
    gene_options = hyperparameters()
    gene_count = len(gene_options)
    for chromosome in population:
        if random.random() >= 0.1:
            continue
        gene = random.choice(range(gene_count))
        chromosome[gene] = random.choice(gene_options[gene])
    return population

In [None]:
def train_model(parameters,initialWeights=None):
    """Build (without compiling or fitting) the network described by an individual.

    Architecture: a linear 2-unit Dense layer on the 3072-dimensional input,
    then ``number_of_layers`` hidden Dense layers and a 10-way softmax output.
    The hidden and output layers carry an L2 kernel regulariser.

    Args:
        parameters: individual ``[neurons_per_layer, number_of_layers,
            activation_function, regularization_parameter]``.
        initialWeights: optional weights to load into the new model. This
            argument used to be ignored; it is now passed to
            ``model.set_weights`` (assumed to be a list in that format -
            confirm with callers).

    Returns:
        The uncompiled ``Sequential`` model.
    """
    neurons_per_layer = parameters[0]
    no_of_layers = parameters[1]
    activation_function = parameters[2]
    regularization_param = parameters[3]

    model = Sequential()
    model.add(Dense(units=2, input_dim=3072))

    for _ in range(no_of_layers):
        model.add(Dense(units=neurons_per_layer, activation=activation_function,kernel_regularizer = tf.keras.regularizers.L2(regularization_param)))

    model.add(Dense(units = 10,  activation = 'softmax',kernel_regularizer = tf.keras.regularizers.L2(regularization_param)))

    if initialWeights is not None:
        model.set_weights(initialWeights)

    return(model)

def evaluate_model(individual,initialWeights=None):
    """Build, train and score the model described by ``individual``.

    The model is fitted on ``(x_train, y_training)`` for 10 epochs with Adam
    and categorical cross-entropy, then scored on the validation split.

    Args:
        individual: hyperparameter list understood by ``train_model``.
        initialWeights: forwarded to ``train_model``.

    Returns:
        Tuple ``(loss_score, model)``: validation categorical cross-entropy
        (a scalar tensor) and the trained model.
    """
    model = train_model(individual,initialWeights)

    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="Adam", metrics=['accuracy'])
    model.fit(x_train, y_training, batch_size = 64, epochs = 10)

    #Evaluate on evaluation data
    y_pred = model(x_eval,training = False)
    # Keras losses take (y_true, y_pred); the arguments used to be swapped,
    # which evaluates log() on the one-hot labels and gives a meaningless score.
    loss_score = loss_object(y_val_array,y_pred)
    return loss_score,model

In [None]:
generations = 5
population_size = 10

initial_population = generate_population(population_size)

# Phase 1: train every individual of the initial population from scratch.
losses = []
models = []
for individual in initial_population:
  loss,model = evaluate_model(individual)
  losses.append(loss)
  models.append(model)

# Phase 2: fine-tune each trained model. The tuned regularization parameter
# overwrites gene 3 of the individual, and the stored model and loss are
# replaced by the fine-tuned ones.
for i, individual in enumerate(initial_population):
  tuned_reg,tuned_model,tuned_loss = perform_fine_tuning(models[i],individual)
  individual[3] = tuned_reg
  models[i] = tuned_model
  losses[i] = tuned_loss

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E



Setting LP warm start basis or start ignored
Gurobi Optimizer version 10.0.0 build v10.0.0rc2 (linux64)

CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Academic license - for non-commercial use only - registered to ppankaj21@iitk.ac.in
Optimize a model with 12704 rows, 6352 columns and 80671222 nonzeros
Model fingerprint: 0x78404bce
Coefficient statistics:
  Matrix range     [9e-11, 2e-01]
  Objective range  [4e-09, 8e-04]
  Bounds range     [1e-05, 1e+01]
  RHS range        [1e-01, 1e-01]
Presolve removed 0 rows and 0 columns (presolve time = 7s) ...
Presolve removed 0 rows and 0 columns (presolve time = 12s) ...
Presolve removed 0 rows and 0 columns (presolve time = 22s) ...
Presolve removed 6352 rows and 0 columns
Presolve time: 25.17s
Presolved: 6352 rows, 12704 columns, 40341963 nonzeros

Concurrent LP optimizer: primal simplex, dual simplex, and barrier
Showing barrie

In [None]:
# generation_wise_best_loss = []

In [None]:
# Show the individuals (gene 3 now holds the fine-tuned regularization
# parameter) and their validation losses after fine-tuning.
print(initial_population)
print(losses)

[[15, 1, 'sigmoid', 8.484527069257649e-08], [15, 3, 'relu', 0.07474082851512698], [5, 3, 'relu', 8.24731542010705e-08], [15, 3, 'sigmoid', 8.174188065172276e-08], [5, 3, 'relu', 9.243049043134073e-08], [5, 2, 'tanh', 0.0631767487548697], [5, 3, 'tanh', 7.216560929843531e-08], [15, 3, 'sigmoid', 7.18342904381602e-08], [15, 2, 'sigmoid', 0.07816582018745337], [5, 2, 'sigmoid', 0.14067856106187268]]
[1.8675769567489624, 1.9467943906784058, 2.069425582885742, 2.050471305847168, 2.031968832015991, 1.8606630563735962, 1.8620715141296387, 2.053598642349243, 1.9604575634002686, 2.217613697052002]


In [None]:
#Append fitness (validation loss, lower is better) to each individual
population_with_fitness = [pop+[f] for pop,f in zip(initial_population,losses)]

# Best validation loss in the population after each generation. A later cell
# pickles this list, so it has to be recorded here.
generation_wise_best_loss = []
number_of_offspring = 2

for _ in tqdm(range(generations)):
    # Pick two parents uniformly at random (randint includes both bounds, and
    # the same individual may be drawn twice).
    r1 = random.randint(0, population_size-1)
    r2 = random.randint(0, population_size-1)

    # Strip the trailing fitness value to recover the genes.
    parent1 = population_with_fitness[r1][0:-1]
    parent2 = population_with_fitness[r2][0:-1]

    offspring = [new_child(parent1,parent2) for _child in range(number_of_offspring)]
    offspring = mutation(offspring)

    # Train all offspring first, then fine-tune them (same two-phase order as
    # the initial population).
    offspring_losses = []
    offspring_models = []
    for individual in offspring:
        loss,model = evaluate_model(individual)
        offspring_losses.append(loss)
        offspring_models.append(model)
    for i in range(len(offspring)):
        offspring[i][3],offspring_models[i],offspring_losses[i] = perform_fine_tuning(offspring_models[i],offspring[i])

    offspring_with_fitness = [pop+[f] for pop,f in zip(offspring,offspring_losses)]
    population_with_fitness.extend(offspring_with_fitness)

    #Sort ascending by loss so the best (lowest-loss) individuals come first
    population_with_fitness.sort(key = lambda member: member[-1])

    #Keep the best members
    population_with_fitness = population_with_fitness[0:population_size]
    generation_wise_best_loss.append(float(population_with_fitness[0][-1]))

best_individual = population_with_fitness[0][0:-1]
best_fitness = population_with_fitness[0][-1]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Setting LP warm start basis or start ignored
Gurobi Optimizer version 10.0.0 build v10.0.0rc2 (linux64)

CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Academic license - for non-commercial use only - registered to ppankaj21@iitk.ac.in
Optimize a model with 13664 rows, 6832 columns and 91257796 nonzeros
Model fingerprint: 0x93692715
Coefficient statistics:
  Matrix range     [3e-13, 1e+00]
  Objective range  [4e-08, 3e-03]
  Bounds range     [1e-05, 1e+01]
  RHS range        [1e-01, 1e-01]
Presolve removed 148 rows and 0 columns (presolve time = 5s) ...
Presolve removed 148 rows and 72 columns (presolve time = 12s) ...
Presolve removed 148 rows and 72 colu

 20%|██        | 1/5 [06:22<25:29, 382.28s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Setting LP warm start basis or start ignored
Gurobi Optimizer version 10.0.0 build v10.0.0rc2 (linux64)

CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Academic license - for non-commercial use only - registered to ppankaj21@iitk.ac.in
Optimize a model with 12564 rows, 6282 columns and 78902462 nonzeros
Model fingerprint: 0x7aa0dd5b
Coefficient statistics:
  Matrix range     [1e-11, 5e-01]
  Objective range  [6e-07, 6e-04]
  Bounds range     [1e-05, 1e+01]
  RHS range        [1e-01, 1e-01]
Presolve removed 0 rows and 0 columns (presolve time = 6s) ...
Presolve removed 0 rows and 0 columns (presolve time = 10s) ...
Presolve removed 0 rows and 0 columns (pre

 40%|████      | 2/5 [10:45<15:36, 312.10s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Setting LP warm start basis or start ignored
Gurobi Optimizer version 10.0.0 build v10.0.0rc2 (linux64)

CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Academic license - for non-commercial use only - registered to ppankaj21@iitk.ac.in
Optimize a model with 13664 rows, 6832 columns and 86811170 nonzeros
Model fingerprint: 0xe755cd14
Coefficient statistics:
  Matrix range     [2e-13, 4e+00]
  Objective range  [1e-09, 2e-03]
  Bounds range     [1e-05, 1e+01]
  RHS range        [1e-01, 1e-01]
Presolve removed 510 rows and 242 columns (presolve time = 7s) ...
Presolve removed 510 rows and 242 columns (presolve time = 11s) ...
Presolve removed 510 rows and 242 

 60%|██████    | 3/5 [15:37<10:06, 303.25s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Setting LP warm start basis or start ignored
Gurobi Optimizer version 10.0.0 build v10.0.0rc2 (linux64)

CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Academic license - for non-commercial use only - registered to ppankaj21@iitk.ac.in
Optimize a model with 13664 rows, 6832 columns and 91759760 nonzeros
Model fingerprint: 0x479e10c5
Coefficient statistics:
  Matrix range     [3e-13, 9e-01]
  Objective range  [3e-10, 4e-03]
  Bounds range     [1e-05, 1e+01]
  RHS range        [1e-01, 1e-01]
Presolve removed 114 rows and 0 columns (presolve time = 6s) ...
Presolve removed 114 rows and 54 columns (presolve time = 12s) ...
Presolve removed 114 rows and 54 colu

 80%|████████  | 4/5 [22:18<05:41, 341.65s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Setting LP warm start basis or start ignored
Gurobi Optimizer version 10.0.0 build v10.0.0rc2 (linux64)

CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Academic license - for non-commercial use only - registered to ppankaj21@iitk.ac.in
Optimize a model with 13664 rows, 6832 columns and 88411102 nonzeros
Model fingerprint: 0x74e8052d
Coefficient statistics:
  Matrix range     [5e-13, 2e+00]
  Objective range  [4e-10, 3e-03]
  Bounds range     [1e-05, 1e+01]
  RHS range        [1e-01, 1e-01]
Presolve removed 456 rows and 0 columns (presolve time = 6s) ...
Presolve removed 456 rows and 173 columns (presolve time = 11s) ...
Presolve removed 456 rows and 173 co

100%|██████████| 5/5 [27:38<00:00, 331.79s/it]


In [None]:
best_individual # genes: [neurons_per_layer, number_of_layers, activation_function, regularization_parameter]


[15, 3, 'relu', 1.5260539923722828e-07]

In [None]:
# Validation loss of the best individual (last element of its population entry).
best_fitness

1.8272013664245605

In [None]:
# s = evaluate_model(best_individual)
# s

In [None]:
# Final population, sorted by ascending loss; each row is the four genes followed by the loss.
population_with_fitness

[[15, 3, 'relu', 1.5260539923722828e-07, 1.8272013664245605],
 [15, 3, 'tanh', 0.06594886242309055, 1.829402208328247],
 [15, 3, 'relu', 0.09018558650309791, 1.8299793004989624],
 [5, 2, 'tanh', 0.0631767487548697, 1.8606630563735962],
 [5, 3, 'tanh', 7.216560929843531e-08, 1.8620715141296387],
 [15, 1, 'sigmoid', 8.484527069257649e-08, 1.8675769567489624],
 [15, 3, 'relu', 0.0903385981646327, 1.8821998834609985],
 [15, 3, 'relu', 0.07533746453026165, 1.889880657196045],
 [15, 3, 'relu', 0.07474082851512698, 1.9467943906784058],
 [5, 3, 'tanh', 1.531303440426374e-07, 1.9556595087051392]]

In [None]:
# Genes of the lowest-loss individual, used below to rebuild the final model.
best_individual

[15, 3, 'relu', 1.5260539923722828e-07]

In [None]:
# Unpack the winning genes (order: neurons, layers, activation, L2 strength).
# The activation line previously read `best_indvidual`, which raised the
# NameError shown in this cell's output.
number_of_layers = best_individual[1]
neurons_per_layer = best_individual[0]
activation_function = best_individual[2]
reg = best_individual[3]

NameError: ignored

In [None]:
# Rebuild the winning architecture with the shared builder (linear 2-unit
# input layer, `number_of_layers` hidden layers, softmax output, L2 on the
# hidden and output kernels) and compile it for classification.
model = train_model([neurons_per_layer, number_of_layers, activation_function, reg])
model.compile(loss = tf.keras.losses.CategoricalCrossentropy(),optimizer = "Adam", metrics = ["accuracy"])

In [None]:
# Train the rebuilt model from scratch on the training split, then measure its
# accuracy on the validation split (the loss returned by evaluate() is discarded).
model.fit(x_train,y_training, epochs = 10, batch_size = 64)
_,accuracy = model.evaluate(x_eval,y_val_array)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
accuracy##validation accuracy of the model retrained with the GA + LP-selected hyperparameters

0.22200000286102295

In [None]:
import pickle

In [None]:
# Persist the per-generation best losses. Fixes over the previous version:
# the module is `pickle` (not `pickel`), dump() takes (obj, file) in that
# order, and the file is closed even if dumping fails.
# If the GA cell did not record `generation_wise_best_loss` in this session,
# fall back to storing just the final best loss.
losses_to_store = globals().get("generation_wise_best_loss")
if losses_to_store is None:
    losses_to_store = [float(best_fitness)]

with open("GA_CIFAR_WITH_LP","wb") as file_to_store_loss:
    pickle.dump(losses_to_store,file_to_store_loss)

In [None]:
# Best population entry: the four genes followed by its validation loss.
population_with_fitness[0]

In [None]:
# Take the final hyperparameters directly from the GA result instead of
# hand-copied literals (the literals had drifted: 10 neurons here versus 15 in
# the best individual displayed above).
neurons_per_layer, number_of_layers, activation_function, reg = best_individual

In [None]:
# Forward pass of the final model on the held-out test images (inference mode).
y_pred_test = model(x_test,training = False)

In [None]:
# Test-set categorical cross-entropy. Keras losses take (y_true, y_pred); the
# arguments used to be swapped, which inflated the reported value.
loss_object(y_testing,y_pred_test).numpy()

13.696813470037549