In [1]:
import torch
from torch import nn, optim
import torchvision
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import numpy as np
import random
from deap import base, creator, tools
import multiprocessing
multiprocessing.set_start_method('spawn', True)
from concurrent.futures import ProcessPoolExecutor
from functions import *
import time
from sklearn.metrics import mean_squared_error
from torch.utils.data import DataLoader

# Select the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = my_CNN2().to(device)
print("Model device:", next(model.parameters()).device)

# Load the trained CNN weights. `map_location` remaps tensors that were saved
# from a GPU onto `device`, so the checkpoint also loads on a CPU-only machine.
model.load_state_dict(torch.load('new_CNN_notebook.pth', map_location=device))

# The only preprocessing step is the conversion of images to tensors.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

# Divide the CIFAR-10 training split into multiple sets:
# indices 0..39999 are used for training, indices 40000..49999 for validation.
batch_size = 256
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

subset = torch.utils.data.Subset(trainset, range(0, 40000))
trainloader = torch.utils.data.DataLoader(subset, batch_size=batch_size, shuffle=True, num_workers=2)
val_set = torch.utils.data.Subset(trainset, range(40000, 50000))
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=2)

# Number of whole partitions of `partition_size` samples that fit in the
# training subset (integer floor division avoids the float round trip).
partition_size = 2000
num_partitions = len(subset) // partition_size
# SmallLoaders comes from functions.py; presumably it builds one loader per
# partition of the training subset -- TODO confirm against its definition.
small_loaders = SmallLoaders(subset, partition_size, batch_size)

# Held-out CIFAR-10 test split, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
classes = trainset.classes

Model device: cuda:0
Files already downloaded and verified
Files already downloaded and verified


In [2]:
# --- Genetic-algorithm configuration ---------------------------------------
max_processes = 8   # worker processes used for parallel fitness evaluation
nBits = 30          # bits used to encode each gene of the chromosome

# One gene per trainable parameter of the CNN's fc2 layer (weights + biases).
fc2 = model.fc2
Chrom_length = fc2.weight.shape[0] * fc2.weight.shape[1] + fc2.bias.shape[0]

# Objective weights; both are negative, so DEAP minimizes both objectives.
# NOTE(review): the origin of these two magnitudes is not shown in this notebook.
w1 = -1.814
w2 = -0.6544

creator.create("FitnessMin", base.Fitness, weights=(w1, w2))
creator.create("Individual", list, fitness=creator.FitnessMin)

# --- DEAP toolbox -----------------------------------------------------------
toolbox = base.Toolbox()
# A single random bit (0 or 1).
toolbox.register("attr_bool", random.randint, 0, 1)
# An individual is a flat list of nBits * Chrom_length random bits.
toolbox.register(
    "individual",
    tools.initRepeat,
    creator.Individual,
    toolbox.attr_bool,
    nBits * Chrom_length,
)
# A population is a plain list of individuals.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Fitness evaluation: calcFitness with the data loader, CNN and encoding
# parameters bound in advance, so it only needs the individual.
toolbox.register(
    "evaluate",
    calcFitness,
    trainloader=trainloader,
    model=model,
    nBits=nBits,
    Chrom_length=Chrom_length,
)

In [3]:
# Build a random population and compute its real loss values; these pairs are
# the data the surrogate model is trained and validated on.
val1 = 4800  # individuals [0, val1) form the training set, the rest the validation set

popa = toolbox.population(n=5000)

# Pass every random individual through separatevariables and re-encode each
# resulting value with real2chrom, concatenating the pieces into one chromosome.
# NOTE(review): presumably this normalizes the encoding -- confirm in functions.py.
pop = [
    creator.Individual([
        bit
        for weight in separatevariables(ind, nBits, Chrom_length)
        for bit in real2chrom(weight, nBits)
    ])
    for ind in popa
]

# Evaluate every individual that does not yet carry a valid fitness.
invalid_ind = [ind for ind in pop if not ind.fitness.valid]
print("Evaluating invalid population (%i) ..." % len(invalid_ind))

eval_time = time.time()
with ProcessPoolExecutor(max_workers=max_processes) as executor:
    fitnesses = list(executor.map(toolbox.evaluate, invalid_ind))
print("  Eval time: ", time.time() - eval_time, " seconds")

for ind, fit in zip(invalid_ind, fitnesses):
    ind.fitness.values = fit

# Features are the values returned by separatevariables for each individual;
# targets are the first fitness objective of the same individual.
train_pop = pop[:val1]
val_pop = pop[val1:]

X_train = [separatevariables(member, nBits, Chrom_length) for member in train_pop]
y_train = [member.fitness.values[0] for member in train_pop]

X_val = [separatevariables(member, nBits, Chrom_length) for member in val_pop]
y_val = [member.fitness.values[0] for member in val_pop]

Evaluating invalid population (5000) ...
  Eval time:  9312.953893899918  seconds


In [5]:
# X_train holds one feature vector per individual (output of separatevariables);
# y_train holds the corresponding true fitness values (loss).
# Tensors and model are placed on `device` (chosen in the setup cell) instead of
# a hard-coded 'cuda', so this cell also runs on a CPU-only machine.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)

# NOTE: this rebinds `model`, which referred to the CNN in the earlier cells.
# The name is kept because the save cell reads `model`; rerun the setup cell
# before using `model` as the CNN again.
model = Surrogate(input_size=len(X_train[0]))
model.to(device)
# Start from previously saved surrogate weights; map_location remaps tensors
# saved from a GPU onto `device`.
model.load_state_dict(torch.load('surrogate_state_dict.pth', map_location=device))

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train surrogate model: full-batch gradient descent, one step per epoch.
num_epochs = 100
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()

    outputs = model(X_train_tensor)
    # Flatten the predictions so they line up with the 1-D target tensor.
    loss = criterion(outputs.view(-1), y_train_tensor)
    loss.backward()
    optimizer.step()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Validate the model on a validation set
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(device)
model.eval()
# No gradients are needed for validation; skipping autograd avoids building
# (and holding in memory) a graph that is thrown away immediately.
with torch.no_grad():
    y_val_pred = model(X_val_tensor).to('cpu').numpy()

# Calculate mean squared error for the validation set
validation_mse = mean_squared_error(y_val, y_val_pred)
print(f"Validation Mean Squared Error: {validation_mse}")

Epoch 1/100, Loss: 305.45379638671875
Epoch 2/100, Loss: 164.28680419921875
Epoch 3/100, Loss: 152.6299285888672
Epoch 4/100, Loss: 52.191497802734375
Epoch 5/100, Loss: 56.938758850097656
Epoch 6/100, Loss: 80.64348602294922
Epoch 7/100, Loss: 49.6628532409668
Epoch 8/100, Loss: 15.127768516540527
Epoch 9/100, Loss: 23.75521469116211
Epoch 10/100, Loss: 43.83098602294922
Epoch 11/100, Loss: 34.591678619384766
Epoch 12/100, Loss: 15.610671043395996
Epoch 13/100, Loss: 17.181842803955078
Epoch 14/100, Loss: 31.198379516601562
Epoch 15/100, Loss: 32.609920501708984
Epoch 16/100, Loss: 19.893518447875977
Epoch 17/100, Loss: 11.699503898620605
Epoch 18/100, Loss: 16.15631866455078
Epoch 19/100, Loss: 20.23204231262207
Epoch 20/100, Loss: 13.777854919433594
Epoch 21/100, Loss: 5.410059452056885
Epoch 22/100, Loss: 5.449023246765137
Epoch 23/100, Loss: 9.777278900146484
Epoch 24/100, Loss: 9.06192398071289
Epoch 25/100, Loss: 4.026313781738281
Epoch 26/100, Loss: 2.4549527168273926
Epoch 27/

In [7]:
# Persist the current parameters of `model` (the surrogate network when this
# runs after the training cell) so they can be reloaded later.
surrogate_state = model.state_dict()
torch.save(surrogate_state, 'sur10k_state_dict.pth')

In [6]:
# Length of one training feature vector, i.e. the surrogate's input size.
print(len(X_train[0]))

# Print every validation prediction directly above its true fitness value
# for a side-by-side comparison.
for idx, actual in enumerate(y_val):
    print("y_val_pred: ", y_val_pred[idx])
    print("y_val: ", actual)

y_val_pred:  [161.07776]
y_val:  153.1266237489737
y_val_pred:  [171.26256]
y_val:  176.8866211715018
y_val_pred:  [174.14954]
y_val:  175.4591851690013
y_val_pred:  [161.98035]
y_val:  170.74244340058345
y_val_pred:  [193.48293]
y_val:  181.74482211945164
y_val_pred:  [164.69737]
y_val:  158.6342228203063
y_val_pred:  [130.46658]
y_val:  127.7588722569168
y_val_pred:  [155.00719]
y_val:  146.35037100239165
y_val_pred:  [151.42377]
y_val:  158.9191265713637
y_val_pred:  [154.96933]
y_val:  155.6033769352421
y_val_pred:  [138.46748]
y_val:  132.54203626304675
y_val_pred:  [162.77068]
y_val:  181.40045185453573
y_val_pred:  [150.37878]
y_val:  158.43821201202977
y_val_pred:  [146.34692]
y_val:  155.8451858812077
y_val_pred:  [161.15749]
y_val:  173.97464212794213
y_val_pred:  [186.79883]
y_val:  186.30963154203573
y_val_pred:  [158.39204]
y_val:  164.37488201165655
y_val_pred:  [153.41422]
y_val:  135.56622892732074
y_val_pred:  [182.25282]
y_val:  185.09584900679863
y_val_pred:  [183.00