Example of simple network 

In [98]:
import torch
import torch.nn as nn
import torch.optim as optim
def create_neural_network(input_dims, output_dims, hidden_dims=[64, 32], activation=nn.ReLU()):
    layers = []
    
    # Add input layer
    layers.append(nn.Linear(input_dims, hidden_dims[0]))
    layers.append(activation)
    
    # Add hidden layers
    for i in range(1, len(hidden_dims)):
        layers.append(nn.Linear(hidden_dims[i-1], hidden_dims[i]))
        layers.append(activation)
    
    # Add output layer
    layers.append(nn.Linear(hidden_dims[-1], output_dims))
    
    # Create the neural network
    model = nn.Sequential(*layers)
    
    return model

# Example usage
input_dims = 10
output_dims = 5
hidden_dims = [64, 32, 16]  # You can adjust the number of hidden layers and their sizes
activation = nn.ReLU()

net = create_neural_network(input_dims, output_dims, hidden_dims, activation)
print(net)


Sequential(
  (0): Linear(in_features=10, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=32, bias=True)
  (3): ReLU()
  (4): Linear(in_features=32, out_features=16, bias=True)
  (5): ReLU()
  (6): Linear(in_features=16, out_features=5, bias=True)
)


Creating complex network with residual network

In [95]:
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.stride != 1 or identity.shape[1] != out.shape[1]:
            identity = nn.Conv2d(identity.shape[1], out.shape[1], kernel_size=1, stride=self.stride, bias=False)(identity)
            identity = nn.BatchNorm2d(out.shape[1])(identity)

        out += identity
        out = self.relu(out)

        return out

class ComplexNeuralNetwork(nn.Module):
    def __init__(self, input_dims=1, output_dims=10, conv_channels=[16, 32], hidden_dims=[64, 32], activation=nn.ReLU()):
        super(ComplexNeuralNetwork, self).__init__()
        
        layers = []
        
        # Initial convolutional layer
        layers.append(nn.Conv2d(in_channels=input_dims, out_channels=conv_channels[0], kernel_size=3, padding=1))
        layers.append(nn.BatchNorm2d(conv_channels[0]))
        layers.append(activation)
        
        # Convolutional layers with skip connections (using ResidualBlocks)
        in_channels = conv_channels[0]
        for out_channels in conv_channels:
            layers.append(ResidualBlock(in_channels, out_channels))
            in_channels = out_channels
        
        # Flatten the output of the convolutional layers
        layers.append(nn.Flatten())
        
        # Linear layers with skip connections
        in_features = in_channels
        for out_features in hidden_dims:
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.BatchNorm1d(out_features))
            layers.append(activation)
            in_features = out_features
        
        # Output linear layer
        layers.append(nn.Linear(in_features, output_dims))
        
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# Example usage
input_dims = 1  # Number of input channels (e.g., for RGB images)
output_dims = 10  # Number of output classes
conv_channels = [16, 32]  # Number of channels in convolutional layers
hidden_dims = [64, 32]  # Sizes of hidden linear layers
activation = nn.ReLU()

net = ComplexNeuralNetwork(input_dims, output_dims, conv_channels, hidden_dims, activation)

print(net)


ComplexNeuralNetwork(
  (network): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (4): ResidualBlock(
      (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
   

In [97]:
net(torch.rand(1,1,64,64))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x131072 and 32x64)

Downloading the training and validation dataset cifar10

In [91]:
from torchvision import datasets, transforms

transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])
train_dataset = datasets.MNIST('../data', train=True, download=True,
                   transform=transform)
test_dataset = datasets.MNIST('../data', train=False,
                   transform=transform)


In [92]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [93]:
model = net

In [94]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Train the model
for epoch in range(10):
    for i, (images, labels) in enumerate(train_dataloader):
        # Get the predictions
        print(images.shape)
        predictions = model(images)
        # Calculate the loss
        loss = criterion(predictions, labels)

        # Backpropagate the loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print the loss every 100 iterations
        if i % 100 == 0:
            print('Epoch: {} Loss: {:.4f}'.format(epoch, loss.item()))



torch.Size([64, 1, 28, 28])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x25088 and 32x64)

In [None]:
# Evaluate the model on the test set
correct = 0
total = 0
for images, labels in test_loader:
    predictions = model(images)
    _, predicted = torch.max(predictions, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print('Test accuracy: {}%'.format(100 * correct / total))

In [63]:
data=next(iter(train_dataset))[0]

In [86]:
t=torch.rand(64, 1, 28, 28)

In [88]:
layer=nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1, bias=False)
layer(t).shape

torch.Size([64, 16, 28, 28])

Specifing the problem

In [43]:
from evotorch.neuroevolution import SupervisedNE

mnist_problem = SupervisedNE(
    train_dataset,  # Using the dataset specified earlier
    net,  # Training the MNIST30K module designed earlier
    nn.CrossEntropyLoss(),  # Minimizing CrossEntropyLoss
    minibatch_size = 10,  # With a minibatch size of 256
    common_minibatch = True,  # Always using the same minibatch across all solutions on an actor
    num_actors = 4,  # The total number of CPUs used
    num_gpus_per_actor = 'max',  # Dividing all available GPUs between the 4 actors
    subbatch_size = 50,  # Evaluating solutions in sub-batches of size 50 ensures we won't run out of GPU memory for individual workers
)


[2023-09-03 13:53:38] INFO     < 1180> evotorch.core: Instance of `SupervisedNE` (id:1264978372240) -- The `dtype` for the problem's decision variables is set as torch.float32
[2023-09-03 13:53:38] INFO     < 1180> evotorch.core: Instance of `SupervisedNE` (id:1264978372240) -- `eval_dtype` (the dtype of the fitnesses and evaluation data) is set as torch.float32
[2023-09-03 13:53:38] INFO     < 1180> evotorch.core: Instance of `SupervisedNE` (id:1264978372240) -- The `device` of the problem is set as cpu
[2023-09-03 13:53:38] INFO     < 1180> evotorch.core: Instance of `SupervisedNE` (id:1264978372240) -- The number of actors that will be allocated for parallelized evaluation is 4
[2023-09-03 13:53:38] INFO     < 1180> evotorch.core: Instance of `SupervisedNE` (id:1264978372240) -- Number of GPUs that will be allocated per actor is None


In [44]:
from evotorch.algorithms import SNES
searcher = SNES(mnist_problem, stdev_init = 1, popsize = 1000, distributed = True)


In [45]:
from evotorch.logging import StdOutLogger, PandasLogger
stdout_logger = StdOutLogger(searcher, interval = 1)
pandas_logger = PandasLogger(searcher, interval = 1)


In [46]:
searcher.run(10)


[2m[36m(EvaluationActor pid=19748)[0m   shares_storage = self._data.storage().data_ptr() == source._data.storage().data_ptr()


RayTaskError(RuntimeError): [36mray::EvaluationActor.call()[39m (pid=19748, ip=127.0.0.1, actor_id=d5880e87f71e3599ee0a9bc201000000, repr=<evotorch.core.EvaluationActor object at 0x0000025037736A90>)
  File "python\ray\_raylet.pyx", line 1424, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 1364, in ray._raylet.execute_task.function_executor
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
    return method(self, *_args, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\core.py", line 185, in call
    return getattr(self._problem, method_name)(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\core.py", line 3074, in _sample_and_compute_gradients
    resulting_batch = sample_evaluated_batch()
                      ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\core.py", line 3067, in sample_evaluated_batch
    self.evaluate(batch)
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\core.py", line 2395, in evaluate
    self._evaluate_all(batch)
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\core.py", line 2413, in _evaluate_all
    self._evaluate_batch(batch)
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\neuroevolution\supervisedne.py", line 345, in _evaluate_batch
    return super()._evaluate_batch(batch)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\core.py", line 2447, in _evaluate_batch
    self._evaluate(sln)
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\neuroevolution\neproblem.py", line 424, in _evaluate
    fitnesses = evaluator(self.parameterize_net(parameters))
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\neuroevolution\supervisedne.py", line 338, in _evaluate_network
    loss += self._evaluate_using_minibatch(network, self._current_minibatch) / self._num_minibatches
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\neuroevolution\supervisedne.py", line 264, in _evaluate_using_minibatch
    yhat = network(x)
           ^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\AppData\Local\Temp\ipykernel_1180\556631100.py", line 67, in forward
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\torch\nn\modules\container.py", line 217, in forward
    input = module(input)
            ^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\AppData\Local\Temp\ipykernel_1180\556631100.py", line 25, in forward
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\torch\nn\modules\conv.py", line 463, in forward
    return self._conv_forward(input, self.weight, self.bias)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\torch\nn\modules\conv.py", line 459, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

2023-09-03 13:54:01,584	ERROR worker.py:405 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::EvaluationActor.call()[39m (pid=23744, ip=127.0.0.1, actor_id=d0394b735e7fb9664c9a39d501000000, repr=<evotorch.core.EvaluationActor object at 0x000001FA967E4790>)
  File "python\ray\_raylet.pyx", line 1424, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 1364, in ray._raylet.execute_task.function_executor
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
    return method(self, *_args, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Asus\anaconda3\envs\torch-env\Lib\site-packages\evotorch\core.py", line 185, in call
    r

In [2]:
import evotorch

# Create a list of networks.
networks = []
for i in range(100):
    network = evotorch.Network()
    networks.append(network)

# Evaluate the networks.
for network in networks:
    network.evaluate()

# Select the best networks.
best_networks = networks.select_best(10)

# Recombine the best networks.
new_networks = []
for i in range(len(best_networks)):
    new_networks.append(best_networks[i].recombine())

# Mutate the best networks.
for network in new_networks:
    network.mutate()

# Return the new list of networks.
return new_networks


AttributeError: module 'evotorch' has no attribute 'Network'

In [12]:
with open("cnn.txt","r") as f:
    text=f.readlines()
text

['[47,50]\n',
 '10\n',
 '1\n',
 'relu_all\n',
 '[0.5, 0.5]\n',
 '16\n',
 '19\n',
 '0.0008724419871305545\n',
 'sgd\n',
 'mse\n',
 '20\n',
 '10\n',
 'rgrss']

In [25]:
file=open("cnn.txt","r")
hidden_fc_layers_str=transform_format(file.readline())
hidden_fc_layer=[]
for i in hidden_fc_layers_str:
    hidden_fc_layer.append(int(i))
hidden_fc_layer
input_dim=int(file.readline())
output_dim=int(file.readline())
activation_function=transform_format(file.readline())
dropout=transform_format(file.readline())
dropout_lst=[]
for i in dropout:
    dropout_lst.append(i)
batch_size=int(file.readline())
epochs=int(file.readline())
learning_rate=float(file.readline())
optim_ref_str=transform_format(file.readline())
criter_ref=transform_format(file.readline())
print_every=int(file.readline())
patience=int(file.readline())
mode=transform_format(file.readline())


'rgrss'

In [4]:
def transform_format(string):
		string = string.replace("[","")
		string = string.replace("]","")
		string = string.replace(" ","")
		string = string.replace("\n","")
		string = string.replace("\"","")
		string = string.replace("'","")
		if "," in string or ',' in string:
			string = string.split(",")
		return string


In [27]:
import torch
from torch import nn

In [35]:
loss = nn.MSELoss()
input = torch.randn(16, 10, requires_grad=True)
target = torch.randn(10)
output = loss(input, target)
output.backward()

  return F.mse_loss(input, target, reduction=self.reduction)


In [34]:
input.view(10,-1).shape

torch.Size([10, 16])

In [37]:
layers = [1, 16, 32, 64, 128]

pairwise_layers = lambda layers: list(zip(layers, layers[1:]))

result = pairwise_layers(layers)

print(result)


[(1, 16), (16, 32), (32, 64), (64, 128)]


In [37]:
layers = [1, 16, 32, 64, 128]

pairwise_layers = lambda layers: list(zip(layers, layers[1:]))

result = pairwise_layers(layers)

print(result)


[(1, 16), (16, 32), (32, 64), (64, 128)]


In [41]:
## define 

In [50]:
import os,sys,copy,time
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
criterion_funcs = {"nlll":nn.NLLLoss, "mse":nn.MSELoss, "crossentl":nn.CrossEntropyLoss, "bcewll":nn.BCEWithLogitsLoss}
activation_funcs = {"relu":F.relu, "sigmoid":F.sigmoid}#, F.log_softmax, F.softmax}
optimization_funcs = {"adam":optim.Adam, "sgd":optim.SGD}

# ==== BEGIN Descriptors ====
class Network_Descriptor: # In this class we define all the elements than conform the descriptor of a Neural Network
	# This attributes are common to all the neural network descriptors, no matter their type
	# === BEGIN Attributes ===
	# * hidden_fc_layers = contains the list with the number of neurons for each hidden fully connected layer
	# * input_dim = contains the number of input neurons of the very first layer (the number of features of each instance)
	# * output_dim = contains the number of output neurons of the very last layer (the number of classes to classify)
	# * act_functions = contains the activation functions
	# * batch_size = contains the size of the batches
	# * dropout = contains the number corresponding to the dropout of the network
	# * epochs = contains the number of times the entire training set is trained
	# * learning_rate = contains the learning rate of the network
	# * optim_ref = contains the reference to the optimization function
 	# * criter_ref = contains the reference to the criterion
 	# * print_every = defines how often the running loss will be printed
 	# * patience = defines how many times in a row the current running loss can be greater than the previous running loss
	# === END Attributes ===
	def __init__(self, hidden_fc_layers, input_dim, output_dim, act_funcs_ref, dropout, \
		batch_size, epochs, learning_rate, optim_ref, criter_ref, print_every, patience):
		
		self.hidden_fc_layers = hidden_fc_layers
		self.input_dim = input_dim
		self.output_dim = output_dim
		self.act_functions_ref = act_funcs_ref
		self.batch_size = batch_size
		self.dropout = dropout
		self.learning_rate = learning_rate
		self.epochs = epochs
		self.optim_ref = optim_ref
		self.criter_ref = criter_ref
		self.print_every = print_every
		self.patience = patience

	def change_hidden_fc_layers(self, hidden_fc_layers):
		self.hidden_fc_layers = hidden_fc_layers
	def change_input_dim(self,input_dim):
		self.input_dim = input_dim
	def change_output_dim(self,output_dim):
		self.output_dim = output_dim
	def change_act_functions_ref(self,act_functions_ref):
		self.act_functions_ref = act_functions_ref
	def change_batch_size(self,batch_size):
		self.batch_size = batch_size
	def change_dropout(self,dropout):
		self.dropout = dropout
	def change_learning_rate(self, learning_rate):
		self.learning_rate = learning_rate
	def change_epochs(self, epochs):
		self.epochs = epochs
	def change_optim_ref(self, change_optim_ref):
		self.optim_ref = change_optim_ref
	def change_criter_ref(self, change_criter_ref):
		self.criter_ref = change_criter_ref
	def change_print_every(self, print_every):
		self.print_every = print_every
	def change_patience(self, patience):
		self.patience = patience

	# Transforms the data from the txt-native format to Evotorch format
	def transform_format(self, string):
		string = string.replace("[","")
		string = string.replace("]","")
		string = string.replace(" ","")
		string = string.replace("\n","")
		string = string.replace("\"","")
		string = string.replace("'","")
		if "," in string or ',' in string:
			string = string.split(",")
		return string


In [51]:
class CNN_Descriptor(Network_Descriptor):
	# === BEGIN Attributes ===
	# * conv_layers = contains the list with the number of channels for each convolutional layer
	# * kernel_sizes = defines the sizes of the kernels. It must have these shape: [ [x,y], [x,y],...,[x,y] ]
	# where x defines the kernel size of the i-th convolutional layer and y defines the kernel size of the i-th pooling layer
	# * conv_stride_sizes = defines the sizes of the strides of the convolutional layer. It must have these shape: [x,x,x,...]
	# where x defines the stride size of the i-th convolutional layer
	# === END Attributes ===
	def __init__(self, hidden_fc_layers = [5,5], input_dim = 50, output_dim = 1, \
		activation_funcs_ref = "relu_all", dropout = [0.5,0.5], batch_size = 1, epochs = 1, learning_rate = 0.1,\
		optim_ref = "adam", criter_ref =  "nlll", print_every = 40, patience = 5, conv_layers = [1,32,64], \
		kernel_sizes = [ [5,2],[5,2] ], conv_stride_sizes = [1,1]):	
		
		super().__init__(hidden_fc_layers, input_dim, output_dim, activation_funcs_ref, \
			dropout, batch_size, epochs, learning_rate, optim_ref, criter_ref, print_every, patience)
		
		self.conv_layers = conv_layers # [ number of filters on each convolution layer ]
		self.kernel_sizes = kernel_sizes # [ [kernel size of convolution layer, kernel size of pooling layer] ]
		self.conv_stride_sizes = conv_stride_sizes # [ [stride size of convolution layer, stride size of pooling layer] ]

	def change_conv_layers(self, conv_layers):
		self.conv_layers = conv_layers
	def change_kernel_sizes(self,kernel_sizes):
		self.kernel_sizes = kernel_sizes
	def change_conv_stride_sizes(self, conv_stride_sizes):
		self.conv_stride_sizes = conv_stride_sizes


	# We save the NN's information on a text file we passed in file_name
	def save_NN_info(self, file_name):
		f = open(file_name, "w")
		info = str(self.hidden_fc_layers)+"\n"+str(self.input_dim)+"\n"+str(self.output_dim)+"\n"+\
		str(self.act_functions_ref)+"\n"+str(self.dropout)+"\n"+str(self.batch_size)+"\n"+str(self.epochs)+"\n"+str(self.learning_rate)+"\n"+\
		str(self.optim_ref)+"\n"+str(self.criter_ref)+"\n"+str(self.print_every)+"\n"+str(self.patience)+"\n"+\
		str(self.conv_layers)+"\n"+str(self.kernel_sizes)+"\n"+str(self.conv_stride_sizes)
		f.write(info)
		f.close()


	# We load the NN's information from a text file we passed in file_name
	def load_NN_info(self, file_name):
		f = open(file_name, "r")
		hidden_fc_layers_str = self.transform_format(f.readline())
		self.hidden_fc_layers = []
		if type(hidden_fc_layers_str) == list:
			for i in hidden_fc_layers_str:
				self.hidden_fc_layers.append(int(i))
		else:
			self.hidden_fc_layers.append(int(hidden_fc_layers_str))

		self.input_dim = int(f.readline())
		self.output_dim = int(f.readline())
		
		act_functions_refs_str = self.transform_format(f.readline())
		if act_functions_refs_str == "relu_all":
			self.act_functions_ref = "relu_all"
		elif act_functions_refs_str == "sigmoid_all":
			self.act_functions_ref = "sigmoid_all"
		else:
			self.act_functions_ref = []
			if type(act_functions_refs_str) == list:
				for i in act_functions_refs_str:
					self.act_functions_ref.append(i)
			else:
				self.act_functions_ref.append(act_functions_refs_str)
		dropout_refs_str = self.transform_format(f.readline())
		self.dropout = []
		if type(dropout_refs_str) == list:
			for i in dropout_refs_str:
				self.dropout.append(float(i))
		else:
			self.dropout.append(float(dropout_refs_str))

		self.batch_size = int(f.readline())
		self.epochs = int(f.readline())
		self.learning_rate = float(f.readline())

		optim_ref_str = self.transform_format(f.readline())
		self.optim_ref = optim_ref_str

		criter_ref_str = self.transform_format(f.readline())
		self.criter_ref = criter_ref_str

		self.print_every = int(f.readline())
		self.patience = int(f.readline())


		self.conv_layers = []
		conv_layers_str = self.transform_format(f.readline())
		for i in conv_layers_str:
			self.conv_layers.append(int(i))

		self.kernel_sizes = []
		kernel_sizes_str = self.transform_format(f.readline())
		# As it is a list with this form [ [x,y], [x,y],..., [x,y]] we use the zipping
		kernel_size_pairs = zip(kernel_sizes_str[0::2], kernel_sizes_str[1::2])

		self.kernel_sizes  = [ [int(i),int(j)] for i,j in kernel_size_pairs]

		self.conv_stride_sizes = []

		stride_sizes_str = self.transform_format(f.readline())

		for i in stride_sizes_str:
			self.conv_stride_sizes.append(int(i))
# ==== END Descriptors ====


In [52]:
class Network(nn.Module):
	# This attributes are common to all the neural networks, no matter their type
	# === BEGIN Attributes ===
	# * descriptor = contains the network descriptor
	# * dropout = contains the dropout layer of the network
	# * act_functions = contains the activation functions
	# * criterion = contains the loss function of the network
	# === END Attributes ===

	def __init__(self,Network_Descriptor):
		
		super().__init__()

		torch.set_default_dtype(torch.float64)

		self.descriptor = Network_Descriptor
		self.dropout = nn.ModuleList([nn.Dropout(p = i) for i in self.descriptor.dropout]) # the length has to be == len(hidden_fc_layers)

		self.act_functions = []

		if self.descriptor.act_functions_ref == "relu_all":
			for i in range(len(self.descriptor.hidden_fc_layers)):
				self.act_functions.append(F.relu)
		elif self.descriptor.act_functions_ref == "sigmoid_all":
			for i in range(len(self.descriptor.hidden_fc_layers)):
				self.act_functions.append(F.sigmoid)
		else:
			for i in self.descriptor.act_functions_ref: # the length of self.descriptor.act_functions_ref has to be == len(hidden_fc_layers)
				self.act_functions.append(activation_funcs[i])

		self.criterion = criterion_funcs[self.descriptor.criter_ref]()#reduction='sum', size_average=False)

	def predict(self, x):
		return self(x)

# Saves the values of the NN's hiperparameters in the file. In other words, it saves the values of the NN's descriptor
	def save_NN_info(self, file_name): 
		self.descriptor.save_NN_info(file_name)

# Saves the NN with all the weights, biases and parameters
	def save_NN(self, file_name): 
		torch.save(self.state_dict(), file_name)

# Loads the NN with all the weights, biases and parameters
	def load_NN(self, file_name, strictt = False):  
		self.load_state_dict(torch.load(file_name), strict=strictt)

In [53]:
class CNN_Network(Network):
	# === BEGIN Attributes ===
	# * conv_layers = contains the sequences of (convolutional layers + relu layers + MaxPooling layers)
	# * _to_linear = contains the number of neurons that the hidden fc layer connected to the las conv_layer has
	# * hidden_fc_layers = contains the hidden fully connected layers
	# * output = contains the last layer of the network, the one which computes the prediction
	# * optimizer = contains the optimization algorithm's instance
	# === END Attributes ===
	def __init__(self,network_descriptor):

		super().__init__(network_descriptor)

		layer_sizes = zip(self.descriptor.conv_layers[:-1], self.descriptor.conv_layers[1:])
		# When we define a convolutional layer what we are really defining is a sequence of three layers:
		# Convolutional layer + Relu layer + MaxPooling layer
		
		self.conv_layers = nn.ModuleList([nn.Sequential(
			nn.Conv2d(h1, h2, kernel_size = self.descriptor.kernel_sizes[i][0], stride = self.descriptor.conv_stride_sizes[i], \
				padding = 0),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size = self.descriptor.kernel_sizes[i][1])) for i,(h1, h2) in enumerate(layer_sizes) ] )

		# # This random value is created in order to get the self._to_linear value, which is very useful
		x = torch.randn(self.descriptor.input_dim, self.descriptor.input_dim).view(-1,1, \
			self.descriptor.input_dim,self.descriptor.input_dim)
		# This variable transforms the data we have passed through the convolution function so that we can
		# forward-pass it
		self._to_linear = None
		# # Here we get the value of self._to_linear, which we will use in order to shape the size of the data correctly
		try:
			self.convs(x)
			# a = self.convs_sizes(x.shape[3])
			# print(r.shape[3], a)
			# size_is_1 = False
			# numb_of_convl = len(self.descriptor.kernel_sizes)
			# i = 0
			# _size_ = list(x.size())[3]
			# while not size_is_1 and i < numb_of_convl:
			# 	_size_ = self.get_tensor_sz_after_convpool(list(x.size())[3])
			# 	print(i, numb_of_convl)
			# 	if _size_ <= 1 or self.descriptor.kernel_sizes[i][0]>=_size_ or self.descriptor.kernel_sizes[i][1]>=_size_:
			# 		size_is_1 = True
			# 		self.conv_layers = self.conv_layers[:i+1]
			# 		self.descriptor.conv_layers = self.descriptor.conv_layers[:i+1]
			# 		self.descriptor.kernel_sizes = self.descriptor.kernel_sizes[:i]
			# 		self.descriptor.conv_stride_sizes = self.descriptor.conv_stride_sizes[:i]
			# 		numb_of_convl = len(self.descriptor.conv_layers)
			# 	else:
			# 		i+=1
			# self._to_linear = x.shape[1]*x.shape[2]*self.convs_sizes(list(x.size())[3])
		except Exception as e:
			raise Exception("Convolution failed!")

		# Above, the process is the same as in MLP_Network and fills the same purpose
		self.hidden_fc_layers = nn.ModuleList([nn.Linear(self._to_linear, self.descriptor.hidden_fc_layers[0])])

		layer_sizes = zip( self.descriptor.hidden_fc_layers[:-1], self.descriptor.hidden_fc_layers[1:] )

		self.hidden_fc_layers.extend([nn.Linear(h1, h2) for h1, h2 in layer_sizes])

		self.output = nn.Linear(self.descriptor.hidden_fc_layers[-1], self.descriptor.output_dim)
		# We initialize the optimizer
		self.optimizer = optimization_funcs[self.descriptor.optim_ref](self.parameters(), \
			lr = self.descriptor.learning_rate)#, weight_decay=0.01)
		# print("CNN initialization successful")

# Here we load the hiperparameters from a file we passed as an argument (file_name). 
# file_name also includes the path
	def load_NN_info(self, file_name): 
		self.descriptor.load_NN_info(file_name)
		self.__init__(self.descriptor)

	def get_conv_output_size(self, x_size, conv_kernel_size, conv_stride_size):
		# ((W1−F  +2P)/S)  +1
		# [(I – F +2 *P) / S] +1 x D | I: input , F: filter size, P: pooling, D: number of feature maps
		# P == pooling == 0
		return ((x_size-conv_kernel_size)/conv_stride_size)+1 

	def get_pool_output_size(self, x_size, pool_kernel_size):
		# (W1−F)/S  +1
		# # [(I – F) / S] + 1 x D | I: input , F: filter size, D: number of feature maps
		# pool_kernel_size == pool_stride_size	
		return ((x_size-pool_kernel_size)/pool_kernel_size)+1

	def get_convpool_size(self, x_size, conv_kernel_size, conv_stride_size, pool_kernel_size):
		x_sz = self.get_conv_output_size(x_size, conv_kernel_size, conv_stride_size)
		x_sz = self.get_pool_output_size(x_sz, pool_kernel_size)
		return x_sz


	def get_tensor_sz_after_convpool(self, x_size):
		for i in range(len(self.descriptor.kernel_sizes)):
			x_size = int(self.get_convpool_size(x_size, self.descriptor.kernel_sizes[i][0],\
				self.descriptor.conv_stride_sizes[i], self.descriptor.kernel_sizes[i][1]))
		return x_size




	def convs_sizes(self, x):
		original_x = copy.deepcopy(x)
		# print("Estos son los conv layers ", self.conv_layers)
		for i,conv in enumerate(self.conv_layers):
			x = self.get_tensor_sz_after_convpool(x)
			# print( "Size of the convolution ", self.get_conv_output_size(x) ,list(x.size()))
			if x <= 1 or self.descriptor.kernel_sizes[i][0]>=x or self.descriptor.kernel_sizes[i][1]>=x:
				# print("Da tamaino 1 con conv_layers ", self.descriptor.conv_layers)
				self.conv_layers = self.conv_layers[:-1]
				self.descriptor.conv_layers = self.descriptor.conv_layers[:-1]
				self.descriptor.kernel_sizes = self.descriptor.kernel_sizes[:-1]
				self.descriptor.conv_stride_sizes = self.descriptor.conv_stride_sizes[:-1]
				x = self.convs_sizes(original_x)
				break
		return x



# In this function the convolution and the pooling take place
	def convs(self, x):
		original_x = copy.deepcopy(x)
		for i,conv in enumerate(self.conv_layers):
			x = conv(x)
			_size_ = list(x.size())[3]
			if _size_ <= 1 or self.descriptor.kernel_sizes[i][0]>=_size_ or self.descriptor.kernel_sizes[i][1]>=_size_:
				# print("Da tamaino 1 con conv_layers ", self.descriptor.conv_layers)
				self.conv_layers = self.conv_layers[:-1]
				self.descriptor.conv_layers = self.descriptor.conv_layers[:-1]
				self.descriptor.kernel_sizes = self.descriptor.kernel_sizes[:-1]
				self.descriptor.conv_stride_sizes = self.descriptor.conv_stride_sizes[:-1]
				x = self.convs(original_x)
				break
		if self._to_linear is None:
			self._to_linear = x.shape[1]*x.shape[2]*x.shape[3]
		return x


# Note that tensor.shape is an alias to tensor.size(), though tensor.shape is an
# attribute of the tensor in question whereas tensor.size() is a function.

	def forward(self, x):
		# We make the convolution
		x = self.convs(x)
		x = x.reshape(x.size(0), -1) # This line transforms x's dimensionality into: [batch_size, self._to_linear]
		# # We put the data on its corresponding format (I THINK IT IS DEPRECATED)
		# x = x.view(-1,1, self._to_linear)
		# We do the forward pass
		for linear in range(len(self.hidden_fc_layers)):
			x = self.act_functions[linear](self.hidden_fc_layers[linear](x))
			x = self.dropout[linear](x)
		x = self.output(x) # This is the output layer, so we dont apply the actvation function	
		return F.log_softmax(x, dim=1)



	def training_CNN(self,training_data):
		
		steps = 0 
		running_loss = 0.0
		patience_count = 0
		halt = False
		last_loss = float("Inf")
		running_losses = []
		epochs = []
		self.optimizer.zero_grad()

		print("======== TRAINING PHASE ========\n\n")

		for e in range(self.descriptor.epochs):
			
			print("EPOCH ", e)

			for batch in training_data:
				values, labels = batch
				# We will have to delete the following two lines
				# values = values.double()
				# labels = labels.long()

				values = values.view(-1, 1, self.descriptor.input_dim, self.descriptor.input_dim)
				# We put the gradients to zero
				self.optimizer.zero_grad()
				# We update the number of steps		
				steps+=1

				output = self.predict(values.double())#output = self(values.double())
				# We transform the dimensions of the output
				output = output.view(-1,self.descriptor.output_dim)
				# We compute the loss
				if list(output.size())[0] != list(labels.size())[0]:
					output = output.view(self.descriptor.batch_size, -1)
                
				loss = self.criterion(output, labels)
				# We do the backpropagation
				loss.backward()
				# We apply the changes
				self.optimizer.step()
				# We obtain the running loss (the actual loss)
				running_loss = loss.item() # This is the loss of the current batch, with which we are going to compute the overfitting

				# If the running loss has kept rising in self.descriptor.patience number of steps then
				# we halt the training
				if running_loss > last_loss or running_loss == last_loss:
					patience_count += 1
				else:
					patience_count = 0.0
				# If the patience is reached, the training is halted
				if patience_count == self.descriptor.patience:
					halt = True
				# We keep record of the running loss for the next step
				last_loss = copy.copy(running_loss)

				# The code within this is statement has the purpose of printing the running loss
				# We print the running loss
				if steps % self.descriptor.print_every == 0:	
					print("Running loss ", running_loss)
				# If the patience is reached, the training is halted
				if halt:
					break			
			# If the patience is reached, the training is halted
			if halt:
				break
			epochs.append(e)
			running_losses.append(running_loss)
		# if halt:
		# 	print("Overfitting occurred!")
		return epochs, running_losses
	# def training_CNN2(self, testloader):


	def testing_CNN(self,testloader):
		# We initialize the test loss
		test_loss = 0
		# We initialize the number of correct predictions
		correct = 0
		# We initialize the number of total predictions
		total = 0

		# print("======== TESTING PHASE ========\n\n")
		for batch in testloader:
		# for i in tqdm(range(0, len(testing_data), self.descriptor.batch_size)): (deprecated)
			# We get the values and the labels
			values, labels = batch
			
			# We will have to delete the following two lines
			# values = values.double()
			# labels = labels.long()
			values = values.view(-1, 1, self.descriptor.input_dim, self.descriptor.input_dim)

			output = self.predict(values.double())#output = self(values.double())
			# We redimension the output
			output = output.view(-1,self.descriptor.output_dim)

			if list(output.size())[0] != list(labels.size())[0]:
				output = output.view(self.descriptor.batch_size, -1)
			# We get the predictions
			_, predicted = torch.max(output.data, 1) # We get the index
			# print("Predicted ", predicted, "labels ", labels)
			# We get the total number of instances
			total += labels.size(0)
			# We get the how many predictions were correct
			correct += (predicted == labels).sum().item()
			# print("Corrects: ", correct)

		# we get, print and return the accuracy
		result = round(correct/total,3) # We get the accuracy
		# print(correct," correct out of ", total)
		# print("Accuracy: ", result)
		return result
# ==== END Implementators ====



In [54]:
n_network = CNN_Network(CNN_Descriptor())


In [55]:
n_network

CNN_Network(
  (dropout): ModuleList(
    (0-1): 2 x Dropout(p=0.5, inplace=False)
  )
  (criterion): NLLLoss()
  (conv_layers): ModuleList(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (hidden_fc_layers): ModuleList(
    (0): Linear(in_features=5184, out_features=5, bias=True)
    (1): Linear(in_features=5, out_features=5, bias=True)
  )
  (output): Linear(in_features=5, out_features=1, bias=True)
)

In [56]:
NN_info_file = "cnn.txt"
n_network.load_NN_info(NN_info_file)

In [58]:
n_network.conv_layers

ModuleList()