In [None]:
import numpy as np
import torch
import torch.nn as nn

# Integer codes written into entry 0 of each per-layer slot. Code 0 is not
# assigned to any layer type, so an all-zero slot decodes as "no layer".
LAYER_TYPES = dict(conv=1, pool=2, fc=3, flatten=4)

# Encoding geometry: a model is described by MAX_LAYERS slots of
# LAYER_VECTOR_SIZE numbers each, concatenated into one flat vector.
MAX_LAYERS = 10
LAYER_VECTOR_SIZE = 7
TOTAL_VECTOR_SIZE = LAYER_VECTOR_SIZE * MAX_LAYERS

In [None]:
import numpy as np
import torch
import torch.nn as nn

# Integer codes written into entry 0 of each per-layer slot. Code 0 is not
# assigned to any layer type, so an all-zero slot decodes as "no layer".
LAYER_TYPES = dict(conv=1, pool=2, fc=3, flatten=4)

# Encoding geometry: a model is described by MAX_LAYERS slots of
# LAYER_VECTOR_SIZE numbers each, concatenated into one flat vector.
MAX_LAYERS = 10
LAYER_VECTOR_SIZE = 7
TOTAL_VECTOR_SIZE = LAYER_VECTOR_SIZE * MAX_LAYERS

def vectorize_cnn(model):
    """Encode the direct children of ``model`` as one flat numpy vector.

    Every child occupies LAYER_VECTOR_SIZE consecutive entries; entry 0 is the
    type code from LAYER_TYPES and the following entries hold:

    * ``nn.Conv2d``    -> out_channels, kernel_size[0], stride[0], padding[0]
    * ``nn.MaxPool2d`` -> kernel size, stride
    * ``nn.Linear``    -> out_features
    * ``nn.Flatten``   -> nothing besides the type code

    A child of any other type still consumes a slot but leaves it all-zero.
    Children past the first MAX_LAYERS are ignored. Input sizes
    (in_channels / in_features) are not stored.

    Args:
        model: Module whose direct children are encoded in order.

    Returns:
        np.ndarray of length TOTAL_VECTOR_SIZE.
    """
    def _first(value):
        # MaxPool2d keeps kernel_size/stride exactly as passed (int or tuple);
        # a tuple cannot be assigned to a single array element, so take [0]
        # the same way the Conv2d branch does.
        return value[0] if isinstance(value, (tuple, list)) else value

    vector = np.zeros(TOTAL_VECTOR_SIZE)

    for layer_index, layer in enumerate(model.children()):
        if layer_index >= MAX_LAYERS:
            break

        layer_vector = np.zeros(LAYER_VECTOR_SIZE)
        if isinstance(layer, nn.Conv2d):
            layer_vector[0] = LAYER_TYPES["conv"]
            layer_vector[1] = layer.out_channels
            layer_vector[2] = layer.kernel_size[0]
            layer_vector[3] = layer.stride[0]
            layer_vector[4] = layer.padding[0]
        elif isinstance(layer, nn.MaxPool2d):
            layer_vector[0] = LAYER_TYPES["pool"]
            layer_vector[1] = _first(layer.kernel_size)
            layer_vector[2] = _first(layer.stride)
        elif isinstance(layer, nn.Linear):
            layer_vector[0] = LAYER_TYPES["fc"]
            layer_vector[1] = layer.out_features
        elif isinstance(layer, nn.Flatten):
            layer_vector[0] = LAYER_TYPES["flatten"]

        start = layer_index * LAYER_VECTOR_SIZE
        vector[start:start + LAYER_VECTOR_SIZE] = layer_vector

    return vector

def create_cnn_from_vector(vector, in_channels=1, input_size=28):
    """Rebuild an ``nn.Sequential`` from a vector in the ``vectorize_cnn`` layout.

    The vector stores no input sizes, so they are derived here by tracking the
    tensor shape layer by layer, starting from ``in_channels`` feature maps of
    ``input_size`` x ``input_size`` pixels.

    Args:
        vector: Sequence expected to hold MAX_LAYERS slots of
            LAYER_VECTOR_SIZE numbers each.
        in_channels: Channel count fed to the first layer (default 1, the
            value that used to be hard-coded).
        input_size: Side length of the square input (default 28, the size the
            earlier ``* 28 * 28`` fallback assumed).

    Returns:
        nn.Sequential of freshly initialised layers. Slots whose type code is
        0 or not in LAYER_TYPES are skipped.
    """
    def _out_size(size, kernel, stride, padding=0):
        # Conv/pool output-size formula for dilation 1 and floor rounding.
        # max(stride, 1) only protects this bookkeeping from a zero stride in
        # a hand-built vector; the layer itself still receives the raw value.
        return max((size + 2 * padding - kernel) // max(stride, 1) + 1, 0)

    layers = []
    channels = in_channels
    height = width = input_size
    flat_features = None  # feature count once the tensor has been flattened

    for layer_index in range(MAX_LAYERS):
        start = layer_index * LAYER_VECTOR_SIZE
        layer_vector = vector[start:start + LAYER_VECTOR_SIZE]
        layer_type = int(layer_vector[0])

        if layer_type == 0:
            continue
        elif layer_type == LAYER_TYPES["conv"]:
            filters = int(layer_vector[1])
            kernel_size = int(layer_vector[2])
            stride = int(layer_vector[3])
            padding = int(layer_vector[4])
            layers.append(nn.Conv2d(channels, filters, kernel_size, stride=stride, padding=padding))
            channels = filters
            height = _out_size(height, kernel_size, stride, padding)
            width = _out_size(width, kernel_size, stride, padding)
        elif layer_type == LAYER_TYPES["pool"]:
            kernel_size = int(layer_vector[1])
            stride = int(layer_vector[2])
            layers.append(nn.MaxPool2d(kernel_size, stride=stride))
            height = _out_size(height, kernel_size, stride)
            width = _out_size(width, kernel_size, stride)
        elif layer_type == LAYER_TYPES["flatten"]:
            layers.append(nn.Flatten())
            flat_features = channels * height * width
        elif layer_type == LAYER_TYPES["fc"]:
            out_features = int(layer_vector[1])
            if flat_features is None:
                # No Flatten slot seen yet: size the layer for the whole
                # current feature map.
                flat_features = channels * height * width
            layers.append(nn.Linear(flat_features, out_features))
            # The next Linear consumes this layer's output, not the conv width.
            flat_features = out_features

    return nn.Sequential(*layers)

# Reference network for the encode/decode round trip. The first Linear is
# sized 64 * 7 * 7, i.e. 64 feature maps of 7x7 after the two 2x2 poolings.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(in_features=64 * 7 * 7, out_features=128),
    nn.Linear(in_features=128, out_features=10),
)

# Encode the architecture, show the vector, then decode it again.
vector = vectorize_cnn(model)
print("Векторное представление:", vector)
recovered_model = create_cnn_from_vector(vector)
print("Восстановленная архитектура:", recovered_model)

def vectorize_cnn(model):
    """Encode the direct children of ``model`` as one flat numpy vector.

    Every child occupies LAYER_VECTOR_SIZE consecutive entries; entry 0 is the
    type code from LAYER_TYPES and the following entries hold:

    * ``nn.Conv2d``    -> out_channels, kernel_size[0], stride[0], padding[0]
    * ``nn.MaxPool2d`` -> kernel size, stride
    * ``nn.Linear``    -> out_features
    * ``nn.Flatten``   -> nothing besides the type code

    A child of any other type still consumes a slot but leaves it all-zero.
    Children past the first MAX_LAYERS are ignored. Input sizes
    (in_channels / in_features) are not stored.

    Args:
        model: Module whose direct children are encoded in order.

    Returns:
        np.ndarray of length TOTAL_VECTOR_SIZE.
    """
    def _first(value):
        # MaxPool2d keeps kernel_size/stride exactly as passed (int or tuple);
        # a tuple cannot be assigned to a single array element, so take [0]
        # the same way the Conv2d branch does.
        return value[0] if isinstance(value, (tuple, list)) else value

    vector = np.zeros(TOTAL_VECTOR_SIZE)

    for layer_index, layer in enumerate(model.children()):
        if layer_index >= MAX_LAYERS:
            break

        layer_vector = np.zeros(LAYER_VECTOR_SIZE)
        if isinstance(layer, nn.Conv2d):
            layer_vector[0] = LAYER_TYPES["conv"]
            layer_vector[1] = layer.out_channels
            layer_vector[2] = layer.kernel_size[0]
            layer_vector[3] = layer.stride[0]
            layer_vector[4] = layer.padding[0]
        elif isinstance(layer, nn.MaxPool2d):
            layer_vector[0] = LAYER_TYPES["pool"]
            layer_vector[1] = _first(layer.kernel_size)
            layer_vector[2] = _first(layer.stride)
        elif isinstance(layer, nn.Linear):
            layer_vector[0] = LAYER_TYPES["fc"]
            layer_vector[1] = layer.out_features
        elif isinstance(layer, nn.Flatten):
            layer_vector[0] = LAYER_TYPES["flatten"]

        start = layer_index * LAYER_VECTOR_SIZE
        vector[start:start + LAYER_VECTOR_SIZE] = layer_vector

    return vector

def create_cnn_from_vector(vector, in_channels=1, input_size=28):
    """Rebuild an ``nn.Sequential`` from a vector in the ``vectorize_cnn`` layout.

    The vector stores no input sizes, so they are derived here by tracking the
    tensor shape layer by layer, starting from ``in_channels`` feature maps of
    ``input_size`` x ``input_size`` pixels.

    Args:
        vector: Sequence expected to hold MAX_LAYERS slots of
            LAYER_VECTOR_SIZE numbers each.
        in_channels: Channel count fed to the first layer (default 1, the
            value that used to be hard-coded).
        input_size: Side length of the square input (default 28, the size the
            earlier ``* 28 * 28`` fallback assumed).

    Returns:
        nn.Sequential of freshly initialised layers. Slots whose type code is
        0 or not in LAYER_TYPES are skipped.
    """
    def _out_size(size, kernel, stride, padding=0):
        # Conv/pool output-size formula for dilation 1 and floor rounding.
        # max(stride, 1) only protects this bookkeeping from a zero stride in
        # a hand-built vector; the layer itself still receives the raw value.
        return max((size + 2 * padding - kernel) // max(stride, 1) + 1, 0)

    layers = []
    channels = in_channels
    height = width = input_size
    flat_features = None  # feature count once the tensor has been flattened

    for layer_index in range(MAX_LAYERS):
        start = layer_index * LAYER_VECTOR_SIZE
        layer_vector = vector[start:start + LAYER_VECTOR_SIZE]
        layer_type = int(layer_vector[0])

        if layer_type == 0:
            continue
        elif layer_type == LAYER_TYPES["conv"]:
            filters = int(layer_vector[1])
            kernel_size = int(layer_vector[2])
            stride = int(layer_vector[3])
            padding = int(layer_vector[4])
            layers.append(nn.Conv2d(channels, filters, kernel_size, stride=stride, padding=padding))
            channels = filters
            height = _out_size(height, kernel_size, stride, padding)
            width = _out_size(width, kernel_size, stride, padding)
        elif layer_type == LAYER_TYPES["pool"]:
            kernel_size = int(layer_vector[1])
            stride = int(layer_vector[2])
            layers.append(nn.MaxPool2d(kernel_size, stride=stride))
            height = _out_size(height, kernel_size, stride)
            width = _out_size(width, kernel_size, stride)
        elif layer_type == LAYER_TYPES["flatten"]:
            layers.append(nn.Flatten())
            flat_features = channels * height * width
        elif layer_type == LAYER_TYPES["fc"]:
            out_features = int(layer_vector[1])
            if flat_features is None:
                # No Flatten slot seen yet: size the layer for the whole
                # current feature map.
                flat_features = channels * height * width
            layers.append(nn.Linear(flat_features, out_features))
            # The next Linear consumes this layer's output, not the conv width.
            flat_features = out_features

    return nn.Sequential(*layers)

# Reference network for the encode/decode round trip. The first Linear is
# sized 64 * 7 * 7, i.e. 64 feature maps of 7x7 after the two 2x2 poolings.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(in_features=64 * 7 * 7, out_features=128),
    nn.Linear(in_features=128, out_features=10),
)

# Encode the architecture, show the vector, then decode it again.
vector = vectorize_cnn(model)
print("Векторное представление:", vector)
recovered_model = create_cnn_from_vector(vector)
print("Восстановленная архитектура:", recovered_model)

# Layer-by-layer comparison of the original and the decoded architecture.
model_layers = list(model.children())
recovered_layers = list(recovered_model.children())

# zip() stops at the shorter list, so report a length mismatch explicitly
# instead of silently skipping the extra layers.
if len(model_layers) != len(recovered_layers):
    print("Layer count mismatch:", len(model_layers), "vs", len(recovered_layers))

for original, recovered in zip(model_layers, recovered_layers):
    print("Original Layer: ", original)
    print("Recovered Layer: ", recovered)
    if type(original) is not type(recovered):
        print(False)
    elif isinstance(original, nn.Conv2d):
        # Input width must match as well, otherwise the decoded layer cannot
        # process the same tensors as the original.
        print(original.in_channels == recovered.in_channels
              and original.out_channels == recovered.out_channels)
    elif isinstance(original, nn.Linear):
        print(original.in_features == recovered.in_features
              and original.out_features == recovered.out_features)

In [None]:
def vectorize_cnn(model):
    """Encode the direct children of ``model`` as one flat numpy vector.

    Every child occupies LAYER_VECTOR_SIZE consecutive entries; entry 0 is the
    type code from LAYER_TYPES and the following entries hold:

    * ``nn.Conv2d``    -> out_channels, kernel_size[0], stride[0], padding[0]
    * ``nn.MaxPool2d`` -> kernel size, stride
    * ``nn.Linear``    -> out_features
    * ``nn.Flatten``   -> nothing besides the type code

    A child of any other type still consumes a slot but leaves it all-zero.
    Children past the first MAX_LAYERS are ignored. Input sizes
    (in_channels / in_features) are not stored.

    Args:
        model: Module whose direct children are encoded in order.

    Returns:
        np.ndarray of length TOTAL_VECTOR_SIZE.
    """
    def _first(value):
        # MaxPool2d keeps kernel_size/stride exactly as passed (int or tuple);
        # a tuple cannot be assigned to a single array element, so take [0]
        # the same way the Conv2d branch does.
        return value[0] if isinstance(value, (tuple, list)) else value

    vector = np.zeros(TOTAL_VECTOR_SIZE)

    for layer_index, layer in enumerate(model.children()):
        if layer_index >= MAX_LAYERS:
            break

        layer_vector = np.zeros(LAYER_VECTOR_SIZE)
        if isinstance(layer, nn.Conv2d):
            layer_vector[0] = LAYER_TYPES["conv"]
            layer_vector[1] = layer.out_channels
            layer_vector[2] = layer.kernel_size[0]
            layer_vector[3] = layer.stride[0]
            layer_vector[4] = layer.padding[0]
        elif isinstance(layer, nn.MaxPool2d):
            layer_vector[0] = LAYER_TYPES["pool"]
            layer_vector[1] = _first(layer.kernel_size)
            layer_vector[2] = _first(layer.stride)
        elif isinstance(layer, nn.Linear):
            layer_vector[0] = LAYER_TYPES["fc"]
            layer_vector[1] = layer.out_features
        elif isinstance(layer, nn.Flatten):
            layer_vector[0] = LAYER_TYPES["flatten"]

        start = layer_index * LAYER_VECTOR_SIZE
        vector[start:start + LAYER_VECTOR_SIZE] = layer_vector

    return vector

def create_cnn_from_vector(vector, in_channels=1, input_size=28):
    """Rebuild an ``nn.Sequential`` from a vector in the ``vectorize_cnn`` layout.

    The vector stores no input sizes, so they are derived here by tracking the
    tensor shape layer by layer, starting from ``in_channels`` feature maps of
    ``input_size`` x ``input_size`` pixels.

    Args:
        vector: Sequence expected to hold MAX_LAYERS slots of
            LAYER_VECTOR_SIZE numbers each.
        in_channels: Channel count fed to the first layer (default 1, the
            value that used to be hard-coded).
        input_size: Side length of the square input (default 28, the size the
            earlier ``* 28 * 28`` fallback assumed).

    Returns:
        nn.Sequential of freshly initialised layers. Slots whose type code is
        0 or not in LAYER_TYPES are skipped.
    """
    def _out_size(size, kernel, stride, padding=0):
        # Conv/pool output-size formula for dilation 1 and floor rounding.
        # max(stride, 1) only protects this bookkeeping from a zero stride in
        # a hand-built vector; the layer itself still receives the raw value.
        return max((size + 2 * padding - kernel) // max(stride, 1) + 1, 0)

    layers = []
    channels = in_channels
    height = width = input_size
    flat_features = None  # feature count once the tensor has been flattened

    for layer_index in range(MAX_LAYERS):
        start = layer_index * LAYER_VECTOR_SIZE
        layer_vector = vector[start:start + LAYER_VECTOR_SIZE]
        layer_type = int(layer_vector[0])

        if layer_type == 0:
            continue
        elif layer_type == LAYER_TYPES["conv"]:
            filters = int(layer_vector[1])
            kernel_size = int(layer_vector[2])
            stride = int(layer_vector[3])
            padding = int(layer_vector[4])
            layers.append(nn.Conv2d(channels, filters, kernel_size, stride=stride, padding=padding))
            channels = filters
            height = _out_size(height, kernel_size, stride, padding)
            width = _out_size(width, kernel_size, stride, padding)
        elif layer_type == LAYER_TYPES["pool"]:
            kernel_size = int(layer_vector[1])
            stride = int(layer_vector[2])
            layers.append(nn.MaxPool2d(kernel_size, stride=stride))
            height = _out_size(height, kernel_size, stride)
            width = _out_size(width, kernel_size, stride)
        elif layer_type == LAYER_TYPES["flatten"]:
            layers.append(nn.Flatten())
            flat_features = channels * height * width
        elif layer_type == LAYER_TYPES["fc"]:
            out_features = int(layer_vector[1])
            if flat_features is None:
                # No Flatten slot seen yet: size the layer for the whole
                # current feature map.
                flat_features = channels * height * width
            layers.append(nn.Linear(flat_features, out_features))
            # The next Linear consumes this layer's output, not the conv width.
            flat_features = out_features

    return nn.Sequential(*layers)

In [None]:
# Reference network for the encode/decode round trip. The first Linear is
# sized 64 * 7 * 7, i.e. 64 feature maps of 7x7 after the two 2x2 poolings.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(in_features=64 * 7 * 7, out_features=128),
    nn.Linear(in_features=128, out_features=10),
)

# Encode the architecture, show the vector, then decode it again.
vector = vectorize_cnn(model)
print("Векторное представление:", vector)
recovered_model = create_cnn_from_vector(vector)
print("Восстановленная архитектура:", recovered_model)

Векторное представление: [  1.  32.   3.   1.   1.   0.   0.   2.   2.   2.   0.   0.   0.   0.
   1.  64.   3.   1.   1.   0.   0.   2.   2.   2.   0.   0.   0.   0.
   4.   0.   0.   0.   0.   0.   0.   3. 128.   0.   0.   0.   0.   0.
   3.  10.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
Восстановленная архитектура: Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Flatten(start_dim=1, end_dim=-1)
  (5): Linear(in_features=64, out_features=128, bias=True)
  (6): Linear(in_features=64, out_features=10, bias=True)
)


In [None]:
# Layer-by-layer comparison of the original and the decoded architecture.
model_layers = list(model.children())
recovered_layers = list(recovered_model.children())

# zip() stops at the shorter list, so report a length mismatch explicitly
# instead of silently skipping the extra layers.
if len(model_layers) != len(recovered_layers):
    print("Layer count mismatch:", len(model_layers), "vs", len(recovered_layers))

for original, recovered in zip(model_layers, recovered_layers):
    print("Original Layer: ", original)
    print("Recovered Layer: ", recovered)
    if type(original) is not type(recovered):
        print(False)
    elif isinstance(original, nn.Conv2d):
        # Input width must match as well, otherwise the decoded layer cannot
        # process the same tensors as the original.
        print(original.in_channels == recovered.in_channels
              and original.out_channels == recovered.out_channels)
    elif isinstance(original, nn.Linear):
        print(original.in_features == recovered.in_features
              and original.out_features == recovered.out_features)

Original Layer:  Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Recovered Layer:  Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
True
Original Layer:  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Recovered Layer:  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Original Layer:  Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Recovered Layer:  Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
True
Original Layer:  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Recovered Layer:  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Original Layer:  Flatten(start_dim=1, end_dim=-1)
Recovered Layer:  Flatten(start_dim=1, end_dim=-1)
Original Layer:  Linear(in_features=3136, out_features=128, bias=True)
Recovered Layer:  Linear(in_features=64, out_features=128, bias=True)
True
Original Layer:  Linear(in_features=128, 

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.8-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import optuna

In [None]:
# Preprocessing shared by both splits: convert to a tensor, then apply
# (x - 0.5) / 0.5 on each of the three channels.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data if missing, reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)

# Test split: same preprocessing, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=64, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
class CNN(nn.Module):
    """Configurable image classifier: conv blocks, then a two-layer MLP head.

    ``features`` holds ``num_layers`` blocks of Conv2d(3x3, padding=1) -> ReLU
    -> MaxPool2d(2, 2), where block ``i`` outputs ``filters_base * 2**i``
    channels, followed by AdaptiveAvgPool2d((4, 4)) and Flatten.
    ``classifier`` is Linear -> ReLU -> Dropout -> Linear.

    Args:
        num_layers: Number of conv blocks.
        filters_base: Output channels of the first conv block.
        hidden_size: Width of the hidden Linear layer.
        dropout_rate: Dropout probability in the classifier.
        in_channels: Channels of the input images (default 3, the value that
            used to be hard-coded).
        num_classes: Number of output logits (default 10, the value that used
            to be hard-coded).
    """

    def __init__(self, num_layers, filters_base, hidden_size, dropout_rate,
                 in_channels=3, num_classes=10):
        super().__init__()
        # Collect the layers in a plain list and wrap them once, rather than
        # growing an empty Sequential with append().
        feature_layers = []
        channels = in_channels
        for i in range(num_layers):
            filters = filters_base * (2 ** i)
            feature_layers.append(nn.Conv2d(channels, filters, kernel_size=3, padding=1))
            feature_layers.append(nn.ReLU())
            feature_layers.append(nn.MaxPool2d(2, 2))
            channels = filters
        # The adaptive pool fixes the spatial size at 4x4, so the classifier
        # input width depends only on the last channel count.
        feature_layers.append(nn.AdaptiveAvgPool2d((4, 4)))
        feature_layers.append(nn.Flatten())
        self.features = nn.Sequential(*feature_layers)
        self.classifier = nn.Sequential(
            nn.Linear(channels * 4 * 4, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        """Return the classifier logits for the image batch ``x``."""
        x = self.features(x)
        x = self.classifier(x)
        return x

In [None]:
def train_and_evaluate(model, optimizer, criterion, trainloader, testloader, epochs=10, device="cpu"):
    """Train ``model`` for ``epochs`` passes, then return its test accuracy.

    Args:
        model: Module to train; moved to ``device`` in place.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        testloader: Iterable of ``(inputs, labels)`` evaluation batches.
        epochs: Number of full passes over ``trainloader``.
        device: Device the model and every batch are moved to.

    Returns:
        Fraction of ``testloader`` samples whose highest-scoring output index
        equals the label, or 0.0 when ``testloader`` yields no samples.
    """
    model.to(device)

    for _ in range(epochs):
        model.train()
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            _, predicted = torch.max(model(inputs), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty test loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total

In [None]:
def objective(trial):
    """Optuna objective: sample hyperparameters, train a CNN, return accuracy.

    Args:
        trial: Object providing ``suggest_int`` / ``suggest_float``.

    Returns:
        The value returned by ``train_and_evaluate`` after 5 epochs on the
        module-level ``trainloader`` / ``testloader``.
    """
    # Architecture parameters; the keys match CNN.__init__'s argument names
    # and are sampled in the same order as before.
    architecture = {
        "num_layers": trial.suggest_int("num_layers", 1, 5),
        "filters_base": trial.suggest_int("filters_base", 16, 64, step=16),
        "hidden_size": trial.suggest_int("hidden_size", 128, 512, step=128),
        "dropout_rate": trial.suggest_float("dropout_rate", 0.2, 0.5),
    }
    # Learning rate is sampled on a log scale between 1e-4 and 1e-2.
    lr = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)

    if torch.cuda.is_available():
        run_device = torch.device("cuda")
    else:
        run_device = torch.device("cpu")

    net = CNN(**architecture)
    adam = optim.Adam(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    return train_and_evaluate(net, adam, loss_fn, trainloader, testloader, epochs=5, device=run_device)

In [None]:
# Run 10 trials of `objective`, keeping the trial with the highest return value.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)

# Report the winning hyperparameters and the accuracy they reached.
for label, value in (
    ("Лучшие параметры: ", study.best_params),
    ("Лучшая точность: ", study.best_value),
):
    print(label, value)

[I 2024-12-26 09:21:57,794] A new study created in memory with name: no-name-4c6d08d4-f9a7-4460-ab52-75660e188dbc
[I 2024-12-26 09:23:11,843] Trial 0 finished with value: 0.539 and parameters: {'num_layers': 2, 'filters_base': 16, 'hidden_size': 384, 'dropout_rate': 0.40564936441436944, 'learning_rate': 0.0001495091712181927}. Best is trial 0 with value: 0.539.
[I 2024-12-26 09:24:29,627] Trial 1 finished with value: 0.7343 and parameters: {'num_layers': 3, 'filters_base': 16, 'hidden_size': 512, 'dropout_rate': 0.4236711628142462, 'learning_rate': 0.001418790270493617}. Best is trial 1 with value: 0.7343.
[I 2024-12-26 09:25:42,894] Trial 2 finished with value: 0.5662 and parameters: {'num_layers': 2, 'filters_base': 16, 'hidden_size': 256, 'dropout_rate': 0.4105061656951673, 'learning_rate': 0.0002819111732909902}. Best is trial 1 with value: 0.7343.
[I 2024-12-26 09:27:09,682] Trial 3 finished with value: 0.6856 and parameters: {'num_layers': 5, 'filters_base': 16, 'hidden_size': 51

Лучшие параметры:  {'num_layers': 4, 'filters_base': 64, 'hidden_size': 384, 'dropout_rate': 0.22747508141425882, 'learning_rate': 0.0006052457571694114}
Лучшая точность:  0.7725
