In [1]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import accelerate
from accelerate import Accelerator
from llama_cpp import Llama
import json
import re
from openai import OpenAI
from torchvision import datasets, transforms
import wandb
from tqdm import tqdm
import time
# Authenticate with Weights & Biases once, up front, so later cells can log runs.
wandb.login()
# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on a machine without CUDA instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[34m[1mwandb[0m: Currently logged in as: [33maleksanderk[0m ([33mnull-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
import torch

# Report every CUDA device PyTorch can see, or say so when there is none
if not torch.cuda.is_available():
    print("No CUDA GPUs are available")
else:
    gpu_count = torch.cuda.device_count()
    for gpu_index in range(gpu_count):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        print(f"GPU {gpu_index}: {gpu_name}")

GPU 0: NVIDIA A100-PCIE-40GB
GPU 1: NVIDIA A100-PCIE-40GB


In [3]:
# Preprocessing: ToTensor scales pixel values to [0, 1]; Normalize with mean 0.5
# and std 0.5 then maps them to [-1, 1]
DATA_ROOT = './data'
BATCH_SIZE = 64
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split (downloaded on first use), reshuffled every epoch
train_dataset = datasets.FashionMNIST(root=DATA_ROOT, train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Held-out test split, iterated in a fixed order
test_dataset = datasets.FashionMNIST(root=DATA_ROOT, train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Sanity checks: split sizes and the tensor shapes of one training batch
print("Training dataset size:", len(train_dataset))
print("Testing dataset size:", len(test_dataset))

data_iter = iter(train_loader)
images, labels = next(data_iter)
print("Example Batch Shape - Images", images.shape, "Labels:", labels.shape)

Training dataset size: 60000
Testing dataset size: 10000
Example Batch Shape - Images torch.Size([64, 1, 28, 28]) Labels: torch.Size([64])


In [4]:
# Utility class that stands in for an API usage record when the backend does not
# report one. It exposes the two token counters the cost accounting reads.
class DummyUsage:
    """Minimal usage record with `prompt_tokens` and `completion_tokens` attributes."""

    def __init__(self, prompt_tokens, completion_tokens):
        """Store the token counts.

        Args:
            prompt_tokens: number of tokens sent to the model.
            completion_tokens: number of tokens generated by the model.
        """
        # `self` was missing from the original signature, which made the class
        # impossible to instantiate.
        self.prompt_tokens = prompt_tokens
        self.completion_tokens = completion_tokens
        
# class to use OpenAI API
class OpenAIModel:
    """Thin wrapper around an OpenAI-compatible chat-completions endpoint."""

    def __init__(self, model_type, api_key=None, base_url="https://api.proxyapi.ru/openai/v1"):
        """Create the API client.

        Args:
            model_type: model name passed to the chat-completions endpoint.
            api_key: API key; when omitted it is read from the `API_KEY`
                environment variable so the secret never lives in the notebook.
            base_url: endpoint base URL (defaults to the proxy used originally).

        Raises:
            RuntimeError: if no API key is given and `API_KEY` is not set.
        """
        import os  # local import keeps this class self-contained

        if api_key is None:
            api_key = os.environ.get("API_KEY")
        if not api_key:
            # Fail early with a clear message instead of sending a placeholder
            # string as the key and getting an authentication error later.
            raise RuntimeError(
                "No API key available: pass api_key=... or set the API_KEY environment variable"
            )
        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url,
        )
        self.model_type = model_type

    def completion(self, prompt):
        """Send `prompt` as a single user message.

        Returns:
            (text, usage): the content of the first choice and the usage object
            attached to the response.
        """
        chat_completion = self.client.chat.completions.create(
            model=self.model_type, messages=[{"role": "user", "content": prompt}]
        )
        return chat_completion.choices[0].message.content, chat_completion.usage

# class to run local models with LLama.cpp
class LocalModel:
    """Backend that runs a local model through llama.cpp.

    `completion` returns the same `(text, usage)` pair as `OpenAIModel.completion`,
    so the two backends are interchangeable for the prompt helpers.
    """

    def __init__(self, model_path, gpu_ids=None):
        """Load the model.

        Args:
            model_path: path of the model file handed to `Llama`.
            gpu_ids: optional GPU selection, only stored on the instance. It used
                to be read from an undefined/global name; it is now an explicit
                argument.
        """
        self.model_path = model_path
        self.gpu_ids = gpu_ids
        # NOTE(review): n_gpu_layers=-1 presumably offloads all layers to the GPU;
        # confirm against the installed llama-cpp-python version.
        self.model = Llama(model_path=self.model_path, n_gpu_layers=-1)

    def completion(self, prompt):
        """Run `prompt` through the local model.

        Returns:
            (text, usage): the text of the first choice and an object with
            `prompt_tokens` / `completion_tokens` attributes (0 when the
            response carries no usage information).
        """
        from types import SimpleNamespace  # local import keeps this class self-contained

        response = self.model(prompt)
        text = response["choices"][0]["text"]
        reported = response.get("usage") or {}
        # Mirror the attribute names of the OpenAI usage object so the cost
        # accounting code can treat both backends the same way.
        usage = SimpleNamespace(
            prompt_tokens=reported.get("prompt_tokens", 0),
            completion_tokens=reported.get("completion_tokens", 0),
        )
        return text, usage

In [6]:
# Initialize the LLM backend used for all prompts. get_new_model, fix_model and
# reflect call LLM_backend.completion and unpack a (text, usage) pair from it.
LLM_backend = OpenAIModel('gpt-4-turbo')

# example module we can add to the model's arsenal
class ResidualBlock(nn.Module):
    """Residual block: conv-BN-ReLU-conv-BN on the main path plus a shortcut.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by both convolutions.
        kernel_size: kernel size shared by the convolutions.
        padding: padding shared by the convolutions.
        stride: stride of the first convolution and of the projection shortcut.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding, stride=1):
        super().__init__()

        # First convolutional layer: the only place where downsampling happens
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Second convolutional layer: always stride 1. Reusing `stride` here
        # downsampled the main path twice while the shortcut downsampled once,
        # so the residual addition failed for any stride != 1.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Shortcut connection: identity unless the shape changes, in which case a
        # projection with the same geometry as conv1 is used
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        """Return relu(main_path(x) + shortcut(x))."""
        # Main path
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # Shortcut connection
        out += self.shortcut(x)
        out = self.relu(out)

        return out

def _safe_eval_literal(text):
    """Evaluate a literal or simple arithmetic expression without calling eval().

    Supports constants, tuples, lists, unary +/- and the binary operators
    + - * / // % **. Anything else (names, calls, attribute access, ...) raises
    ValueError, so text coming from an LLM cannot execute code.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def walk(node):
        if isinstance(node, ast.Constant):
            return node.value
        if isinstance(node, ast.Tuple):
            return tuple(walk(item) for item in node.elts)
        if isinstance(node, ast.List):
            return [walk(item) for item in node.elts]
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](walk(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](walk(node.left), walk(node.right))
        raise ValueError(f"unsupported expression: {text!r}")

    return walk(ast.parse(text.strip(), mode="eval").body)


def convert_values(data):
    """Convert the string values of a layer config into Python objects.

    Args:
        data: mapping of argument name -> value. Values are usually strings
            such as "32", "0.5", "64 * 7 * 7", "(2, 2)" or "true".

    Returns:
        A new dict with the same keys. Strings are turned into
        numbers/tuples/booleans when they parse as such; strings that do not
        parse (e.g. "zeros", "sum") and all non-string values are kept as is.
    """
    converted_data = {}

    for key, value in data.items():
        if not isinstance(value, str):
            # Already typed by the JSON parser (numbers, booleans, null, lists).
            converted_data[key] = value
            continue

        stripped = value.strip()
        lowered = stripped.lower()
        if lowered == 'true':
            converted_data[key] = True
            continue
        if lowered == 'false':
            converted_data[key] = False
            continue

        converted_value = value
        parsed = False
        if stripped.startswith("("):
            # Keep the historical behaviour for "(a, b)": a tuple of ints.
            try:
                converted_value = tuple(map(int, stripped.strip('()').split(',')))
                parsed = True
            except ValueError:
                pass
        if not parsed:
            try:
                converted_value = _safe_eval_literal(stripped)
            except (SyntaxError, ValueError, TypeError, ArithmeticError, RecursionError, MemoryError):
                # Not a literal or arithmetic expression: keep the original string.
                converted_value = value

        converted_data[key] = converted_value

    return converted_data

# builds sequential model from JSON-like config
def build_model(config):
    """Build an `nn.Sequential` from a layer config.

    Args:
        config: mapping of layer name -> layer config, iterated in order. Each
            layer config has a 'type' entry ('resblock' for `ResidualBlock`,
            otherwise the name of a `torch.nn` class) and the constructor
            arguments for that layer. The mapping is not modified.

    Returns:
        nn.Sequential containing one module per config entry.

    Raises:
        KeyError: if a layer config has no 'type' entry.
        AttributeError: if 'type' does not name a `torch.nn` attribute.
    """
    layer_list = []
    for key in config:
        # Work on a copy: popping 'type' from the caller's dict would make the
        # config unusable for a second build.
        layer_kwargs = dict(config[key])
        if 'type' not in layer_kwargs:
            raise KeyError(f"{key} is missing the required 'type' field")
        layer_type = layer_kwargs.pop('type')
        layer_kwargs = convert_values(layer_kwargs)
        if layer_type == 'resblock':
            layer_cls = ResidualBlock
        else:
            layer_cls = getattr(torch.nn, layer_type)
        layer_list.append(layer_cls(**layer_kwargs))

    return nn.Sequential(*layer_list)

def train_and_evaluate(model, train_loader, test_loader, num_epochs, gpu_ids=None):
    """Train `model` with Adam and cross-entropy, evaluating after every epoch.

    Args:
        model: torch module to train.
        train_loader: iterable of (images, labels) training batches.
        test_loader: iterable of (images, labels) evaluation batches.
        num_epochs: number of passes over `train_loader`.
        gpu_ids: optional value stored on the accelerator state (see note below).

    Returns:
        (summary, best_train_accuracy, best_test_accuracy): `summary` has one
        line per epoch; the accuracies are the maxima over all epochs, or 0
        when no epoch was run.
    """
    # `split_batches` is left at its default (False); passing it explicitly is
    # deprecated in recent Accelerate releases and produced the
    # DataLoaderConfiguration warning seen in the cell output.
    accelerator = Accelerator(device_placement=True, cpu=False)
    if gpu_ids is not None:
        # NOTE(review): this only sets an attribute on the accelerator state; it
        # is unclear whether Accelerate reads it. Confirm, or select GPUs via
        # CUDA_VISIBLE_DEVICES instead.
        accelerator.state.device_ids = gpu_ids

    # Prepare the model, optimizer, and data loaders
    model, optimizer, train_loader, test_loader = accelerator.prepare(model, optim.Adam(model.parameters()), train_loader, test_loader)
    criterion = nn.CrossEntropyLoss()
    # Use the device the accelerator chose instead of a notebook-level `device`
    # global, so the function does not depend on hidden state.
    target_device = accelerator.device
    summary = ''
    train_history = []
    test_history = []

    for epoch in tqdm(range(num_epochs)):
        model.train()
        train_correct = 0
        train_total = 0
        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(images)
            _, predicted = torch.max(outputs.detach(), 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()
            loss = criterion(outputs, labels)
            accelerator.backward(loss)
            optimizer.step()

        model.eval()
        test_correct = 0
        test_total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(target_device), labels.to(target_device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        # Guard against empty loaders so an empty split reports 0 instead of
        # raising ZeroDivisionError.
        test_accuracy = test_correct / test_total if test_total else 0.0
        train_accuracy = train_correct / train_total if train_total else 0.0
        train_history.append(train_accuracy)
        test_history.append(test_accuracy)
        epoch_result = f'Epoch {epoch + 1}/{num_epochs}, Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {test_accuracy:.4f}'
        summary += epoch_result + '\n'

    # default=0 keeps num_epochs == 0 from raising on an empty history.
    return summary, max(train_history, default=0), max(test_history, default=0)

# Initial conditions given to the LLM in every prompt. The image count matches
# the FashionMNIST training split loaded above (60000 images), not 50K.
data_description = "60K 28x28 grayscale images, each with a label from 10 classes"
task_description = "Image Classification into 10 classes"
# Seed model, not strictly necessary. It is written in the JSON layer format
# that build_model consumes: one entry per layer in order, "type" names the
# module, and the remaining entries are constructor arguments given as strings.
# Two 2x2 max-pools reduce the 28x28 input to 7x7, hence "64 * 7 * 7".
gpt4_model_string = '''
{
  "layer_0": {
    "type": "Conv2d",
    "in_channels": "1",
    "out_channels": "32",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_1": {
    "type": "ReLU"
  },
  "layer_2": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_3": {
    "type": "Conv2d",
    "in_channels": "32",
    "out_channels": "64",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_4": {
    "type": "ReLU"
  },
  "layer_5": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_6": {
    "type": "Flatten",
    "start_dim": "1",
    "end_dim": "-1"
  },
  "layer_7": {
    "type": "Linear",
    "in_features": "64 * 7 * 7",
    "out_features": "128",
    "bias": "true"
  },
  "layer_8": {
    "type": "ReLU"
  },
  "layer_9": {
    "type": "Linear",
    "in_features": "128",
    "out_features": "10",
    "bias": "true"
  }
}
'''

# Alternative seed model with three conv/pool stages. Three 2x2 max-pools with
# ceil_mode=false shrink the 28x28 input to 14 -> 7 -> 3, so the flattened
# feature size is 256 * 3 * 3 (the previous "256 * 7 * 7" could not be
# multiplied with the actual activations).
llama8b_model_string = '''
{
  "layer_0": {
    "type": "Conv2d",
    "in_channels": "1",
    "out_channels": "64",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_1": {
    "type": "ReLU"
  },
  "layer_2": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_3": {
    "type": "Conv2d",
    "in_channels": "64",
    "out_channels": "128",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_4": {
    "type": "ReLU"
  },
  "layer_5": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_6": {
    "type": "Conv2d",
    "in_channels": "128",
    "out_channels": "256",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_7": {
    "type": "ReLU"
  },
  "layer_8": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_9": {
    "type": "Flatten",
    "start_dim": "1",
    "end_dim": "-1"
  },
  "layer_10": {
    "type": "Linear",
    "in_features": "256 * 3 * 3",
    "out_features": "128",
    "bias": "true"
  },
  "layer_11": {
    "type": "ReLU"
  },
  "layer_12": {
    "type": "Linear",
    "in_features": "128",
    "out_features": "10",
    "bias": "true"
  }
}
'''

# Alternative seed model in the same JSON layer format, with BatchNorm2d after
# each ReLU. Two 2x2 max-pools reduce the 28x28 input to 7x7, hence "32 * 7 * 7".
llama70b_model_string = '''
{
  "layer_0": {
    "type": "Conv2d",
    "in_channels": "1",
    "out_channels": "16",
    "kernel_size": "5",
    "stride": "1",
    "padding": "2"
  },
  "layer_1": {
    "type": "ReLU"
  },
  "layer_2": {
    "type": "BatchNorm2d",
    "num_features": "16"
  },
  "layer_3": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_4": {
    "type": "Conv2d",
    "in_channels": "16",
    "out_channels": "32",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_5": {
    "type": "ReLU"
  },
  "layer_6": {
    "type": "BatchNorm2d",
    "num_features": "32"
  },
  "layer_7": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_8": {
    "type": "Flatten",
    "start_dim": "1",
    "end_dim": "-1"
  },
  "layer_9": {
    "type": "Linear",
    "in_features": "32 * 7 * 7",
    "out_features": "128",
    "bias": "true"
  },
  "layer_10": {
    "type": "ReLU"
  },
  "layer_11": {
    "type": "Linear",
    "in_features": "128",
    "out_features": "10",
    "bias": "true"
  }
}
'''

# Response template to follow when running self-reflection; it is embedded
# verbatim in the reflect() prompt. The bare words bool/int/float are type
# placeholders for the LLM to fill in, so this text is not valid JSON itself.
reflection_format = '''
{
    "differences_between_models": [],
    "new_model_performance_better": bool,
    "why_performance_improved": "",
    "model_overfit": bool,
    "how_to_improve_architecture_further": [],
    "optimizer_choice": "",
    "how_many_epochs_to_choose": int,
    "learning_rate_choice": float,
    "summary": ""
}
'''


def extract_between_tokens(input_string, start_token, end_token):
    """Return the text between the first `start_token` and the next `end_token`.

    Args:
        input_string: text to search.
        start_token: marker that opens the region of interest.
        end_token: marker that closes it (searched for after `start_token`).

    Returns:
        The enclosed substring, or `input_string` unchanged when either marker
        is missing.
    """
    token_pos = input_string.find(start_token)
    # Check for "not found" BEFORE adding the token length; afterwards the
    # index can no longer be -1 and a missing start token would go unnoticed.
    if token_pos == -1:
        return input_string

    start_index = token_pos + len(start_token)
    end_index = input_string.find(end_token, start_index)
    if end_index == -1:
        return input_string

    return input_string[start_index:end_index]
    
def remove_comments(json_str):
    """Strip `//` and `/* ... */` comments from JSON-like text.

    Comment markers that appear inside double-quoted strings (for example the
    `//` in a URL) are left untouched.

    Args:
        json_str: JSON text that may contain JavaScript-style comments.

    Returns:
        The text with comments removed and everything else unchanged.
    """
    # A quoted string is matched first so that comment markers inside it are
    # consumed as part of the string rather than treated as a comment start.
    token_pattern = re.compile(r'"(?:\\.|[^"\\])*"|//[^\n]*|/\*.*?\*/', re.DOTALL)

    def keep_strings(match):
        token = match.group(0)
        return token if token.startswith('"') else ''

    return token_pattern.sub(keep_strings, json_str)

# main function to generate a new model
def get_new_model(data_description, task_description, model_string, summary, reflection=None):
    """Ask the LLM backend for an improved architecture.

    The prompt contains the data and task descriptions, the current model JSON
    and its training summary; when `reflection` is given it is appended as extra
    guidance.

    Returns:
        (model_json, usage): the reply reduced to the text inside its json code
        fence (the whole reply when no fence is found) with comments removed,
        and the usage object reported by the backend.
    """
    request = f'''
    You are a deep learning engineer. You will be provided with a model in JSON format, its perfomance, a task and a data description.
    You are only capable of replying with a valid JSON containing a new improved model (same format) that should perform better than the one you were given.
    Warning: all values must be presented as strings
    Warning: output must be valid JSON, you will be penalised for mistakes, such as adding comments or anything to violate that format
    Warning: you will be penalised for overfitting
    Use any Pytorch modules you want or use "layer_...": {{
        "type": "resblock",
        "in_channels": "...",
        "out_channels": "...",
        "kernel_size": "...",
        "stride": "...",
        "padding": "..."
    }} to use a resnet block, substitute ... with the needed values.
  
    Data description:
    {data_description}

    Task description:
    {task_description}

    Old model in JSON format:
    {model_string}

    Old model perfomance:
    {summary}
    
    '''
    if reflection is not None:
        request = request + f'''Consider the following information when making your decision: {reflection}'''

    reply_text, usage = LLM_backend.completion(request)
    fenced_payload = extract_between_tokens(reply_text.strip(), "```json", "```")
    return remove_comments(fenced_payload), usage

# main function to perform error correction
def fix_model(model_string, error):
    """Ask the LLM backend to repair a model config that failed to build or train.

    Args:
        model_string: the failing model in JSON form.
        error: the error encountered; it is interpolated into the prompt as text.

    Returns:
        (model_json, usage): the reply reduced to the text inside its json code
        fence (the whole reply when no fence is found) with comments removed,
        and the usage object reported by the backend.
    """
    request = f'''
    You are a deep learning engineer. I will present you with a model represented in JSON form and an error it encountered during training.
    Fix the error and reply only with the fixed model in the same JSON format.
    All values must be formatted as strings and enclosed in double quotes.
    Old model:
    {model_string}
    
    Error:
    {error}
    '''
    reply_text, usage = LLM_backend.completion(request)
    fenced_payload = extract_between_tokens(reply_text.strip(), "```json", "```")
    return remove_comments(fenced_payload), usage

# main function to perform self-reflection
def reflect(data_description, task_description, old_model_string, new_model_string, old_summary, new_summary, reflection_format, num_epochs=None):
    """Ask the LLM backend to compare two models and advise on the next change.

    Args:
        data_description: text describing the dataset.
        task_description: text describing the task.
        old_model_string: previous model in JSON form.
        new_model_string: current model in JSON form.
        old_summary: training summary of the previous model.
        new_summary: training summary of the current model.
        reflection_format: response template the LLM is told to fill in.
        num_epochs: epoch count reported under "Current settings". When omitted
            the notebook-level `num_epochs` variable is used, which is what this
            function always read implicitly.

    Returns:
        (reflection_text, usage): the stripped reply and the usage object
        reported by the backend.

    Raises:
        NameError: if `num_epochs` is omitted and no notebook-level
            `num_epochs` exists.
    """
    if num_epochs is None:
        # Backward-compatible fallback to the hidden global this function used
        # to depend on; fail loudly when it does not exist.
        try:
            num_epochs = globals()["num_epochs"]
        except KeyError:
            raise NameError(
                "num_epochs was not passed to reflect() and no notebook-level num_epochs is defined"
            ) from None

    prompt = f'''
    You are a deep learning engineer. You will be provided with two models in JSON format, their perfomances, a task and a data description.
    You are only capable of replying with a valid JSON containing a brief reflection on the differences between the models and advice on further improvement.
    Warning: you will be penalised for writing code, you must reply with plain text only.
    Warning: you will be penalised for deviating from the format
    What are the differences between the models?
    Did the new one perform better?
    Why?
    Does the model overfit?
    How should the model be changed to improve perfomance?
    Summarise, be very concise.
    Give specific concrete recommendations about changes to the model

    Response format you must fill:
    {reflection_format}
    
    Data description:
    {data_description}

    Task description:
    {task_description}

    Old model in JSON format:
    {old_model_string}

    Old model perfomance:
    {old_summary}

    Current model in JSON format:
    {new_model_string}

    Current model perfomance:
    {new_summary}

    Current settings:
    "optimizer": "Adam"
    "num_epochs": {num_epochs}
    
    '''
    completion, usage = LLM_backend.completion(prompt)
    return completion.strip(), usage


In [None]:
# --- Experiment configuration ---------------------------------------------
# Cost of one prompt / completion token.
# NOTE(review): presumably the provider quotes these per 1000 tokens; confirm
# the currency and rates against the current price list.
send_price = 2.88 / 1000
receive_price = 8.64 / 1000
context = ''
num_epochs = 25
i = 0
discovered_error = None
test_results = []
train_results = []
times = []
fail_count = 0
num_iters = 40
models = []
summaries = []
# NOTE(review): the GPU listing earlier in the notebook shows devices 0 and 1
# only; confirm that index 3 is intended.
gpu_ids = [3]
# Seed architecture for the first iteration. It was previously left over from
# hidden kernel state, so the cell failed on a fresh kernel.
model_string = gpt4_model_string


def usage_cost(usage):
    """Return the price of one LLM call given its token usage record."""
    return usage.prompt_tokens * send_price + usage.completion_tokens * receive_price


wandb.init(
    # set the wandb project where this run will be logged
    project="LLM-NAS-Experiments",

    # track hyperparameters and run metadata
    config={
    "epochs": num_epochs,
    "iters": num_iters,
    "model": "GPT-4",
    "data_augmentation": "None",
    "pipeline": "Basic"
    }
)

# try/finally guarantees the wandb run is closed on success, on API failure and
# on manual interruption alike.
try:
    while i < num_iters:
        cost_per_iter = 0
        training_time = None
        test_acc = None
        train_acc = None
        n_params = None
        print('Iteration: ', i)
        i += 1
        # try building and training the model
        try:
            print(model_string)
            model = build_model(json.loads(model_string))
            print(model)
            n_params = sum(p.numel() for p in model.parameters() if p.requires_grad) / 1000000
            print('Number of parameters: ', n_params, 'M')
            start = time.time()
            summary, train_acc, test_acc = train_and_evaluate(model, train_loader, test_loader, num_epochs, gpu_ids=gpu_ids)
            end = time.time()
            train_results.append(train_acc)
            test_results.append(test_acc)
            print(summary)
            training_time = end - start
            print("Training time: ", training_time)
            times.append(training_time)
            print('\n\n')
            discovered_error = None
            models.append(model_string)
            summaries.append(summary)
        except Exception as e:
            # Any build/training failure is handed to the LLM for repair below.
            print(e)
            print("Failed to build and train model, attempting to correct")
            fail_count += 1
            discovered_error = e

        # update model
        try:
            if discovered_error is None:
                reflection = None
                if len(models) > 1:
                    # With at least two trained models, reflect on the last two
                    # before asking for the next architecture.
                    reflection, usage = reflect(data_description, task_description, models[-2], models[-1], summaries[-2], summaries[-1], reflection_format)
                    print(reflection)
                    cost_per_iter += usage_cost(usage)
                model_string, usage = get_new_model(data_description, task_description, model_string, summary, reflection)
                cost_per_iter += usage_cost(usage)
            else:
                model_string, usage = fix_model(model_string, discovered_error)
                cost_per_iter += usage_cost(usage)
            print("Iteration cost: ", cost_per_iter)

        except Exception as e:
            print(e)
            # Chain the original exception so the real cause stays in the traceback.
            raise Exception('Failed to connect to API or parse the response') from e

        if discovered_error is None:
            wandb.log({"test_acc": test_acc, "train_acc": train_acc, "time": training_time, "cost": cost_per_iter, "M_parameters": n_params})
        else:
            wandb.log({"test_acc": 0, "train_acc": 0, "time": 0, "cost": cost_per_iter, "M_parameters": n_params})
finally:
    wandb.finish()

Iteration:  0

{
  "layer_0": {
    "type": "Conv2d",
    "in_channels": "1",
    "out_channels": "32",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_1": {
    "type": "ReLU"
  },
  "layer_2": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_3": {
    "type": "Conv2d",
    "in_channels": "32",
    "out_channels": "64",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_4": {
    "type": "ReLU"
  },
  "layer_5": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_6": {
    "type": "Flatten",
    "start_dim": "1",
    "end_dim": "-1"
  },
  "layer_7": {
    "type": "Linear",
    "in_features": "64 * 7 * 7",
    "out_features": "128",
    "bias": "true"
  },
  "layer_8": {
    "type": "ReLU"
  },
  "layer_9": {
    "type": "Linear",
    "i

dataloader_config = DataLoaderConfiguration(split_batches=False)
100%|███████████████████████████████████████████| 25/25 [04:26<00:00, 10.68s/it]


Epoch 1/25, Train Accuracy: 0.8455, Validation Accuracy: 0.8843
Epoch 2/25, Train Accuracy: 0.9017, Validation Accuracy: 0.9035
Epoch 3/25, Train Accuracy: 0.9185, Validation Accuracy: 0.9026
Epoch 4/25, Train Accuracy: 0.9279, Validation Accuracy: 0.9192
Epoch 5/25, Train Accuracy: 0.9373, Validation Accuracy: 0.9143
Epoch 6/25, Train Accuracy: 0.9465, Validation Accuracy: 0.9236
Epoch 7/25, Train Accuracy: 0.9539, Validation Accuracy: 0.9113
Epoch 8/25, Train Accuracy: 0.9604, Validation Accuracy: 0.9206
Epoch 9/25, Train Accuracy: 0.9660, Validation Accuracy: 0.9184
Epoch 10/25, Train Accuracy: 0.9725, Validation Accuracy: 0.9260
Epoch 11/25, Train Accuracy: 0.9769, Validation Accuracy: 0.9215
Epoch 12/25, Train Accuracy: 0.9805, Validation Accuracy: 0.9213
Epoch 13/25, Train Accuracy: 0.9834, Validation Accuracy: 0.9197
Epoch 14/25, Train Accuracy: 0.9867, Validation Accuracy: 0.9191
Epoch 15/25, Train Accuracy: 0.9873, Validation Accuracy: 0.9165
Epoch 16/25, Train Accuracy: 0.989

dataloader_config = DataLoaderConfiguration(split_batches=False)


Iteration cost:  7.847999999999999
Iteration:  1
{
  "layer_0": {
    "type": "Conv2d",
    "in_channels": "1",
    "out_channels": "32",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_1": {
    "type": "BatchNorm2d",
    "num_features": "32"
  },
  "layer_2": {
    "type": "ReLU"
  },
  "layer_3": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_4": {
    "type": "resblock",
    "in_channels": "32",
    "out_channels": "64",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_5": {
    "type": "BatchNorm2d",
    "num_features": "64"
  },
  "layer_6": {
    "type": "ReLU"
  },
  "layer_7": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0",
    "dilation": "1",
    "ceil_mode": "false"
  },
  "layer_8": {
    "type": "Flatten",
    "start_dim": "1",
    "end_dim": "-1"
  },
  "layer_9": {
    "typ

100%|███████████████████████████████████████████| 25/25 [05:19<00:00, 12.79s/it]


Epoch 1/25, Train Accuracy: 0.8728, Validation Accuracy: 0.9086
Epoch 2/25, Train Accuracy: 0.9099, Validation Accuracy: 0.9166
Epoch 3/25, Train Accuracy: 0.9237, Validation Accuracy: 0.9196
Epoch 4/25, Train Accuracy: 0.9315, Validation Accuracy: 0.9218
Epoch 5/25, Train Accuracy: 0.9389, Validation Accuracy: 0.9261
Epoch 6/25, Train Accuracy: 0.9438, Validation Accuracy: 0.9286
Epoch 7/25, Train Accuracy: 0.9487, Validation Accuracy: 0.9290
Epoch 8/25, Train Accuracy: 0.9539, Validation Accuracy: 0.9278
Epoch 9/25, Train Accuracy: 0.9582, Validation Accuracy: 0.9283
Epoch 10/25, Train Accuracy: 0.9626, Validation Accuracy: 0.9299
Epoch 11/25, Train Accuracy: 0.9645, Validation Accuracy: 0.9297
Epoch 12/25, Train Accuracy: 0.9679, Validation Accuracy: 0.9275
Epoch 13/25, Train Accuracy: 0.9704, Validation Accuracy: 0.9225
Epoch 14/25, Train Accuracy: 0.9720, Validation Accuracy: 0.9295
Epoch 15/25, Train Accuracy: 0.9748, Validation Accuracy: 0.9299
Epoch 16/25, Train Accuracy: 0.975

100%|███████████████████████████████████████████| 25/25 [06:13<00:00, 14.93s/it]


Epoch 1/25, Train Accuracy: 0.8611, Validation Accuracy: 0.9002
Epoch 2/25, Train Accuracy: 0.9029, Validation Accuracy: 0.9102
Epoch 3/25, Train Accuracy: 0.9132, Validation Accuracy: 0.9112
Epoch 4/25, Train Accuracy: 0.9219, Validation Accuracy: 0.9236
Epoch 5/25, Train Accuracy: 0.9284, Validation Accuracy: 0.9228
Epoch 6/25, Train Accuracy: 0.9331, Validation Accuracy: 0.9281
Epoch 7/25, Train Accuracy: 0.9384, Validation Accuracy: 0.9267
Epoch 8/25, Train Accuracy: 0.9409, Validation Accuracy: 0.9311
Epoch 9/25, Train Accuracy: 0.9449, Validation Accuracy: 0.9315
Epoch 10/25, Train Accuracy: 0.9489, Validation Accuracy: 0.9303
Epoch 11/25, Train Accuracy: 0.9517, Validation Accuracy: 0.9349
Epoch 12/25, Train Accuracy: 0.9544, Validation Accuracy: 0.9334
Epoch 13/25, Train Accuracy: 0.9568, Validation Accuracy: 0.9318
Epoch 14/25, Train Accuracy: 0.9596, Validation Accuracy: 0.9317
Epoch 15/25, Train Accuracy: 0.9618, Validation Accuracy: 0.9305
Epoch 16/25, Train Accuracy: 0.964

  0%|                                                    | 0/25 [00:00<?, ?it/s]

mat1 and mat2 shapes cannot be multiplied (64x256 and 1024x128)
Failed to build and train model, attempting to correct





Iteration cost:  9.39456
Iteration:  4
{
  "layer_0": {
    "type": "Conv2d",
    "in_channels": "1",
    "out_channels": "32",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_1": {
    "type": "BatchNorm2d",
    "num_features": "32"
  },
  "layer_2": {
    "type": "ReLU"
  },
  "layer_3": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0"
  },
  "layer_4": {
    "type": "resblock",
    "in_channels": "32",
    "out_channels": "64",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
  },
  "layer_5": {
    "type": "BatchNorm2d",
    "num_features": "64"
  },
  "layer_6": {
    "type": "ReLU"
  },
  "layer_7": {
    "type": "MaxPool2d",
    "kernel_size": "2",
    "stride": "2",
    "padding": "0"
  },
  "layer_8": {
    "type": "Dropout",
    "p": "0.3"
  },
  "layer_9": {
    "type": "resblock",
    "in_channels": "64",
    "out_channels": "128",
    "kernel_size": "3",
    "stride": "1",
    "padding": "1"
 

100%|███████████████████████████████████████████| 25/25 [07:51<00:00, 18.84s/it]


Epoch 1/25, Train Accuracy: 0.8410, Validation Accuracy: 0.8970
Epoch 2/25, Train Accuracy: 0.8974, Validation Accuracy: 0.9060
Epoch 3/25, Train Accuracy: 0.9090, Validation Accuracy: 0.9076
Epoch 4/25, Train Accuracy: 0.9192, Validation Accuracy: 0.9169
Epoch 5/25, Train Accuracy: 0.9259, Validation Accuracy: 0.9184
Epoch 6/25, Train Accuracy: 0.9305, Validation Accuracy: 0.9241
Epoch 7/25, Train Accuracy: 0.9342, Validation Accuracy: 0.9275
Epoch 8/25, Train Accuracy: 0.9394, Validation Accuracy: 0.9293
Epoch 9/25, Train Accuracy: 0.9440, Validation Accuracy: 0.9269
Epoch 10/25, Train Accuracy: 0.9471, Validation Accuracy: 0.9336
Epoch 11/25, Train Accuracy: 0.9498, Validation Accuracy: 0.9252
Epoch 12/25, Train Accuracy: 0.9530, Validation Accuracy: 0.9325
Epoch 13/25, Train Accuracy: 0.9558, Validation Accuracy: 0.9334
Epoch 14/25, Train Accuracy: 0.9598, Validation Accuracy: 0.9350
Epoch 15/25, Train Accuracy: 0.9616, Validation Accuracy: 0.9291
Epoch 16/25, Train Accuracy: 0.963

100%|███████████████████████████████████████████| 25/25 [08:04<00:00, 19.37s/it]


Epoch 1/25, Train Accuracy: 0.8484, Validation Accuracy: 0.8954
Epoch 2/25, Train Accuracy: 0.8992, Validation Accuracy: 0.9097
Epoch 3/25, Train Accuracy: 0.9137, Validation Accuracy: 0.9095
Epoch 4/25, Train Accuracy: 0.9228, Validation Accuracy: 0.9130
Epoch 5/25, Train Accuracy: 0.9292, Validation Accuracy: 0.9237
Epoch 6/25, Train Accuracy: 0.9344, Validation Accuracy: 0.9300
Epoch 7/25, Train Accuracy: 0.9397, Validation Accuracy: 0.9192
Epoch 8/25, Train Accuracy: 0.9446, Validation Accuracy: 0.9299
Epoch 9/25, Train Accuracy: 0.9492, Validation Accuracy: 0.9320
Epoch 10/25, Train Accuracy: 0.9536, Validation Accuracy: 0.9313
Epoch 11/25, Train Accuracy: 0.9569, Validation Accuracy: 0.9309
Epoch 12/25, Train Accuracy: 0.9619, Validation Accuracy: 0.9352
Epoch 13/25, Train Accuracy: 0.9642, Validation Accuracy: 0.9314
Epoch 14/25, Train Accuracy: 0.9680, Validation Accuracy: 0.9350
Epoch 15/25, Train Accuracy: 0.9711, Validation Accuracy: 0.9315
Epoch 16/25, Train Accuracy: 0.973

100%|███████████████████████████████████████████| 25/25 [07:56<00:00, 19.06s/it]


Epoch 1/25, Train Accuracy: 0.8572, Validation Accuracy: 0.8845
Epoch 2/25, Train Accuracy: 0.9036, Validation Accuracy: 0.9135
Epoch 3/25, Train Accuracy: 0.9163, Validation Accuracy: 0.9141
Epoch 4/25, Train Accuracy: 0.9260, Validation Accuracy: 0.9084
Epoch 5/25, Train Accuracy: 0.9326, Validation Accuracy: 0.9246
Epoch 6/25, Train Accuracy: 0.9393, Validation Accuracy: 0.9205
Epoch 7/25, Train Accuracy: 0.9463, Validation Accuracy: 0.9207
Epoch 8/25, Train Accuracy: 0.9513, Validation Accuracy: 0.9293
Epoch 9/25, Train Accuracy: 0.9557, Validation Accuracy: 0.9278
Epoch 10/25, Train Accuracy: 0.9607, Validation Accuracy: 0.9313
Epoch 11/25, Train Accuracy: 0.9644, Validation Accuracy: 0.9274
Epoch 12/25, Train Accuracy: 0.9689, Validation Accuracy: 0.9313
Epoch 13/25, Train Accuracy: 0.9709, Validation Accuracy: 0.9263
Epoch 14/25, Train Accuracy: 0.9748, Validation Accuracy: 0.9295
Epoch 15/25, Train Accuracy: 0.9769, Validation Accuracy: 0.9285
Epoch 16/25, Train Accuracy: 0.979

100%|███████████████████████████████████████████| 25/25 [07:32<00:00, 18.08s/it]


Epoch 1/25, Train Accuracy: 0.8422, Validation Accuracy: 0.9004
Epoch 2/25, Train Accuracy: 0.8968, Validation Accuracy: 0.9137
Epoch 3/25, Train Accuracy: 0.9094, Validation Accuracy: 0.9070
Epoch 4/25, Train Accuracy: 0.9181, Validation Accuracy: 0.9181
Epoch 5/25, Train Accuracy: 0.9257, Validation Accuracy: 0.9207
Epoch 6/25, Train Accuracy: 0.9294, Validation Accuracy: 0.9249
Epoch 7/25, Train Accuracy: 0.9356, Validation Accuracy: 0.9283
Epoch 8/25, Train Accuracy: 0.9391, Validation Accuracy: 0.9297
Epoch 9/25, Train Accuracy: 0.9434, Validation Accuracy: 0.9327
Epoch 10/25, Train Accuracy: 0.9473, Validation Accuracy: 0.9258
Epoch 11/25, Train Accuracy: 0.9499, Validation Accuracy: 0.9298
Epoch 12/25, Train Accuracy: 0.9552, Validation Accuracy: 0.9357
Epoch 13/25, Train Accuracy: 0.9567, Validation Accuracy: 0.9350
Epoch 14/25, Train Accuracy: 0.9580, Validation Accuracy: 0.9343
Epoch 15/25, Train Accuracy: 0.9617, Validation Accuracy: 0.9301
Epoch 16/25, Train Accuracy: 0.963

100%|███████████████████████████████████████████| 25/25 [08:13<00:00, 19.74s/it]


Epoch 1/25, Train Accuracy: 0.7939, Validation Accuracy: 0.8847
Epoch 2/25, Train Accuracy: 0.8708, Validation Accuracy: 0.8915
Epoch 3/25, Train Accuracy: 0.8841, Validation Accuracy: 0.9020
Epoch 4/25, Train Accuracy: 0.8922, Validation Accuracy: 0.9080
Epoch 5/25, Train Accuracy: 0.8983, Validation Accuracy: 0.9160
Epoch 6/25, Train Accuracy: 0.9027, Validation Accuracy: 0.9172
Epoch 7/25, Train Accuracy: 0.9079, Validation Accuracy: 0.9188
Epoch 8/25, Train Accuracy: 0.9106, Validation Accuracy: 0.9217
Epoch 9/25, Train Accuracy: 0.9139, Validation Accuracy: 0.9199
Epoch 10/25, Train Accuracy: 0.9178, Validation Accuracy: 0.9232
Epoch 11/25, Train Accuracy: 0.9183, Validation Accuracy: 0.9299
Epoch 12/25, Train Accuracy: 0.9222, Validation Accuracy: 0.9233
Epoch 13/25, Train Accuracy: 0.9242, Validation Accuracy: 0.9299
Epoch 14/25, Train Accuracy: 0.9258, Validation Accuracy: 0.9286
Epoch 15/25, Train Accuracy: 0.9266, Validation Accuracy: 0.9319
Epoch 16/25, Train Accuracy: 0.929

100%|███████████████████████████████████████████| 25/25 [07:41<00:00, 18.44s/it]


Epoch 1/25, Train Accuracy: 0.7966, Validation Accuracy: 0.8741
Epoch 2/25, Train Accuracy: 0.8727, Validation Accuracy: 0.8976
Epoch 3/25, Train Accuracy: 0.8875, Validation Accuracy: 0.9069
Epoch 4/25, Train Accuracy: 0.8979, Validation Accuracy: 0.9117
Epoch 5/25, Train Accuracy: 0.9013, Validation Accuracy: 0.9139
Epoch 6/25, Train Accuracy: 0.9081, Validation Accuracy: 0.9182
Epoch 7/25, Train Accuracy: 0.9114, Validation Accuracy: 0.9208
Epoch 8/25, Train Accuracy: 0.9140, Validation Accuracy: 0.9218
Epoch 9/25, Train Accuracy: 0.9170, Validation Accuracy: 0.9214
Epoch 10/25, Train Accuracy: 0.9204, Validation Accuracy: 0.9228
Epoch 11/25, Train Accuracy: 0.9241, Validation Accuracy: 0.9249
Epoch 12/25, Train Accuracy: 0.9253, Validation Accuracy: 0.9304
Epoch 13/25, Train Accuracy: 0.9275, Validation Accuracy: 0.9290
Epoch 14/25, Train Accuracy: 0.9299, Validation Accuracy: 0.9270
Epoch 15/25, Train Accuracy: 0.9314, Validation Accuracy: 0.9244
Epoch 16/25, Train Accuracy: 0.932

100%|███████████████████████████████████████████| 25/25 [08:13<00:00, 19.73s/it]


Epoch 1/25, Train Accuracy: 0.8185, Validation Accuracy: 0.8872
Epoch 2/25, Train Accuracy: 0.8826, Validation Accuracy: 0.8997
Epoch 3/25, Train Accuracy: 0.8970, Validation Accuracy: 0.9021
Epoch 4/25, Train Accuracy: 0.9045, Validation Accuracy: 0.9157
Epoch 5/25, Train Accuracy: 0.9093, Validation Accuracy: 0.9166
Epoch 6/25, Train Accuracy: 0.9153, Validation Accuracy: 0.9253
Epoch 7/25, Train Accuracy: 0.9193, Validation Accuracy: 0.9256
Epoch 8/25, Train Accuracy: 0.9225, Validation Accuracy: 0.9268
Epoch 9/25, Train Accuracy: 0.9276, Validation Accuracy: 0.9250
Epoch 10/25, Train Accuracy: 0.9294, Validation Accuracy: 0.9323
Epoch 11/25, Train Accuracy: 0.9329, Validation Accuracy: 0.9254
Epoch 12/25, Train Accuracy: 0.9348, Validation Accuracy: 0.9315
Epoch 13/25, Train Accuracy: 0.9365, Validation Accuracy: 0.9313
Epoch 14/25, Train Accuracy: 0.9400, Validation Accuracy: 0.9323
Epoch 15/25, Train Accuracy: 0.9421, Validation Accuracy: 0.9310
Epoch 16/25, Train Accuracy: 0.943

100%|███████████████████████████████████████████| 25/25 [08:15<00:00, 19.82s/it]


Epoch 1/25, Train Accuracy: 0.8448, Validation Accuracy: 0.8867
Epoch 2/25, Train Accuracy: 0.8936, Validation Accuracy: 0.8976
Epoch 3/25, Train Accuracy: 0.9059, Validation Accuracy: 0.9106
Epoch 4/25, Train Accuracy: 0.9133, Validation Accuracy: 0.9187
Epoch 5/25, Train Accuracy: 0.9194, Validation Accuracy: 0.9243
Epoch 6/25, Train Accuracy: 0.9253, Validation Accuracy: 0.9231
Epoch 7/25, Train Accuracy: 0.9293, Validation Accuracy: 0.9262
Epoch 8/25, Train Accuracy: 0.9342, Validation Accuracy: 0.9300
Epoch 9/25, Train Accuracy: 0.9387, Validation Accuracy: 0.9328
Epoch 10/25, Train Accuracy: 0.9409, Validation Accuracy: 0.9346
Epoch 11/25, Train Accuracy: 0.9435, Validation Accuracy: 0.9323
Epoch 12/25, Train Accuracy: 0.9462, Validation Accuracy: 0.9345
Epoch 13/25, Train Accuracy: 0.9492, Validation Accuracy: 0.9333
Epoch 14/25, Train Accuracy: 0.9518, Validation Accuracy: 0.9352
Epoch 15/25, Train Accuracy: 0.9532, Validation Accuracy: 0.9378
Epoch 16/25, Train Accuracy: 0.956

 68%|█████████████████████████████▏             | 17/25 [05:30<02:37, 19.72s/it]