## Setup

In [None]:
!pip install wandb



In [None]:
import wandb

# Start the Weights & Biases run that train_sparse_recursive reports its
# per-epoch metrics to (it calls wandb.log when logging=True).
wandb.init(
    project="self-expanding-nets",
    name="cifar10 resnet, exp, no freeze"
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdown-shift[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [None]:
# Utility cell: close the currently active W&B run.
# NOTE(review): in top-to-bottom order this runs before the training cell, which
# calls wandb.log — presumably this cell is meant to be run by hand only; confirm.
wandb.finish()

In [None]:
from abc import abstractmethod, ABC

import numpy as np

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

from sklearn.metrics import accuracy_score
from tqdm import tqdm

SEED = 8642
# Seed every RNG from the single SEED constant so the value cannot drift
# between the constant and the calls that use it.
torch.manual_seed(SEED)
np.random.seed(SEED)

# Notebook-wide default device; several helpers below read this global.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

## Utils

In [None]:
def dense_to_sparse(dense_tensor: torch.Tensor, device=None) -> torch.Tensor:
    """Convert a dense tensor to a sparse COO tensor holding only its non-zero entries.

    Args:
        dense_tensor: Tensor to convert. Entries that are exactly zero are dropped.
        device: Device for the resulting sparse tensor. Defaults to the device of
            ``dense_tensor`` so the function no longer depends on a notebook-level
            ``device`` global being defined.

    Returns:
        A sparse COO tensor with the same size as ``dense_tensor``.
    """
    if device is None:
        device = dense_tensor.device

    # nonzero(as_tuple=True) yields one index tensor per dimension; they are used
    # both to gather the values and, stacked, as the COO index matrix.
    nonzero_coords = dense_tensor.nonzero(as_tuple=True)
    values = dense_tensor[nonzero_coords]
    indices = torch.stack(nonzero_coords)

    return torch.sparse_coo_tensor(indices, values, dense_tensor.size(), device=device)


def convert_dense_to_sparse_network(model: nn.Module) -> nn.Module:
    """
    Converts a given dense neural network model to a sparse neural network model.

    The model is deep-copied and every `nn.Linear` found among its (nested)
    children is replaced by an `ExpandingLinear` built from that layer's weights.
    All other sub-modules are kept exactly as configured in the input (the previous
    implementation rebuilt them with `module.__class__()`, which dropped
    non-default arguments such as `Dropout(p=0.2)` and failed for modules whose
    constructor has required arguments).

    Args:
        model (nn.Module): The dense neural network model to be converted.
            It is not modified. A model that is itself an `nn.Linear` is not
            converted; only children are.

    Returns:
        nn.Module: A new neural network model with sparse layers, placed on the
        notebook-level `device`.
    """
    import copy  # local import so this cell does not depend on the import cell

    def _sparsify_children(parent: nn.Module) -> None:
        # list(...) so that replacing a child does not interfere with iteration.
        for name, child in list(parent.named_children()):
            if isinstance(child, nn.Linear):
                weight = child.weight.data
                if child.bias is not None:
                    bias = child.bias.data
                else:
                    # A bias-free layer becomes a sparse bias with no stored entries.
                    bias = torch.zeros(child.out_features, dtype=weight.dtype,
                                       device=weight.device)
                sparse_layer = ExpandingLinear(dense_to_sparse(weight),
                                               dense_to_sparse(bias),
                                               device=device)
                setattr(parent, name, sparse_layer)
            else:
                _sparsify_children(child)

    new_model = copy.deepcopy(model)
    _sparsify_children(new_model)
    return new_model


def get_model_last_layer(model):
    """Return the layer whose edges the metrics score.

    * a ``ResnetExp`` is unwrapped to its ``expanding_head`` and resolved again;
    * a ``SparseModule`` is returned unchanged;
    * for any other module the last direct child is returned.
    """
    if isinstance(model, ResnetExp):   # TODO: generalize
        head = model.expanding_head
        return get_model_last_layer(head)

    if not isinstance(model, SparseModule):
        direct_children = list(model.children())
        return direct_children[-1]

    return model


In [None]:
class NonlinearityMetric(ABC):
    """Abstract base for metrics that score the edges of a model's last layer.

    Stores the loss function; concrete metrics implement ``calculate``.
    """

    def __init__(self, loss_fn):
        # Kept on the instance so subclasses can evaluate the loss themselves.
        self.loss_fn = loss_fn

    @abstractmethod
    def calculate(self, model, X_arr, y_arr):
        """Compute the metric for ``model`` on the batch ``(X_arr, y_arr)``.

        Must be overridden; the base implementation does nothing and returns None.
        """


# Metric 1: mean gradient for each edge
class GradientMeanEdgeMetric(NonlinearityMetric):
    """Scores every edge of the last layer by the absolute loss gradient of its weight.

    A single call handles one batch; averaging over batches is left to the caller
    (EdgeFinder.calculate_edge_metric_for_dataloader does this).
    """

    def calculate(self, model, X_arr, y_arr):
        """Return ``|d loss / d weight_values|`` of the last layer for one batch.

        Side effects: the model is switched to eval mode and left there, and its
        gradients are cleared both before and after the backward pass.
        """
        model.eval()
        model.zero_grad()

        # NOTE(review): squeeze() drops every size-1 dimension, including a batch
        # dimension of 1 — confirm batches of a single sample cannot occur.
        y_pred = model(X_arr).squeeze()
        loss = self.loss_fn(y_pred, y_arr)
        loss.backward()

        last_layer = get_model_last_layer(model)

        # Gradients of the sparse weights
        edge_gradients = last_layer.weight_values.grad.abs()
        # Clear the gradients so this scoring pass does not leak into later steps.
        model.zero_grad()
        return edge_gradients


# Metric 3: perturbation sensitivity for each edge
class PerturbationSensitivityEdgeMetric(NonlinearityMetric):
    """Scores each last-layer edge by how much the model output moves when its weight is nudged.

    For every stored weight the value is increased by ``epsilon``, the model is
    re-evaluated, and the mean absolute output change is recorded. This costs one
    forward pass per edge.
    """

    def __init__(self, loss_fn, epsilon=1e-2):
        super().__init__(loss_fn)
        # Size of the additive perturbation applied to one weight at a time.
        self.epsilon = epsilon

    def calculate(self, model, X_arr, y_arr):
        """Return one sensitivity value per entry of the last layer's ``weight_values``.

        ``y_arr`` and ``self.loss_fn`` are not used by this metric. The model is
        switched to eval mode and left there.
        """
        model.eval()

        # Original model output
        original_output = model(X_arr).detach()

        last_layer = get_model_last_layer(model)
        sensitivities = torch.zeros_like(last_layer.weight_values)

        # Perturb each weight in turn
        for idx in range(last_layer.weight_values.size(0)):
            with torch.no_grad():
                original_value = last_layer.weight_values[idx].item()
                last_layer.weight_values[idx] += self.epsilon

                # Re-evaluate the model with the perturbed weight
                perturbed_output = model(X_arr)
                sensitivity = (perturbed_output - original_output).abs().mean().item()
                sensitivities[idx] = sensitivity

                # Restore the original value
                last_layer.weight_values[idx] = original_value

        return sensitivities


In [None]:
class EdgeFinder:
    """Averages an edge metric over a dataloader and selects last-layer edges by it.

    Args:
        metric: Object with ``calculate(model, X, y)`` returning one value per edge
            of the model's last layer (a ``NonlinearityMetric``).
        dataloader: Iterable of ``(data, target)`` batches.
        device: Device the batches and the accumulator are moved to.
    """

    def __init__(self, metric: "NonlinearityMetric", dataloader, device=torch.device('cpu')):
        self.metric = metric
        self.dataloader = dataloader
        self.device = device

    def calculate_edge_metric_for_dataloader(self, model, categorical_label: bool = True):
        """Return the metric averaged over all batches of the dataloader.

        Args:
            model: Model handed to ``metric.calculate``.
            categorical_label: When False, targets are cast to float32 first.

        Raises:
            ValueError: If the dataloader yields no batches (previously this
                surfaced as an unrelated ``None / int`` TypeError).
        """
        accumulated = None
        n_batches = 0
        for data, target in self.dataloader:
            data, target = data.to(self.device), target.to(self.device)

            if not categorical_label:
                target = target.to(torch.float32)

            batch_metric = self.metric.calculate(model, data, target)

            if accumulated is None:
                accumulated = torch.zeros_like(batch_metric).to(self.device)

            accumulated += batch_metric
            n_batches += 1

        if accumulated is None:
            raise ValueError("EdgeFinder: dataloader yielded no batches, cannot compute edge metric")

        return accumulated / n_batches

    def _metric_and_edge_indices(self, model, categorical_label: bool):
        """Return ``(averaged metric, weight_indices of the model's last layer)``."""
        avg_metric = self.calculate_edge_metric_for_dataloader(model, categorical_label)
        last_layer = get_model_last_layer(model)
        return avg_metric, last_layer.weight_indices

    def choose_edges_top_k(self, model, top_k: int, categorical_label: bool = True):
        """Return the indices (2 x k) of the ``top_k`` edges with the largest metric.

        A negative ``top_k`` selects nothing instead of slicing from the end.
        """
        avg_metric, edge_indices = self._metric_and_edge_indices(model, categorical_label)
        ranking = torch.argsort(avg_metric, descending=True)
        return edge_indices[:, ranking[:max(top_k, 0)]]

    def choose_edges_top_percent(self, model, percent: float, categorical_label: bool = True):
        """Return the indices of the top ``percent`` (clamped to [0, 1]) of edges by metric."""
        percent = min(max(percent, 0.0), 1.0)  # percent in [0, 1]
        avg_metric, edge_indices = self._metric_and_edge_indices(model, categorical_label)
        k = int(percent * avg_metric.numel())
        ranking = torch.argsort(avg_metric, descending=True)
        return edge_indices[:, ranking[:k]]

    def choose_edges_threshold(self, model, threshold, categorical_label: bool = True):
        """Return the indices of all edges whose averaged metric is strictly above ``threshold``."""
        avg_metric, edge_indices = self._metric_and_edge_indices(model, categorical_label)
        selected = (avg_metric > threshold).nonzero(as_tuple=True)[0]
        return edge_indices[:, selected]


In [None]:
# def train_sparse_recursive(model, train_loader, val_loader, num_epochs, metric,
#                            edge_replacement_func=None, logging=True,
#                            expansion_criterion=None, metric_threshold: float = 0.05,
#                            delta_threshold: float = 0.25, n_prev_epochs: int = 3,
#                            get_n_neurons_func=None):
#     optimizer = optim.Adam(model.parameters(), lr=1e-4)
#     criterion = nn.CrossEntropyLoss()
#     loss_history = []
#     prev_replacement_epoch = -1

#     for epoch in range(num_epochs):
#         model.train()
#         train_loss = 0
#         for inputs, targets in tqdm(train_loader):
#             outputs = model(inputs)
#             loss = criterion(outputs, targets)

#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()
#             train_loss += loss.item()

#         train_loss /= len(train_loader)

#         model.eval()
#         val_loss = 0
#         all_targets = []
#         all_preds = []
#         with torch.no_grad():
#             for inputs, targets in val_loader:
#                 outputs = model(inputs)
#                 loss = criterion(outputs, targets)
#                 val_loss += loss.item()

#                 preds = torch.argmax(outputs, dim=1)
#                 all_targets.extend(targets.cpu().numpy())
#                 all_preds.extend(preds.cpu().numpy())

#         val_loss /= len(val_loader)
#         val_accuracy = accuracy_score(all_targets, all_preds)
#         loss_history.append(val_loss)

#         print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, "
#               f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}\n")

#         if logging:
#             wandb.log({"val_accuracy": val_accuracy, "train_loss": train_loss, "val_loss": val_loss})

#         if edge_replacement_func and (epoch - prev_replacement_epoch) >= n_prev_epochs and expansion_criterion:
#             if expansion_criterion(loss_history, n_prev_epochs, delta_threshold):
#                 if get_n_neurons_func:
#                     n_neurons = get_n_neurons_func(loss_history, n_prev_epochs, delta_threshold)
#                 else:
#                     n_neurons = 2
#                 edge_replacement_func(model, optimizer, val_loader, metric,
#                                       metric_threshold, n_neurons)
#                 prev_replacement_epoch = epoch
#                 print("Replacement done\n")
#             else:
#                 print("Replacement denied\n")


def _sparse_train_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean per-batch loss."""
    model.train()
    total_loss = 0
    for inputs, targets in tqdm(loader):
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(loader)


def _sparse_validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` in eval mode; return ``(mean batch loss, accuracy)``."""
    model.eval()
    total_loss = 0
    all_targets = []
    all_preds = []
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            total_loss += criterion(outputs, targets).item()

            preds = torch.argmax(outputs, dim=1)
            all_targets.extend(targets.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    return total_loss / len(loader), accuracy_score(all_targets, all_preds)


def train_sparse_recursive(model, train_loader, val_loader, num_epochs, metric,
                           edge_replacement_func=None, logging=True,
                           expansion_criterion=None, metric_threshold: float = 0.05,
                           delta_threshold: float = 0.25, n_prev_epochs: int = 3,
                           get_n_neurons_func=None, device=None, lr: float = 1e-4):
    """Train ``model`` with Adam and optionally expand it between epochs.

    After every epoch the model is validated. If ``edge_replacement_func`` and
    ``expansion_criterion`` are both given and at least ``n_prev_epochs`` epochs
    have passed since the last replacement, the criterion is evaluated on the
    validation-loss history; when it returns true, ``edge_replacement_func`` is
    called on the model.

    Args:
        model: Network to train; moved to ``device``.
        train_loader, val_loader: Batches of ``(inputs, targets)``.
        num_epochs: Number of epochs to run.
        metric: Edge metric forwarded to ``edge_replacement_func``.
        edge_replacement_func: ``f(model, optimizer, val_loader, metric,
            metric_threshold, n_neurons)``; None disables expansion.
        logging: When true, per-epoch metrics are sent to ``wandb.log``.
        expansion_criterion: ``f(loss_history, n_prev_epochs, delta_threshold) -> bool``.
        metric_threshold: Threshold forwarded to ``edge_replacement_func``.
        delta_threshold: Forwarded to the criterion and to ``get_n_neurons_func``.
        n_prev_epochs: Minimum epoch gap between replacements; also forwarded to
            the criterion and to ``get_n_neurons_func``.
        get_n_neurons_func: ``f(loss_history, n_prev_epochs, delta_threshold) -> int``;
            when None, 2 neurons per edge are used.
        device: Training device; defaults to CUDA when available, else CPU.
        lr: Adam learning rate (previously hard-coded to 1e-4).

    Returns:
        list[float]: Validation loss of every epoch, in order. Earlier versions
        returned None and discarded this history.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    loss_history = []
    prev_replacement_epoch = -1

    for epoch in range(num_epochs):
        train_loss = _sparse_train_epoch(model, train_loader, optimizer, criterion, device)
        val_loss, val_accuracy = _sparse_validate(model, val_loader, criterion, device)
        loss_history.append(val_loss)

        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}\n")

        if logging:
            wandb.log({"val_accuracy": val_accuracy, "train_loss": train_loss, "val_loss": val_loss})

        # Expansion is attempted only when both hooks are supplied and enough
        # epochs have passed since the previous replacement.
        gap_ok = (epoch - prev_replacement_epoch) >= n_prev_epochs
        if not (edge_replacement_func and gap_ok and expansion_criterion):
            continue

        if not expansion_criterion(loss_history, n_prev_epochs, delta_threshold):
            print("Replacement denied\n")
            continue

        if get_n_neurons_func:
            n_neurons = get_n_neurons_func(loss_history, n_prev_epochs, delta_threshold)
        else:
            n_neurons = 2
        edge_replacement_func(model, optimizer, val_loader, metric,
                              metric_threshold, n_neurons)
        prev_replacement_epoch = epoch
        print("Replacement done\n")

    return loss_history


## New model

In [None]:
# Scratch check of the initial value drawn for a new edge: uniform in
# [1 / n_neurons - eps, 1 / n_neurons + eps], the same rule SparseModule.add_edge uses.
n_neurons = 2  # was undefined on a fresh kernel; 2 matches the recorded output (~0.5)
new_weight = torch.empty(1, device=device)

weight_value = 1 / n_neurons
eps = 1e-4

new_weight.uniform_(weight_value - eps, weight_value + eps)  # ReLU
new_weight

tensor([0.4999], device='cuda:0')

In [None]:
class SparseModule(ABC, nn.Module):
    """Base class for layers whose weight matrix is stored as COO indices plus values.

    Attributes:
        weight_indices: ``2 x nnz`` long tensor; row 0 holds child (output) indices,
            row 1 holds parent (input) indices.
        weight_values: ``nn.Parameter`` with one value per stored edge.
        weight_size: ``[rows, cols]`` of the represented matrix, kept as a mutable list.
        eps: Half-width of the interval new edge weights are drawn from.
    """

    def __init__(self, weight_size, device='cpu', eps: float = 1e-4):
        super().__init__()
        self.device = device
        self.eps = eps
        self.weight_size = list(weight_size)
        # Start with no stored edges.
        self.weight_indices = torch.empty(2, 0, dtype=torch.long, device=device)
        self.weight_values = nn.Parameter(torch.empty(0, device=device))

    def add_edge(self, child, parent, n_neurons: int):
        """Append edge ``(child, parent)`` with a weight drawn near ``1 / n_neurons``."""
        assert n_neurons >= 1

        edge_column = torch.tensor([[child], [parent]], dtype=torch.long, device=self.device)
        self.weight_indices = torch.cat((self.weight_indices, edge_column), dim=1)

        center = 1 / n_neurons
        fresh_value = torch.empty(1, device=self.device)
        fresh_value.uniform_(center - self.eps, center + self.eps)  # TODO: not only ReLU

        # The existing Parameter object is kept; only its storage grows.
        self.weight_values.data = torch.cat((self.weight_values.data, fresh_value))

    def create_sparse_tensor(self):
        """Assemble the sparse COO weight matrix from the stored indices and values."""
        return torch.sparse_coo_tensor(
            self.weight_indices,
            self.weight_values,
            self.weight_size,
            device=self.device,
        )

    @abstractmethod
    def replace(self, child, parent, n_neurons: int = 2):
        """Replace edge ``(child, parent)``; implemented by subclasses."""

    def replace_many(self, children, parents, n_neurons: int = 2):
        """Call ``replace`` for every ``(child, parent)`` pair, in order."""
        for edge_child, edge_parent in zip(children, parents):
            self.replace(edge_child, edge_parent, n_neurons)


class EmbedLinear(SparseModule):
    """Sparse layer that appends newly created hidden neurons to its input.

    The weight matrix starts with zero rows and ``weight_size`` columns. Every
    ``replace`` call adds ``n_neurons`` rows, each connected to a single input
    column. ``forward`` returns the input concatenated with the activated outputs
    of those rows.
    """

    def __init__(self, weight_size, activation=nn.ReLU(), device='cpu'):
        super().__init__([0, weight_size], device=device)
        self.child_counter = 0  # number of rows (new neurons) created so far
        self.activation = activation
        self.device = device

    def replace(self, child, parent, n_neurons: int = 2):
        """Create ``n_neurons`` new rows, each fed by input column ``parent``.

        ``child`` is not used here; the new rows are numbered from ``child_counter``.
        """
        first_row = self.child_counter
        for offset in range(n_neurons):
            self.add_edge(first_row + offset, parent, n_neurons)
        self.weight_size[0] += n_neurons
        self.child_counter += n_neurons

    def forward(self, input):
        """Return ``[input, activation(W @ input^T)^T]`` concatenated along dim 1."""
        embed_weight = self.create_sparse_tensor()
        hidden = torch.sparse.mm(embed_weight, input.t()).t()
        activated = self.activation(hidden)
        return torch.cat([input, activated], dim=1)


class ExpandingLinear(SparseModule):
    """Sparse linear layer whose individual edges can be replaced by small hidden sub-layers.

    The layer holds a sparse weight matrix, a sparse bias, and a sequence of
    ``EmbedLinear`` modules. Each ``replace_many`` call that actually replaces
    edges opens a new ``EmbedLinear``. In ``forward`` the input runs through all
    of them (each widens the input) before the sparse matrix product.

    Args:
        weight: Sparse COO weight matrix of shape ``[out_features, in_features]``.
        bias: Sparse COO bias vector.
        device: Device the indices and values are stored on.
    """

    def __init__(self, weight: torch.Tensor, bias: torch.Tensor, device='cpu'):
        super().__init__(weight.size(), device=device)

        weight = weight.coalesce()
        self.weight_indices = weight.indices().to(device)
        self.weight_values = nn.Parameter(weight.values().to(device))

        # ModuleList (instead of a plain list) registers the embedded layers, so
        # parameters(), .to(), state_dict() and zero_grad() all see them.
        self.embed_linears = nn.ModuleList()

        bias = bias.coalesce()
        self.bias_indices = bias.indices().to(device)
        self.bias_values = nn.Parameter(bias.values().to(device))
        self.bias_size = list(bias.size())

        # Index of the EmbedLinear that receives new neurons; -1 until first use.
        self.current_iteration = -1
        self.device = device

    def replace(self, child, parent, n_neurons: int = 2):
        """Replace edge ``parent -> child`` by ``n_neurons`` new hidden neurons.

        The direct edge is removed. ``n_neurons`` neurons fed by ``parent`` are
        added to the current ``EmbedLinear``, and ``child`` gets one new edge from
        each of them.

        Note: ``weight_values`` is re-created as a new ``nn.Parameter`` here, so an
        optimizer built earlier does not track it until it is added again.

        Raises:
            AssertionError: If the edge ``(child, parent)`` is not stored.
        """
        if self.current_iteration == -1:
            self.current_iteration = 0

        if len(self.embed_linears) <= self.current_iteration:
            self.embed_linears.append(EmbedLinear(self.weight_size[1], device=self.device))

        matches = (self.weight_indices[0] == child) & (self.weight_indices[1] == parent)

        assert torch.any(matches), "Edge must extist"

        # The embedded layers append their outputs after the current input columns,
        # so the first new column is the current input width. (Taking
        # ``weight_indices[1].max() + 1`` gives the same value only while the last
        # column still has a stored edge.)
        first_new_parent = self.weight_size[1]

        keep = ~matches
        self.weight_indices = self.weight_indices[:, keep]
        self.weight_values = nn.Parameter(self.weight_values[keep])

        for i in range(n_neurons):
            # add_edge requires n_neurons to pick the initial weight; omitting it
            # raised a TypeError on every replacement.
            self.add_edge(child, first_new_parent + i, n_neurons)

        self.weight_size[1] += n_neurons
        self.embed_linears[self.current_iteration].replace(child, parent, n_neurons=n_neurons)

    def replace_many(self, children, parents, n_neurons: int = 2):
        """Replace several edges at once, placing all new neurons in one new ``EmbedLinear``.

        When either sequence is empty nothing changes and no new sub-layer is opened.
        """
        if len(children) != 0 and len(parents) != 0:
            self.current_iteration += 1
        super().replace_many(children, parents, n_neurons)

    def forward(self, input):
        """Widen ``input`` through every ``EmbedLinear``, then apply ``W @ x + b``."""
        for embed_linear in self.embed_linears:
            input = embed_linear(input)

        sparse_weight = self.create_sparse_tensor()
        dense_bias = torch.sparse_coo_tensor(self.bias_indices, self.bias_values, self.bias_size,
                                             device=self.device).to_dense()

        try:
            output = torch.sparse.mm(sparse_weight, input.t()).t()
            output += dense_bias.unsqueeze(0)
        except RuntimeError as err:
            # Keep the original error and attach the shapes, instead of hiding it
            # behind a bare ``except`` and ``assert 0 == 1``.
            raise RuntimeError(
                "ExpandingLinear.forward failed: "
                f"weight {tuple(sparse_weight.shape)}, bias {tuple(dense_bias.shape)}, "
                f"input^T {tuple(input.t().shape)}"
            ) from err

        return output

In [None]:
def edge_replacement_func_new_layer(model, optim, val_loader, metric,
                                    threshold: float = 0.05, n_neurons: int = 2):
    """Replace every edge of ``model.expanding_head.fc3`` whose metric exceeds ``threshold``.

    Args:
        model: Model exposing ``expanding_head.fc3`` as an ``ExpandingLinear``.
        optim: Optimizer in use. Parameters created by the replacement are added
            to it. (The name shadows the ``torch.optim`` module inside this function.)
        val_loader: Batches the metric is averaged over.
        metric: Edge metric passed to ``EdgeFinder``.
        threshold: Edges with an averaged metric strictly above this are replaced.
        n_neurons: Number of new neurons created per replaced edge.

    Returns:
        dict with ``max`` / ``sum`` of the averaged metric, ``len`` (number of
        edges scored) and ``len_choose`` (number of edges replaced). ``max`` is
        None when there are no edges.
    """
    layer = model.expanding_head.fc3    # TODO: select last layer
    ef = EdgeFinder(metric, val_loader, device)

    # Score the edges once and select from that result; going through
    # choose_edges_threshold would run the metric over the whole loader again.
    # Assumes fc3 is the layer the metric scores (the model's last layer).
    vals = ef.calculate_edge_metric_for_dataloader(model)
    # print("Edge metrics:", vals, max(vals), sum(vals))
    selected = (vals > threshold).nonzero(as_tuple=True)[0]
    chosen_edges = layer.weight_indices[:, selected]
    print("Chosen edges:", chosen_edges, len(chosen_edges[0]))
    layer.replace_many(*chosen_edges, n_neurons=n_neurons)

    def _is_tracked(param):
        # Identity check: tensors must not be compared with ``==`` here.
        return any(param is p for group in optim.param_groups for p in group['params'])

    # ExpandingLinear.replace rebinds ``weight_values`` to a new Parameter; without
    # this the optimizer would keep the stale one and stop training fc3's weights.
    if not _is_tracked(layer.weight_values):
        optim.add_param_group({'params': layer.weight_values})

    if layer.embed_linears:
        newest_embed = layer.embed_linears[-1]
        if not _is_tracked(newest_embed.weight_values):
            optim.add_param_group({'params': newest_embed.weight_values})
    else:
        # Nothing has ever been replaced, so there is nothing to register. The
        # previous code added a fresh all-zero dummy tensor as a param group on
        # every such call, which only grew the optimizer.
        print("Empty metric")

    return {
        'max': vals.max() if vals.numel() > 0 else None,
        'sum': vals.sum(),
        'len': len(vals),
        'len_choose': len(chosen_edges[0]),
    }

In [None]:
# class SimpleFCN(nn.Module):
#     def __init__(self, input_size=100):
#         super(SimpleFCN, self).__init__()
#         self.relu = nn.ReLU()
#         self.fc1 = nn.Linear(input_size, 50)
#         self.dropout = nn.Dropout(p=0.5)
#         self.fc2 = nn.Linear(50, 50)
#         self.fc3 = nn.Linear(50, 10)

#     def forward(self, x):
#         x = self.relu(self.fc1(x))
#         x = self.dropout(x)
#         x = self.relu(self.fc2(x))
#         x = self.dropout(x)
#         x = self.fc3(x)
#         return x

In [None]:
# class DummyFCN(nn.Module):
#     def __init__(self, input_size=100):
#         super().__init__()
#         self.relu = nn.ReLU()
#         self.fc1 = nn.Linear(input_size, 50)
#         self.dropout = nn.Dropout(p=0.5)
#         # self.fc2 = nn.Linear(50, 50)
#         self.fc3 = nn.Linear(50, 10)

#     def forward(self, x):
#         x = self.relu(self.fc1(x))
#         # x = self.relu(self.fc2(x))
#         x = self.dropout(x)
#         x = self.fc3(x)
#         return x

## Dynamic sublayer size adjustment

In [None]:
def get_expansion_criterion(loss_history, n_prev_epochs: int = 3,
                            delta_threshold: float = 0.25) -> bool:
    """
    Decide whether the layer should be extended.

    The last ``n_prev_epochs`` entries of ``loss_history`` are taken, the mean of
    the absolute differences between consecutive entries is computed and printed,
    and the result is whether that mean is strictly below ``delta_threshold``.
    """
    # TODO: derivation from mean
    recent_losses = np.array(loss_history[-n_prev_epochs:])
    step_sizes = np.abs(np.diff(recent_losses))
    mean_delta = np.mean(step_sizes)
    print("Mean delta: ", mean_delta)
    return mean_delta < delta_threshold

In [None]:
def get_n_neurons_by_delta(loss_history, n_prev_epochs: int = 3,
                           delta_threshold: float = 0.25, upper_bound: int = 10):
    """Pick the number of new neurons per edge as ``1 / mean|Δloss|``, clamped to ``[1, upper_bound]``.

    Args:
        loss_history: Sequence of loss values; the last ``n_prev_epochs`` are used.
        n_prev_epochs: Window size.
        delta_threshold: Unused; kept so the signature matches the expansion criterion.
        upper_bound: Largest value that may be returned.

    Returns:
        int: At least 1 (a result of 0 would remove an edge without replacing it)
        and at most ``upper_bound``. A completely flat loss gives ``upper_bound``
        instead of failing on a division by zero.

    Raises:
        ValueError: If fewer than two loss values are available or the mean delta
            is not a finite number.
    """
    recent_losses = np.asarray(loss_history[-n_prev_epochs:], dtype=float)
    if recent_losses.size < 2:
        raise ValueError("need at least two loss values to estimate the mean loss delta")

    mean_delta = float(np.mean(np.abs(np.diff(recent_losses))))
    if not np.isfinite(mean_delta):
        raise ValueError(f"mean loss delta is not finite: {mean_delta}")

    if mean_delta > 0:
        suggested = int(min(1 / mean_delta, upper_bound))
    else:
        suggested = upper_bound  # flat loss: use as many neurons as allowed

    n_neurons = max(1, suggested)
    print("Number of new neurons per edge: ", n_neurons)
    return n_neurons

In [None]:
def get_sqrt_n_neurons_by_delta(loss_history, n_prev_epochs: int = 3,
                           delta_threshold: float = 0.25, upper_bound: int = 10):
    """Pick the number of new neurons per edge as ``sqrt(1 / mean|Δloss|)``, clamped to ``[1, upper_bound]``.

    Args:
        loss_history: Sequence of loss values; the last ``n_prev_epochs`` are used.
        n_prev_epochs: Window size.
        delta_threshold: Unused; kept so the signature matches the expansion criterion.
        upper_bound: Largest value that may be returned.

    Returns:
        int: At least 1 (a result of 0 would remove an edge without replacing it)
        and at most ``upper_bound``. A completely flat loss gives ``upper_bound``
        instead of failing on a division by zero.

    Raises:
        ValueError: If fewer than two loss values are available or the mean delta
            is not a finite number.
    """
    recent_losses = np.asarray(loss_history[-n_prev_epochs:], dtype=float)
    if recent_losses.size < 2:
        raise ValueError("need at least two loss values to estimate the mean loss delta")

    mean_delta = float(np.mean(np.abs(np.diff(recent_losses))))
    if not np.isfinite(mean_delta):
        raise ValueError(f"mean loss delta is not finite: {mean_delta}")

    if mean_delta > 0:
        suggested = int(min(float(np.sqrt(1 / mean_delta)), upper_bound))
    else:
        suggested = upper_bound  # flat loss: use as many neurons as allowed

    n_neurons = max(1, suggested)
    print("Number of new neurons per edge: ", n_neurons)
    return n_neurons

## Data

In [None]:
# Number of samples per batch for both loaders.
BATCH_SIZE = 64

# Convert images to tensors, then normalise every channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# Human-readable class names (not referenced by the training code in this notebook).
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse',
           'ship', 'truck')

# The train=True split is used for optimisation. The train=False split is named
# "val" here and serves for validation and for scoring edges.
train_dataset = datasets.CIFAR10(root='./data', train=True,
                                  download=True, transform=transform)
val_dataset = datasets.CIFAR10(root='./data', train=False,
                                  download=True, transform=transform)

# train_size = int(0.8 * len(train_dataset))
# val_size = len(train_dataset) - train_size
# # train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
# train_dataset, val_dataset, test_dataset = random_split(train_dataset, [train_size // 2, val_size // 2, len(train_dataset) - (train_size // 2 + val_size // 2)])

# Only the training data is shuffled; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Number of training batches per epoch.
print(len(train_loader))

Files already downloaded and verified
Files already downloaded and verified
782


## Model


In [None]:
# model = SimpleFCN(input_size=64)
# model = DummyFCN(input_size=784)
# sparse_model = convert_dense_to_sparse_network(model)
# criterion = nn.CrossEntropyLoss()
# ef = EdgeFinder(GradientMeanEdgeMetric(criterion), val_loader, device)

In [None]:
# base_model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_resnet20", pretrained=True)
# base_model = torch.nn.Sequential(*(list(base_model.children())[:-1]))
# base_model

In [None]:
class ExpandingHead(nn.Module):
    """Dense three-layer MLP head: ``fc1 -> ReLU -> dropout -> fc2 -> ReLU -> dropout -> fc3``.

    Args:
        input_size: Width of the incoming feature vector.
        hidden_size: Width of both hidden layers.
        output_size: Number of outputs produced by ``fc3``.
    """

    def __init__(self, input_size: int = 64, hidden_size: int = 50, output_size: int = 10):
        super().__init__()
        # Registration order is kept as is: fc3 must remain the last child.
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Apply the two hidden blocks (linear, ReLU, dropout), then the output layer."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)

In [None]:
class ResnetExp(nn.Module):
    """Pretrained CIFAR-10 ResNet-20 backbone followed by a sparse, expandable head.

    The backbone is the hub model without its last child (presumably its
    classification layer — confirm against the hub model). The head is an
    ``ExpandingHead`` converted by ``convert_dense_to_sparse_network``.

    Args:
        freeze_base: Accepted but currently has no effect.
    """

    def __init__(self, freeze_base: bool = False):
        super().__init__()
        pretrained = torch.hub.load("chenyaofo/pytorch-cifar-models",
                                    "cifar10_resnet20", pretrained=True)
        # Keep every child of the pretrained network except the last one.
        backbone_layers = list(pretrained.children())[:-1]
        self.base_model = torch.nn.Sequential(*backbone_layers)

        dense_head = ExpandingHead(input_size=64, hidden_size=50, output_size=10)
        self.expanding_head = convert_dense_to_sparse_network(dense_head).to(device)
        # TODO: freeze_base is not applied yet.
        # if freeze_base:
        #     self.freeze(self.base_model)

    def forward(self, x):
        """Run the backbone, flatten everything but the batch dimension, apply the head."""
        features = self.base_model(x)
        flat_features = features.view(features.size(0), -1)
        return self.expanding_head(flat_features)

In [None]:
# ResnetExp's only constructor argument is `freeze_base`; the device string was
# being passed in its place. The flag is spelled out here; device placement is
# done by the .to(device) call below.
rexp = ResnetExp(freeze_base=False)
rexp = rexp.to(device)
# Smoke test: one validation image through the full model.
img = val_dataset[0][0].unsqueeze(0).to(device)
rexp(img)

Using cache found in /root/.cache/torch/hub/chenyaofo_pytorch-cifar-models_master


tensor([[-0.0817,  0.0430,  0.0596, -0.1064,  0.1145, -0.0934, -0.0041,  0.1266,
         -0.1026, -0.0511]], device='cuda:0', grad_fn=<AsStridedBackward0>)

## Train

❗️TODO:
- adjust train loop code to only extend the head
- freeze the backbone
- add GPU support

In [None]:
# Loss used by the edge metric passed to training below.
criterion = nn.CrossEntropyLoss()
# Stand-alone EdgeFinder for manual inspection; the training call below does not
# use it (edge_replacement_func_new_layer builds its own).
ef = EdgeFinder(GradientMeanEdgeMetric(criterion), val_loader, device)
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [None]:
# Minimum number of epochs between edge replacements, and the number of most recent
# validation losses the expansion criterion looks at. With num_epochs = 15 below,
# a value of 100 means train_sparse_recursive never passes its
# `(epoch - prev_replacement_epoch) >= n_prev_epochs` gate in this run.
n_prev_epochs = 100
# The expansion criterion fires when the mean |Δ validation loss| is below this.
delta_threshold = 0.1
# Edges whose averaged metric is above this value are selected for replacement.
metric_threshold = 0.015
# Total number of training epochs.
num_epochs = 15

In [None]:
# Train the ResNet + expandable head. Edges are scored with the gradient metric and
# the number of neurons per replaced edge comes from get_sqrt_n_neurons_by_delta.
train_sparse_recursive(rexp,
                       train_loader,
                       val_loader,
                       num_epochs=num_epochs,
                       metric=GradientMeanEdgeMetric(criterion),
                       edge_replacement_func=edge_replacement_func_new_layer,
                       expansion_criterion=get_expansion_criterion,
                       logging=True,
                       delta_threshold=delta_threshold,
                       metric_threshold=metric_threshold,
                       n_prev_epochs=n_prev_epochs,
                       get_n_neurons_func=get_sqrt_n_neurons_by_delta,
                       device=device)

100%|██████████| 782/782 [00:35<00:00, 22.26it/s]


Epoch 1/15, Train Loss: 0.0694, Val Loss: 0.8137, Val Accuracy: 0.9035



100%|██████████| 782/782 [00:35<00:00, 22.23it/s]


Epoch 2/15, Train Loss: 0.0756, Val Loss: 0.7664, Val Accuracy: 0.9096



100%|██████████| 782/782 [00:34<00:00, 22.34it/s]


Epoch 3/15, Train Loss: 0.0710, Val Loss: 0.7671, Val Accuracy: 0.9082



100%|██████████| 782/782 [00:35<00:00, 22.29it/s]


Epoch 4/15, Train Loss: 0.0608, Val Loss: 0.8417, Val Accuracy: 0.9117



100%|██████████| 782/782 [00:35<00:00, 22.03it/s]


Epoch 5/15, Train Loss: 0.0700, Val Loss: 0.8038, Val Accuracy: 0.9038



100%|██████████| 782/782 [00:34<00:00, 22.54it/s]


Epoch 6/15, Train Loss: 0.0613, Val Loss: 0.8233, Val Accuracy: 0.9099



100%|██████████| 782/782 [00:34<00:00, 22.51it/s]


Epoch 7/15, Train Loss: 0.0614, Val Loss: 0.7964, Val Accuracy: 0.9100



100%|██████████| 782/782 [00:34<00:00, 22.77it/s]


Epoch 8/15, Train Loss: 0.0581, Val Loss: 0.8279, Val Accuracy: 0.9058



100%|██████████| 782/782 [00:34<00:00, 22.82it/s]


Epoch 9/15, Train Loss: 0.0685, Val Loss: 0.7769, Val Accuracy: 0.9069



100%|██████████| 782/782 [00:34<00:00, 22.63it/s]


Epoch 10/15, Train Loss: 0.0566, Val Loss: 0.7902, Val Accuracy: 0.9085



100%|██████████| 782/782 [00:35<00:00, 21.97it/s]


Epoch 11/15, Train Loss: 0.0593, Val Loss: 0.8137, Val Accuracy: 0.9041



100%|██████████| 782/782 [00:35<00:00, 21.84it/s]


Epoch 12/15, Train Loss: 0.0595, Val Loss: 0.8616, Val Accuracy: 0.9046



100%|██████████| 782/782 [00:35<00:00, 22.21it/s]


Epoch 13/15, Train Loss: 0.0622, Val Loss: 0.7867, Val Accuracy: 0.9074



100%|██████████| 782/782 [00:35<00:00, 21.97it/s]


Epoch 14/15, Train Loss: 0.0581, Val Loss: 0.8030, Val Accuracy: 0.9084



100%|██████████| 782/782 [00:35<00:00, 21.88it/s]


Epoch 15/15, Train Loss: 0.0565, Val Loss: 0.7692, Val Accuracy: 0.9072



In [None]:
# Close the W&B run once training is complete.
wandb.finish()

0,1
train_loss,█▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▇▂▂▁▇▂▂▂▁▃▂▂▁▁▁▁▁▇▄▂▃▂▂▂
val_accuracy,▇████▅▆▅▆▅▅▅▅▅▅▆▆▆▆▅▆▆▆▅▆▆▆▆▆▆▁▆▅▆▆▅▅▅▅▅
val_loss,▄▁▁▁▅▃▄▄▄▅▅▅▆▆▆▆▃▄▄▄▃▃▃▄▄▆▃▃▄▄▅▅█▆▅▅▅▅▆▆

0,1
train_loss,0.20335
val_accuracy,0.9096
val_loss,0.58987
