In [1]:

from copy import deepcopy

import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm import tqdm

In [2]:
from senmodel.model.utils import *
from senmodel.metrics.nonlinearity_metrics import *
from senmodel.metrics.edge_finder import *

In [3]:
torch.manual_seed(0)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
def train_sparse_recursive(model, train_loader, val_loader, num_epochs, metric, edge_replacement_func=None,
                           window_size=2, threshold=0.10):
    optimizer = optim.Adam(model.parameters(), lr=5e-5)
    criterion = nn.CrossEntropyLoss()

    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for inputs, targets in tqdm(train_loader):
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0
        all_targets = []
        all_preds = []
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()

                preds = torch.argmax(outputs, dim=1)
                all_targets.extend(targets.cpu().numpy())
                all_preds.extend(preds.cpu().numpy())

        val_loss /= len(val_loader)
        val_accuracy = accuracy_score(all_targets, all_preds)

        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")
        new_l = {}
        # if edge_replacement_func and epoch % 8 == 0 and epoch != 0:
        #     new_l = edge_replacement_func(model, optimizer, val_loader, metric)
        val_losses.append(val_loss)
        if len(val_losses) > window_size:
            recent_changes = [abs(val_losses[i] - val_losses[i - 1]) for i in range(-window_size, 0)]
            avg_change = sum(recent_changes) / window_size
            if avg_change < threshold:
                new_l = edge_replacement_func(model, optimizer, val_loader, metric)

                if new_l["len_choose"] == 0:
                    break

        wandb.log({'val loss': val_loss, 'val accuracy': val_accuracy, 'train loss': train_loss} | new_l)


def edge_replacement_func_new_layer(model, optim, val_loader, metric):
    layer = model.fc1
    ef = EdgeFinder(metric, val_loader, device)
    vals = ef.calculate_edge_metric_for_dataloader(model)
    print(f"{len(vals)=} {max(vals)=}  {sum(vals)=} {min(vals)=}")
    chosen_edges = ef.choose_edges_threshold(model, 0.15)
    print("choose:", chosen_edges, len(chosen_edges[0]))
    layer.replace_many(*chosen_edges)

    if layer.embed_linears:
        optim.add_param_group({'params': layer.embed_linears[-1].weight_values})
    else:
        print("empty metric")
        dummy_param = torch.zeros_like(layer.weight_values)
        optim.add_param_group({'params': dummy_param})

    return {'max': max(vals), 'sum': sum(vals), 'len': len(vals), 'len_choose': len(chosen_edges[0])}

In [5]:
# Define the model
class SimpleFCN(nn.Module):
    def __init__(self, input_size=28 * 28):
        super(SimpleFCN, self).__init__()
        self.fc1 = nn.Linear(input_size, 10)

    def forward(self, x):
        x = self.fc1(x)
        return x

In [6]:
# Dataset and Dataloader
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(-1))
])

# Load dataset and split into train/validation sets
dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [7]:
label_dict = {}
for i in val_dataset:
    if i[1] not in label_dict.keys():
        label_dict[i[1]] = 0
    else:
        label_dict[i[1]] += 1
label_dict

{4: 1179,
 7: 1285,
 9: 1139,
 5: 1121,
 3: 1193,
 6: 1191,
 8: 1150,
 0: 1225,
 1: 1353,
 2: 1154}

In [8]:
criterion = nn.CrossEntropyLoss()
metrics = [
    MagnitudeL2Metric(criterion),
    SNIPMetric(criterion),
    GradientMeanEdgeMetric(criterion),
    PerturbationSensitivityEdgeMetric(criterion),
]
model = SimpleFCN()
sparse_model = convert_dense_to_sparse_network(model)
sparse_linear = deepcopy(sparse_model.fc1)
sparse_model.fc1.weight_indices

tensor([[  0,   0,   0,  ...,   9,   9,   9],
        [  0,   1,   2,  ..., 781, 782, 783]])

In [9]:
sparse_model.fc1.weight_indices[:, :50]

tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
         36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])

In [10]:
# ef = EdgeFinder(metrics[0], val_loader, device)
# print(ef.choose_edges_threshold(sparse_model, 0.3))

# raise "e"

In [11]:
import wandb

wandb.login()

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: fedornigretuk. Use `wandb login --relogin` to force relogin


True

In [12]:
run = wandb.init(
    project="self-expanding-nets",
    name=f"replace=(auto epoch, threshold 0.15), lr=5e-5, magnetic l2 metric",
)

In [13]:
train_sparse_recursive(sparse_model, train_loader, val_loader, 64, metrics[0],
                       edge_replacement_func=edge_replacement_func_new_layer)

100%|██████████| 750/750 [00:10<00:00, 74.65it/s]


Epoch 1/64, Train Loss: 1.7941, Val Loss: 1.3948, Val Accuracy: 0.7672


100%|██████████| 750/750 [00:09<00:00, 78.61it/s]


Epoch 2/64, Train Loss: 1.1640, Val Loss: 0.9907, Val Accuracy: 0.8234


100%|██████████| 750/750 [00:09<00:00, 83.29it/s]


Epoch 3/64, Train Loss: 0.8757, Val Loss: 0.7890, Val Accuracy: 0.8393


100%|██████████| 750/750 [00:09<00:00, 79.99it/s]


Epoch 4/64, Train Loss: 0.7201, Val Loss: 0.6701, Val Accuracy: 0.8535


100%|██████████| 750/750 [00:10<00:00, 71.24it/s]


Epoch 5/64, Train Loss: 0.6238, Val Loss: 0.5919, Val Accuracy: 0.8649
len(vals)=7840 max(vals)=tensor(1.0000, grad_fn=<UnbindBackward0>)  sum(vals)=tensor(882.8075, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[  0,   0,   0,  ...,   9,   9,   9],
        [ 65,  66,  71,  ..., 720, 745, 769]]) 2072


100%|██████████| 750/750 [00:14<00:00, 52.25it/s]


Epoch 6/64, Train Loss: 5.8177, Val Loss: 5.3259, Val Accuracy: 0.0503


100%|██████████| 750/750 [00:14<00:00, 52.33it/s]


Epoch 7/64, Train Loss: 4.9305, Val Loss: 4.5432, Val Accuracy: 0.0542


100%|██████████| 750/750 [00:14<00:00, 53.21it/s]


Epoch 8/64, Train Loss: 4.2151, Val Loss: 3.9068, Val Accuracy: 0.0679


100%|██████████| 750/750 [00:14<00:00, 51.05it/s]


Epoch 9/64, Train Loss: 3.6367, Val Loss: 3.3941, Val Accuracy: 0.0929


100%|██████████| 750/750 [00:14<00:00, 53.09it/s]


Epoch 10/64, Train Loss: 3.1666, Val Loss: 2.9730, Val Accuracy: 0.1284


100%|██████████| 750/750 [00:13<00:00, 53.92it/s]


Epoch 11/64, Train Loss: 2.7777, Val Loss: 2.6212, Val Accuracy: 0.1782


100%|██████████| 750/750 [00:13<00:00, 56.86it/s]


Epoch 12/64, Train Loss: 2.4520, Val Loss: 2.3252, Val Accuracy: 0.2361


100%|██████████| 750/750 [00:13<00:00, 55.07it/s]


Epoch 13/64, Train Loss: 2.1781, Val Loss: 2.0750, Val Accuracy: 0.3019


100%|██████████| 750/750 [00:14<00:00, 51.78it/s]


Epoch 14/64, Train Loss: 1.9462, Val Loss: 1.8621, Val Accuracy: 0.3689


100%|██████████| 750/750 [00:15<00:00, 47.49it/s]


Epoch 15/64, Train Loss: 1.7499, Val Loss: 1.6820, Val Accuracy: 0.4337


100%|██████████| 750/750 [00:14<00:00, 51.98it/s]


Epoch 16/64, Train Loss: 1.5843, Val Loss: 1.5308, Val Accuracy: 0.4899


100%|██████████| 750/750 [00:14<00:00, 53.33it/s]


Epoch 17/64, Train Loss: 1.4469, Val Loss: 1.4061, Val Accuracy: 0.5369


100%|██████████| 750/750 [00:14<00:00, 53.39it/s]


Epoch 18/64, Train Loss: 1.3326, Val Loss: 1.3010, Val Accuracy: 0.5775


100%|██████████| 750/750 [00:12<00:00, 58.69it/s]


Epoch 19/64, Train Loss: 1.2360, Val Loss: 1.2124, Val Accuracy: 0.6134
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(706.6859, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    0,  ...,    9,    9,    9],
        [ 784,  785,  786,  ..., 2852, 2854, 2855]]) 1263


100%|██████████| 750/750 [00:18<00:00, 39.92it/s]


Epoch 20/64, Train Loss: 1.3219, Val Loss: 1.2622, Val Accuracy: 0.6081
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(493.7534, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    0,  ...,    9,    9,    9],
        [2856, 2860, 2862,  ..., 4116, 4117, 4118]]) 809


100%|██████████| 750/750 [00:21<00:00, 34.17it/s]


Epoch 21/64, Train Loss: 1.3687, Val Loss: 1.3333, Val Accuracy: 0.6288
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(340.6533, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,  

100%|██████████| 750/750 [00:25<00:00, 29.23it/s]


Epoch 22/64, Train Loss: 1.3730, Val Loss: 1.3421, Val Accuracy: 0.6478
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(242.4694, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    3,    3,    3,    3,    3,    3,    3,    3,    3,
            3,    3,    3,    3,    3,    3,    3,    3,    3,    4,    4,    4,
            4,    4,    4,  

100%|██████████| 750/750 [00:24<00:00, 30.73it/s]


Epoch 23/64, Train Loss: 1.3388, Val Loss: 1.3150, Val Accuracy: 0.6821
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(179.6160, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   5,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    2,    2,    2,    3,    3,    3,
            3,    3,    3,    3,    4,    4,    4,    4,    4,    4,    4,    4,
            4,    4,    4,    4,    4,    4,    4,    4,    5,    5,    5,    5,
            5,    5,    5,    5,    5,    5,    5,    5,    6,    6,    6,    6,
            6,    6,    6,    6,    6,    6,    6,    6,    6,    6,    6,    6,
            6,    6,    6,    6,    6,    7,    7,    7,    7,    7,    7,    7,
            7,    7,    7,  

100%|██████████| 750/750 [00:31<00:00, 23.45it/s]


Epoch 24/64, Train Loss: 1.3007, Val Loss: 1.2800, Val Accuracy: 0.7035
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(162.1776, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   5,    0,    0,    0,    0,    0,    0,    0,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    3,    3,    3,    3,    3,    4,
            4,    4,    4,    4,    4,    4,    4,    4,    4,    4,    4,    5,
            5,    5,    5,    5,    5,    5,    5,    5,    6,    6,    6,    6,
            6,    6,    6,    6,    6,    6,    6,    6,    6,    6,    6,    6,
            7,    7,    7,    7,    7,    7,    7,    7,    7,    7,    7,    7,
            7,    7,    7,    7,    7,    7,    7,    7,    8,    8,    8,    8,
            8,    8,    9,    9,    9,    9,    9,    9,    9,    9,    9,    9,
            9,    9,    9,  

100%|██████████| 750/750 [00:35<00:00, 21.36it/s]


Epoch 25/64, Train Loss: 1.2639, Val Loss: 1.2454, Val Accuracy: 0.7181
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(146.6765, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    5,    6,    7,    9,    9,    9,    1,    6,    9,    9,    2,
            0,    1,    7,    9,    9,    9,    0,    0,    0,    0,    0,    0,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    2,
            2,    2,    2,    2,    2,    3,    3,    3,    4,    4,    4,    4,
            4,    4,    4,    5,    5,    5,    5,    6,    6,    6,    6,    6,
            6,    6,    6,    6,    6,    6,    6,    7,    7,    7,    7,    7,
            7,    7,    7,    7,    8,    8,    8,    9,    9,    9,    9,    9,
            9,    9,    9,    9,    9,    9,    9],
        [ 840, 1981, 2020, 2285, 2733, 2827, 2843, 2983, 3605, 3981, 4013, 4343,
         4955, 4992, 5266, 5393, 5394, 5864, 5868, 5870, 

100%|██████████| 750/750 [00:31<00:00, 24.00it/s]


Epoch 26/64, Train Loss: 1.2230, Val Loss: 1.2071, Val Accuracy: 0.7397
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(139.3961, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   1,    1,    1,    3,    4,    4,    6,    6,    6,    6,    6,    7,
            7,    7,    9,    9,    9,    0,    0,    0,    1,    1,    1,    2,
            7,    7,    8,    8,    9,    0,    1,    1,    2,    6,    6,    6,
            8,    8,    9,    1,    3,    4,    6,    0,    2,    5,    6,    7,
            0,    9,    6,    9,    2,    0,    7,    9,    9,    0,    0,    0,
            0,    0,    1,    1,    1,    1,    1,    2,    2,    2,    2,    3,
            3,    4,    4,    4,    4,    5,    5,    6,    6,    6,    6,    6,
            6,    6,    6,    7,    7,    7,    8,    8,    9,    9,    9,    9,
            9,    9,    9,    9,    9,    9],
        [ 991, 1185, 1218, 1491, 1634, 1765, 2018, 2019, 2052, 

100%|██████████| 750/750 [00:34<00:00, 21.56it/s]


Epoch 27/64, Train Loss: 1.1947, Val Loss: 1.1809, Val Accuracy: 0.7482
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(135.6727, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   3,    4,    4,    6,    6,    6,    6,    9,    9,    0,    1,    1,
            1,    2,    7,    8,    8,    0,    1,    1,    2,    6,    6,    6,
            9,    1,    4,    6,    0,    2,    5,    6,    0,    6,    9,    2,
            0,    9,    9,    0,    0,    0,    0,    1,    1,    2,    2,    2,
            3,    4,    4,    6,    6,    6,    6,    6,    7,    7,    7,    8,
            9,    9,    9,    9,    9],
        [6076, 6077, 6078, 6079, 6080, 6081, 6082, 6087, 6089, 6092, 6093, 6094,
         6095, 6096, 6097, 6099, 6100, 6102, 6103, 6104, 6105, 6106, 6107, 6108,
         6111, 6112, 6114, 6115, 6116, 6117, 6118, 6119, 6121, 6123, 6124, 6125,
         6126, 6128, 6129, 6130, 6132, 6133, 6134, 6135, 6138, 6140, 

100%|██████████| 750/750 [00:33<00:00, 22.37it/s]


Epoch 28/64, Train Loss: 1.1603, Val Loss: 1.1483, Val Accuracy: 0.7618
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(136.6050, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    1,    2,    2,    3,    3,    3,    4,    4,    4,    5,
            6,    6,    6,    7,    8,    9,    0,    0,    1,    1,    2,    6,
            7,    7,    8,    9,    0,    0,    1,    3,    3,    4,    6,    7,
            2,    4,    4,    6,    9,    5,    1,    6,    3,    4,    4,    6,
            6,    9,    1,    1,    7,    8,    0,    1,    1,    2,    6,    6,
            6,    9,    4,    0,    2,    5,    6,    0,    6,    9,    2,    0,
            9,    0,    0,    1,    2,    6,    6,    7,    7,    7,    8,    9,
            9,    9,    9],
        [ 823,  880, 1051, 1364, 1398, 1495, 1544, 1612, 1674, 1683, 1832, 1951,
         2005, 2025, 2149, 2262, 2499, 2722, 2868, 2916, 3066, 3110, 3239, 3659,


100%|██████████| 750/750 [00:37<00:00, 19.88it/s]


Epoch 29/64, Train Loss: 1.1292, Val Loss: 1.1187, Val Accuracy: 0.7690
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(128.8422, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    2,    3,    3,    4,    4,    4,    5,    6,    7,    8,
            9,    0,    0,    1,    6,    7,    9,    0,    4,    6,    2,    4,
            4,    6,    5,    6,    3,    6,    9,    7,    8,    0,    1,    2,
            6,    6,    5,    9,    2,    0,    0,    0,    2,    7,    9,    9,
            9],
        [6240, 6241, 6243, 6246, 6247, 6248, 6249, 6250, 6251, 6253, 6255, 6256,
         6257, 6258, 6259, 6261, 6263, 6264, 6267, 6268, 6273, 6274, 6276, 6277,
         6278, 6279, 6281, 6283, 6284, 6288, 6289, 6292, 6293, 6294, 6295, 6297,
         6298, 6300, 6305, 6309, 6310, 6311, 6313, 6314, 6316, 6320, 6323, 6325,
         6326]]) 49


100%|██████████| 750/750 [00:42<00:00, 17.72it/s]


Epoch 30/64, Train Loss: 1.0997, Val Loss: 1.0907, Val Accuracy: 0.7772
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(116.7817, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    2,    3,    4,    6,    7,    8,    0,    6,    0,    6,
            2,    4,    6,    5,    6,    3,    6,    8,    0,    1,    6,    6,
            5,    0,    0,    0,    2,    9,    9],
        [6327, 6328, 6329, 6330, 6333, 6336, 6337, 6338, 6341, 6343, 6346, 6348,
         6349, 6350, 6352, 6353, 6354, 6355, 6356, 6359, 6360, 6361, 6363, 6364,
         6365, 6368, 6369, 6370, 6371, 6373, 6375]]) 31


100%|██████████| 750/750 [00:38<00:00, 19.50it/s]


Epoch 31/64, Train Loss: 1.0736, Val Loss: 1.0662, Val Accuracy: 0.7805
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(114.9261, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    3,    6,    7,    8,    0,    6,    0,    6,    2,    4,
            6,    6,    3,    6,    8,    0,    6,    6,    5,    0,    0,    2,
            9,    9],
        [6376, 6377, 6379, 6381, 6382, 6383, 6384, 6385, 6386, 6387, 6388, 6389,
         6390, 6392, 6393, 6394, 6395, 6396, 6398, 6399, 6400, 6401, 6402, 6404,
         6405, 6406]]) 26


100%|██████████| 750/750 [00:39<00:00, 18.76it/s]


Epoch 32/64, Train Loss: 1.0494, Val Loss: 1.0432, Val Accuracy: 0.7823
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(154.2123, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    0,    0,    0,    0,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            2,    2,    2,    2,    2,    2,    2,    2,    2,    3,    3,    3,
            3,    3,    3,    3,    3,    4,    4,    4,    4,    4,    4,    4,
            5,    5,    5,    5,    5,    5,    5,    5,    5,    6,    6,    6,
            6,    6,    6,    6,    6,    6,    6,    6,    7,    7,    7,    7,
            7,    7,    7,    7,    7,    7,    7,    7,    8,    8,    8,    8,
            8,    8,    9,    9,    9,    9,    9,    9,    9,    9,    9,    9,
            9,    9,    9,    9,    0,    0,    0,    0,    1,    1,    1,    1,
            1,    1,    2,  

100%|██████████| 750/750 [00:40<00:00, 18.58it/s]


Epoch 33/64, Train Loss: 1.0591, Val Loss: 1.0504, Val Accuracy: 0.7722
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(160.8315, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    2,    2,    2,    2,    2,    3,    3,    3,    3,
            3,    4,    4,    4,    4,    5,    5,    5,    5,    5,    5,    5,
            6,    6,    6,    6,    6,    6,    6,    6,    7,    7,    7,    7,
            7,    7,    7,    7,    7,    8,    8,    8,    9,    9,    9,    9,
            9,    9,    0,    0,    1,    1,    1,    1,    2,    2,    2,    2,
            2,    4,    4,    4,    6,    6,    6,    7,    7,    7,    7,    8,
            8,    9,    9,    9,    9,    9,    9,    0,    0,    1,    1,    2,
            2,    3,    3,    3,    4,    4,    4,    4,    6,    7,    7,    7,
            8,    8,    8,  

100%|██████████| 750/750 [00:45<00:00, 16.64it/s]


Epoch 34/64, Train Loss: 1.0579, Val Loss: 1.0484, Val Accuracy: 0.7818
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(126.3644, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    1,    1,    1,    1,    1,    1,    2,    2,    2,    2,    3,
            3,    4,    4,    5,    5,    6,    6,    6,    6,    6,    7,    7,
            7,    7,    7,    7,    8,    8,    8,    9,    9,    9,    9,    9,
            9,    0,    0,    1,    1,    1,    2,    2,    2,    6,    6,    7,
            8,    8,    9,    9,    9,    9,    0,    1,    1,    2,    3,    3,
            4,    4,    4,    7,    7,    8,    8,    8,    8,    1,    4,    4,
            5,    5,    6,    8,    8,    0,    1,    1,    1,    4,    5,    8,
            3,    5,    9,    0,    4,    7,    7,    2,    0,    0,    6,    0,
            6,    0,    6,    2],
        [6711, 6713, 6714, 6715, 6718, 6719, 6722, 6725, 6727, 6728, 6729, 

100%|██████████| 750/750 [00:46<00:00, 16.25it/s]


Epoch 35/64, Train Loss: 1.0531, Val Loss: 1.0449, Val Accuracy: 0.7809
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(108.6543, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   6,    1,    1,    1,    1,    2,    2,    4,    5,    6,    7,    7,
            7,    7,    7,    8,    9,    9,    1,    1,    2,    2,    7,    8,
            8,    9,    9,    9,    0,    1,    2,    3,    4,    4,    7,    8,
            8,    1,    4,    5,    8,    1,    5,    8,    3,    5,    9,    0,
            4,    7,    7,    2,    0,    0,    6,    0,    6,    2],
        [6831, 6875, 6876, 6877, 6879, 6881, 6883, 6888, 6890, 6891, 6897, 6898,
         6899, 6900, 6901, 6903, 6905, 6908, 6913, 6914, 6916, 6917, 6921, 6922,
         6923, 6924, 6925, 6926, 6928, 6930, 6931, 6933, 6934, 6935, 6937, 6939,
         6942, 6943, 6945, 6946, 6950, 6953, 6956, 6957, 6958, 6959, 6960, 6961,
         6962, 6963, 6964, 6965, 6966, 

100%|██████████| 750/750 [00:46<00:00, 16.03it/s]


Epoch 36/64, Train Loss: 1.0324, Val Loss: 1.0254, Val Accuracy: 0.7806
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(100.0614, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   1,    1,    2,    2,    4,    5,    7,    7,    9,    9,    1,    2,
            7,    8,    0,    1,    3,    4,    8,    5,    1,    5,    8,    3,
            9,    7,    7,    2,    0,    0,    6,    0],
        [6976, 6978, 6979, 6980, 6981, 6982, 6984, 6986, 6990, 6991, 6992, 6995,
         6996, 6997, 7002, 7003, 7005, 7007, 7010, 7013, 7015, 7016, 7017, 7018,
         7020, 7023, 7024, 7025, 7026, 7027, 7028, 7029]]) 32


100%|██████████| 750/750 [00:54<00:00, 13.72it/s]


Epoch 37/64, Train Loss: 1.0095, Val Loss: 1.0034, Val Accuracy: 0.7805
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(91.5124, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   1,    1,    2,    2,    4,    7,    9,    7,    0,    1,    3,    8,
            5,    1,    5,    8,    3,    7,    2,    0,    6,    0],
        [7032, 7033, 7034, 7035, 7036, 7039, 7041, 7044, 7046, 7047, 7048, 7050,
         7051, 7052, 7053, 7054, 7055, 7058, 7059, 7060, 7062, 7063]]) 22


100%|██████████| 750/750 [00:57<00:00, 12.94it/s]


Epoch 38/64, Train Loss: 0.9895, Val Loss: 0.9847, Val Accuracy: 0.7813
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(136.1010, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    1,    2,    2,    2,    2,    4,    4,    5,    6,    7,
            7,    7,    7,    8,    9,    9,    9,    9,    9,    0,    2,    2,
            2,    3,    3,    3,    4,    4,    4,    5,    5,    6,    6,    6,
            6,    6,    8,    8,    9,    9,    0,    0,    1,    1,    1,    2,
            3,    3,    4,    5,    6,    6,    6,    6,    6,    7,    9,    9,
            1,    5,    0,    3,    8,    2,    2,    4,    7,    2,    4,    6,
            7,    7,    5,    5,    2,    4,    6,    1,    1,    4,    4,    5,
            1,    3,    5,    6,    7,    8,    9,    5,    9,    1,    6,    7,
            0,    5,    7,    8,    7,    1,    3,    5,    7,    4,    4,    0,
            7,    8,    6,  

100%|██████████| 750/750 [01:01<00:00, 12.18it/s]


Epoch 39/64, Train Loss: 0.9800, Val Loss: 0.9748, Val Accuracy: 0.7775
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(114.8763, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   0,    0,    1,    2,    2,    4,    5,    7,    7,    8,    9,    9,
            9,    0,    2,    2,    4,    4,    4,    5,    6,    6,    6,    6,
            8,    8,    9,    9,    0,    0,    1,    1,    1,    2,    4,    6,
            6,    7,    9,    3,    8,    2,    4,    6,    7,    7,    2,    4,
            1,    1,    4,    5,    1,    5,    7,    8,    9,    5,    9,    6,
            8,    7,    4,    0,    7,    8,    6,    7,    0,    4,    0,    1,
            6,    8,    6,    1,    1,    6,    7,    1,    8,    4,    2,    1,
            2,    7,    1,    8,    5,    1,    7,    0,    0],
        [7086, 7087, 7088, 7089, 7090, 7093, 7095, 7097, 7098, 7101, 7102, 7104,
         7106, 7107, 7108, 7109, 7114, 7115, 

100%|██████████| 750/750 [01:09<00:00, 10.85it/s]


Epoch 40/64, Train Loss: 0.9739, Val Loss: 0.9684, Val Accuracy: 0.7842
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(109.3760, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   2,    6,    5,    0,    1,    2,    2,    5,    7,    8,    9,    9,
            9,    2,    2,    4,    6,    6,    6,    9,    0,    1,    1,    2,
            4,    6,    6,    9,    3,    8,    2,    4,    6,    7,    7,    1,
            5,    7,    8,    9,    9,    6,    8,    7,    0,    8,    6,    7,
            0,    0,    1,    6,    1,    1,    7,    1,    8,    4,    2,    1,
            8,    5,    1,    0,    0],
        [7091, 7141, 7202, 7236, 7237, 7238, 7239, 7241, 7243, 7244, 7245, 7246,
         7247, 7249, 7250, 7253, 7256, 7257, 7258, 7261, 7263, 7265, 7266, 7268,
         7269, 7270, 7271, 7273, 7274, 7275, 7276, 7277, 7278, 7279, 7280, 7287,
         7288, 7289, 7290, 7291, 7293, 7294, 7295, 7296, 7298, 7300, 

100%|██████████| 750/750 [01:13<00:00, 10.22it/s]


Epoch 41/64, Train Loss: 0.9554, Val Loss: 0.9518, Val Accuracy: 0.7847
len(vals)=7840 max(vals)=tensor(1., grad_fn=<UnbindBackward0>)  sum(vals)=tensor(90.2769, grad_fn=<AddBackward0>) min(vals)=tensor(0., grad_fn=<UnbindBackward0>)
choose: tensor([[   2,    6,    1,    2,    5,    8,    9,    2,    4,    6,    6,    6,
            9,    1,    2,    6,    6,    8,    4,    1,    7,    8,    6,    8,
            7,    0,    0,    1,    6,    1,    1,    7,    1,    4,    2,    8,
            1,    0,    0],
        [7328, 7329, 7332, 7334, 7335, 7337, 7340, 7341, 7343, 7344, 7345, 7346,
         7347, 7349, 7351, 7353, 7354, 7357, 7359, 7363, 7365, 7366, 7369, 7373,
         7375, 7376, 7377, 7378, 7379, 7380, 7381, 7382, 7383, 7385, 7386, 7388,
         7390, 7391, 7392]]) 39


 64%|██████▎   | 478/750 [00:43<00:24, 10.99it/s]


KeyboardInterrupt: 