In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from torchsummary import summary

from torchvision import datasets, transforms

In [2]:
from copy import deepcopy

import numpy as np
import pandas as pd

from termcolor import cprint

In [3]:
data_path = "/tmp/files/"

tensor_mnist = datasets.MNIST(
    data_path, train=True, download=True, transform=transforms.ToTensor()
)

tensor_images = torch.stack([tensor_image for tensor_image, _ in tensor_mnist], dim=3)

tensor_images.shape

torch.Size([1, 28, 28, 60000])

In [4]:
tensor_mean = tensor_images.view(1, -1).mean(dim=1)
tensor_mean

tensor([0.1307])

In [5]:
tensor_std = tensor_images.view(1, -1).std(dim=1)
tensor_std

tensor([0.3081])

In [6]:
training_dataset = datasets.MNIST(
    "/tmp/files/",
    train=True,
    download=True,
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(tensor_mean, tensor_std),
        ]
    ),
)

validation_dataset = datasets.MNIST(
    "/tmp/files/",
    train=False,
    download=True,
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(tensor_mean, tensor_std),
        ]
    ),
)

In [7]:
training_dataset.data.shape, training_dataset.targets.shape

(torch.Size([60000, 28, 28]), torch.Size([60000]))

In [8]:
training_dataset.data[0].shape, training_dataset.targets[0].item()

(torch.Size([28, 28]), 5)

In [9]:
training_df = pd.DataFrame(
    {
        "image": [training_dataset.data[i].numpy() for i in range(len(training_dataset))],
        "label": training_dataset.targets.numpy().astype(str)
    }
)

training_df.head()

Unnamed: 0,image,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",5
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",4
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",9


In [10]:
training_df['label'].sort_values().unique()

array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], dtype=object)

In [11]:
df_list = []
for i in training_df["label"].sort_values().unique():
    cprint(
        f"\nLabel {i} count: {len(training_df[training_df['label'] == i])}",
        "green",
        attrs=["bold"],
        end="\n" + "-" * 60 + "\n",
    )
    exec(f"training_df_{i} = deepcopy(training_df)")
    exec(f"training_df_{i}.loc[training_df_{i}['label'] != i, 'label'] = 0")
    exec(f"training_df_{i}.loc[training_df_{i}['label'] == i, 'label'] = 1")
    exec(f"cprint(training_df_{i}.head(), 'magenta')")
    exec(f"cprint(training_df_{i}['label'].value_counts()\n, 'red')")
    exec(f"df_list.append(training_df_{i})")

[1m[32m
Label 0 count: 5923[0m
------------------------------------------------------------
[35m                                               image label
0  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
1  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     1
2  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
3  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
4  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0[0m
[31mlabel
0    54077
1     5923
Name: count, dtype: int64[0m
[1m[32m
Label 1 count: 6742[0m
------------------------------------------------------------
[35m                                               image label
0  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
1  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
2  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
3  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     1
4  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0[0m
[31mlabel
0    53258

In [12]:
key_list = [str(int(i)) for i in list(training_df["label"].sort_values().unique())]
df_dict = dict(zip(key_list, df_list))
df_dict

{'0':                                                    image label
 0      [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 1      [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     1
 2      [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 3      [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 4      [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 ...                                                  ...   ...
 59995  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 59996  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 59997  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 59998  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 59999  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 
 [60000 rows x 2 columns],
 '1':                                                    image label
 0      [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 1      [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...     0
 

In [13]:
if torch.backends.mps.is_available():
    cprint("MPS is available", "green")
    device = torch.device("mps:0")
elif torch.backends.cuda.is_available():
    cprint("CUDA is available", "green")
    device = torch.device("cuda:0")
elif torch.backends.cudnn.is_built():
    cprint("CUDNN is available", "green")
    device = torch.device("cuda:0")
else:
    cprint("CUDA and MPS are not available", "red")
    cprint("Using CPU", "red")
    device = torch.device("cpu")

[32mMPS is available[0m


In [14]:
class NetOVA(nn.Module):
    def __init__(self):
        super(NetOVA, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        self.fc3 = nn.Linear(10, 1)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

In [15]:
torch.manual_seed(0)
model = NetOVA()
summary(model, next(iter(training_dataset))[0].shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 24, 24]             260
            Conv2d-2             [-1, 20, 8, 8]           5,020
         Dropout2d-3             [-1, 20, 8, 8]               0
            Linear-4                   [-1, 50]          16,050
            Linear-5                   [-1, 10]             510
            Linear-6                    [-1, 1]              11
Total params: 21,851
Trainable params: 21,851
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.08
Estimated Total Size (MB): 0.15
----------------------------------------------------------------


In [16]:
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [17]:
model = NetOVA().to(device)
training_dataset_0 = TensorDataset(
    torch.tensor(training_df_0["image"].tolist()).float(),
    torch.tensor(training_df_0["label"].tolist()).float(),
)

training_loader_0 = DataLoader(
    training_dataset_0, batch_size=100, shuffle=True
)

validation_loader = DataLoader(
    validation_dataset, batch_size=500, shuffle=False
)

  torch.tensor(training_df_0["image"].tolist()).float(),


In [18]:
for batch in training_loader_0:
    images, labels = batch[0].to(device), batch[1].to(device)
    images = images.view(-1, 1, 28, 28)
    labels = labels.view(-1, 1)
    labels = labels.squeeze()
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

In [19]:
states_list = []
for i in training_df["label"].sort_values().unique():
    exec(f"model_{i} = NetOVA().to(device)")
    exec(f"states_list.append(model_{i}.state_dict())")

key_list = [str(int(i)) for i in list(training_df["label"].sort_values().unique())]
states_dict = dict(zip(key_list, states_list))

states_dict["0"].keys(), states_dict["0"].values()

(odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias']),
 odict_values([tensor([[[[ 0.1932, -0.1005,  0.1750, -0.2000, -0.1022],
           [ 0.1497, -0.1629,  0.1259, -0.0351, -0.1802],
           [ 0.0974, -0.1965, -0.1756, -0.0763,  0.1163],
           [ 0.1791, -0.1161, -0.0511, -0.0604, -0.1682],
           [-0.0989,  0.1195,  0.0611, -0.1110, -0.1632]]],
 
 
         [[[-0.0805, -0.1928,  0.1552, -0.0591,  0.0533],
           [ 0.0423,  0.0169,  0.0772,  0.1678,  0.1895],
           [-0.0286, -0.1887,  0.0718, -0.0574,  0.0026],
           [ 0.0496,  0.0512, -0.1805, -0.0023,  0.1768],
           [-0.1158, -0.1142, -0.1150, -0.1851,  0.0492]]],
 
 
         [[[-0.0402,  0.1199, -0.0979,  0.0514,  0.1754],
           [ 0.1503,  0.1153, -0.0177, -0.0165, -0.0145],
           [ 0.0228,  0.1312, -0.1563,  0.1054,  0.0475],
           [ 0.1279,  0.1186, -0.1198,  0.1738, -0.1268],
         

In [20]:
epochs = 25
for _ in range(epochs):
    for i, (df, key, state) in enumerate(
        zip(df_list, states_dict.keys(), states_dict.values())
    ):
        model.load_state_dict(state)
        model.eval()
        training_dataset_tmp = TensorDataset(
            torch.tensor(df["image"].tolist()).float(),
            torch.tensor(df["label"].tolist()).float(),
        )

        training_loader_tmp = DataLoader(
            training_dataset_tmp, batch_size=100, shuffle=True
        )

        for batch in training_loader_tmp:
            images, labels = batch[0].to(device), batch[1].to(device)
            images = images.view(-1, 1, 28, 28)
            labels = labels.view(-1, 1)
            labels = labels.squeeze()
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        states_dict[key] = model.state_dict()

In [21]:
states_dict["0"].keys(), states_dict["0"].values()

(odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias']),
 odict_values([tensor([[[[-0.1093, -0.0094,  0.1022,  0.1257,  0.0696],
           [-0.0066,  0.0190, -0.0439, -0.1876,  0.0011],
           [-0.0950,  0.0365, -0.1841, -0.1786, -0.1127],
           [ 0.1191, -0.0035, -0.0687,  0.0073, -0.1838],
           [-0.1385, -0.0984, -0.0959, -0.0957,  0.1596]]],
 
 
         [[[ 0.1719, -0.0135, -0.1285,  0.0196, -0.1272],
           [-0.1522, -0.1338, -0.1475, -0.0045,  0.1361],
           [ 0.1439, -0.1839, -0.0317, -0.1769,  0.0250],
           [ 0.1971, -0.0145,  0.1340, -0.0624, -0.1878],
           [-0.1195,  0.0369,  0.0744,  0.1727,  0.0762]]],
 
 
         [[[-0.1160, -0.1131,  0.1773, -0.1775, -0.1261],
           [-0.0848,  0.1454,  0.1443,  0.1866, -0.1348],
           [-0.0643,  0.0082,  0.1951, -0.1323, -0.0585],
           [-0.0086, -0.0386, -0.1575,  0.1781, -0.0171],
         

In [22]:
for key, state in states_dict.items():
    with torch.no_grad():
        for val_images, val_labels in validation_loader:
            model.load_state_dict(state)
            val_images = val_images.to(device)
            val_labels = val_labels.to(device)
            val_outputs = model(val_images)
            val_loss = criterion(val_outputs, val_labels)
            if max(val_outputs) != 0:
                print(max(val_outputs))