In [None]:
import numpy as np
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
import torchvision
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader

from torchvision import transforms
from torchvision.datasets.mnist import MNIST
import h5py 
from functools import reduce
np.random.seed(0)
torch.manual_seed(0)

<torch._C.Generator at 0x7f77ffa918b0>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
from torchvision.transforms import ToTensor
import torchvision.datasets as dsets

In [None]:
import scipy.io
mat = scipy.io.loadmat('/content/drive/MyDrive/train_32x32.mat')

In [None]:
mat['y'][22] 

array([2], dtype=uint8)

SVHN Data and label Creation

Training the ViT on SVHN dataset and using it on MNIST and USPS dataset

In [None]:
Data = []
Label = []
SHVN_zero = []

for i in range(mat['X'].T.shape[0]):
    a = mat['X'].T[i]
    a = np.mean(a, axis = 0)
    a = a.reshape((1, 32, 32)) 

    Data.append(a)
    Label.append(mat['y'][i])

Data = np.asarray(Data)
Label = np.asarray(Label)

Label[Label == 10] = 0

max(Label)

array([9], dtype=uint8)

In [None]:
Label.shape

(73257, 1)

In [None]:
Data[5].shape

(1, 32, 32)

In [None]:
import torch.utils as utils

train_x = torch.Tensor(Data)
train_y = torch.Tensor(Label)

print(train_x[55].shape)
SVHN_dataset = utils.data.TensorDataset(train_x, train_y)

torch.Size([1, 32, 32])


In [None]:
def patchify(images, n_patches):
    n, c, h, w = images.shape

    assert h == w, "Patchify method is implemented for square images only"

    patches = torch.zeros(n, n_patches ** 2, h * w * c // n_patches ** 2)
    patch_size = h // n_patches

    for idx, image in enumerate(images):
        for i in range(n_patches):
            for j in range(n_patches):
                patch = image[:, i * patch_size: (i + 1) * patch_size, j * patch_size: (j + 1) * patch_size]
                patches[idx, i * n_patches + j] = patch.flatten()
    return patches

In [None]:
def get_positional_embeddings(sequence_length, d):
    result = torch.ones(sequence_length, d)
    for i in range(sequence_length):
        for j in range(d):
            result[i][j] = np.sin(i / (10000 ** (j / d))) if j % 2 == 0 else np.cos(i / (10000 ** ((j - 1) / d)))
    return result

In [None]:
class MyMSA(nn.Module):
    def __init__(self, d, n_heads=2):
        super(MyMSA, self).__init__()
        self.d = d
        self.n_heads = n_heads

        assert d % n_heads == 0, f"Can't divide dimension {d} into {n_heads} heads"

        d_head = int(d / n_heads)
        self.q_mappings = nn.ModuleList([nn.Linear(d_head, d_head) for _ in range(self.n_heads)])
        self.k_mappings = nn.ModuleList([nn.Linear(d_head, d_head) for _ in range(self.n_heads)])
        self.v_mappings = nn.ModuleList([nn.Linear(d_head, d_head) for _ in range(self.n_heads)])
        self.d_head = d_head
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, sequences):
        # Sequences has shape (N, seq_length, token_dim)
        # We go into shape    (N, seq_length, n_heads, token_dim / n_heads)
        # And come back to    (N, seq_length, item_dim)  (through concatenation)
        result = []
        for sequence in sequences:
            seq_result = []
            for head in range(self.n_heads):
                q_mapping = self.q_mappings[head]
                k_mapping = self.k_mappings[head]
                v_mapping = self.v_mappings[head]

                seq = sequence[:, head * self.d_head: (head + 1) * self.d_head]
                q, k, v = q_mapping(seq), k_mapping(seq), v_mapping(seq)

                attention = self.softmax(q @ k.T / (self.d_head ** 0.5))
                seq_result.append(attention @ v)
            result.append(torch.hstack(seq_result))

        attention = torch.cat([torch.unsqueeze(r, dim=0) for r in result])
        return attention

In [None]:
class MyViTBlock(nn.Module):
    def __init__(self, hidden_d, n_heads, mlp_ratio=4):
        super(MyViTBlock, self).__init__()
        self.hidden_d = hidden_d
        self.n_heads = n_heads

        self.norm1 = nn.LayerNorm(hidden_d)
        self.mhsa = MyMSA(hidden_d, n_heads)
        self.norm2 = nn.LayerNorm(hidden_d)
        self.mlp = nn.Sequential(
            nn.Linear(hidden_d, mlp_ratio * hidden_d),
            nn.GELU(),
            nn.Linear(mlp_ratio * hidden_d, hidden_d)
        )

    def forward(self, x):
        #print(x.shape) 
        out = self.mhsa(self.norm1(x))
        out = out + self.mlp(self.norm2(out))
        return out

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
class MyViT(nn.Module):
    def __init__(self, chw, n_patches=7, n_blocks=2, hidden_d=16, n_heads=2, out_d=10):
        # Super constructor
        super(MyViT, self).__init__()
        
        # Attributes
        self.chw = chw # ( C , H , W )
        self.n_patches = n_patches
        self.n_blocks = n_blocks
        self.n_heads = n_heads
        self.hidden_d = hidden_d
        
        # Input and patches sizes
        assert chw[1] % n_patches == 0, "Input shape not entirely divisible by number of patches"
        assert chw[2] % n_patches == 0, "Input shape not entirely divisible by number of patches"
        self.patch_size = (chw[1] / n_patches, chw[2] / n_patches)

        # 1) Linear mapper
        self.input_d = int(chw[0] * self.patch_size[0] * self.patch_size[1])
        self.linear_mapper = nn.Linear(self.input_d, self.hidden_d)
        
        # 2) Learnable classification token
        self.class_token = nn.Parameter(torch.rand(1, self.hidden_d))
        
        # 3) Positional embedding
        self.register_buffer('positional_embeddings', get_positional_embeddings(n_patches ** 2 + 1, hidden_d), persistent=False)
        
        # 4) Transformer encoder blocks
        self.blocks = nn.ModuleList([MyViTBlock(hidden_d, n_heads) for _ in range(n_blocks)])
        
        # 5) Classification MLPk
        self.mlp = nn.Sequential(
            nn.Linear(self.hidden_d, out_d),
            nn.Softmax(dim=-1)
        )

    def forward(self, images):
        # Dividing images into patches
        n, c, h, w = images.shape
        patches = patchify(images, self.n_patches).to(self.positional_embeddings.device)
        
        # Running linear layer tokenization
        # Map the vector corresponding to each patch to the hidden size dimension
        tokens = self.linear_mapper(patches)
        
        # Adding classification token to the tokens
        tokens = torch.cat((self.class_token.expand(n, 1, -1), tokens), dim=1)
        
        # Adding positional embedding
        out = tokens + self.positional_embeddings.repeat(n, 1, 1)
        
        # Transformer Blocks
        for block in self.blocks:
            out = block(out)
            
        # Getting the classification token only
        out = out[:, 0]
        
        return self.mlp(out)

In [None]:
model = MyViT((1, 32, 32), n_patches=8, n_blocks=2, hidden_d=16, n_heads=2, out_d=10)

In [None]:
import PIL
from PIL import Image
if not hasattr(PIL.Image, 'Resampling'):  # Pillow<9.0
  PIL.Image.Resampling = PIL.Image

In [None]:
transform = ToTensor()

transform = transforms.Compose([transforms.Resize(32), # resize to [32,32]
        transforms.ToTensor()])

train_set = MNIST(root='./../datasets', train=True, download=True, transform=transform)
train_loader = utils.data.DataLoader(SVHN_dataset,shuffle=False, batch_size=128)
train_loader2 = DataLoader(train_set, shuffle=True, batch_size=128) 

#test_loader = DataLoader(test_set, shuffle=False, batch_size=128)

# Defining model and training options
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device: ", device, f"({torch.cuda.get_device_name(device)})" if torch.cuda.is_available() else "")
model = MyViT((1, 32, 32), n_patches=8, n_blocks=2, hidden_d=16, n_heads=2, out_d=10)
N_EPOCHS = 3
LR = 0.005

# Training loop
optimizer = Adam(model.parameters(), lr=LR)
criterion = CrossEntropyLoss()
for epoch in trange(N_EPOCHS, desc="Training"):
    train_loss = 0.0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1} in training", leave=False):
        x, y = batch
        x, y = x, y
        y = torch.reshape(y, (-1,))
        y = y.type(torch.LongTensor)
        y_hat = model(x)
        #print(y)
        loss = criterion(y_hat, y)

        train_loss += loss.detach().cpu().item() / len(train_loader)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{N_EPOCHS} loss: {train_loss:.2f}")


print("Entering Phas 2")

for epoch in trange(N_EPOCHS, desc="Training"):
    train_loss = 0.0
    for batch in tqdm(train_loader2, desc=f"Epoch {epoch + 1} in training", leave=False):
        x, y = batch
        x, y = x, y
        y = torch.reshape(y, (-1,))
        y = y.type(torch.LongTensor)
        y_hat = model(x)
        #print(y)
        loss = criterion(y_hat, y)

        train_loss += loss.detach().cpu().item() / len(train_loader)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{N_EPOCHS} loss: {train_loss:.2f}")

Using device:  cpu 


Training:   0%|          | 0/3 [00:00<?, ?it/s]
Epoch 1 in training:   0%|          | 0/573 [00:00<?, ?it/s][A
Epoch 1 in training:   0%|          | 1/573 [00:01<18:40,  1.96s/it][A
Epoch 1 in training:   0%|          | 2/573 [00:03<18:11,  1.91s/it][A
Epoch 1 in training:   1%|          | 3/573 [00:04<13:49,  1.45s/it][A
Epoch 1 in training:   1%|          | 4/573 [00:05<10:48,  1.14s/it][A
Epoch 1 in training:   1%|          | 5/573 [00:05<08:47,  1.08it/s][A
Epoch 1 in training:   1%|          | 6/573 [00:06<07:30,  1.26it/s][A
Epoch 1 in training:   1%|          | 7/573 [00:07<06:47,  1.39it/s][A
Epoch 1 in training:   1%|▏         | 8/573 [00:07<06:18,  1.49it/s][A
Epoch 1 in training:   2%|▏         | 9/573 [00:08<05:55,  1.58it/s][A
Epoch 1 in training:   2%|▏         | 10/573 [00:08<05:44,  1.64it/s][A
Epoch 1 in training:   2%|▏         | 11/573 [00:09<05:38,  1.66it/s][A
Epoch 1 in training:   2%|▏         | 12/573 [00:09<05:31,  1.69it/s][A
Epoch 1 in training: 

Epoch 1/3 loss: 2.21



Epoch 2 in training:   0%|          | 0/573 [00:00<?, ?it/s][A
Epoch 2 in training:   0%|          | 1/573 [00:00<05:30,  1.73it/s][A
Epoch 2 in training:   0%|          | 2/573 [00:01<05:05,  1.87it/s][A
Epoch 2 in training:   1%|          | 3/573 [00:01<05:10,  1.83it/s][A
Epoch 2 in training:   1%|          | 4/573 [00:02<05:09,  1.84it/s][A
Epoch 2 in training:   1%|          | 5/573 [00:02<05:15,  1.80it/s][A
Epoch 2 in training:   1%|          | 6/573 [00:03<05:14,  1.80it/s][A
Epoch 2 in training:   1%|          | 7/573 [00:03<05:14,  1.80it/s][A
Epoch 2 in training:   1%|▏         | 8/573 [00:04<05:11,  1.81it/s][A
Epoch 2 in training:   2%|▏         | 9/573 [00:04<05:11,  1.81it/s][A
Epoch 2 in training:   2%|▏         | 10/573 [00:05<05:07,  1.83it/s][A
Epoch 2 in training:   2%|▏         | 11/573 [00:06<05:02,  1.86it/s][A
Epoch 2 in training:   2%|▏         | 12/573 [00:06<05:00,  1.87it/s][A
Epoch 2 in training:   2%|▏         | 13/573 [00:07<05:00,  1.86it/s

Epoch 2/3 loss: 2.20



Epoch 3 in training:   0%|          | 0/573 [00:00<?, ?it/s][A
Epoch 3 in training:   0%|          | 1/573 [00:00<05:03,  1.89it/s][A
Epoch 3 in training:   0%|          | 2/573 [00:01<05:01,  1.89it/s][A
Epoch 3 in training:   1%|          | 3/573 [00:01<04:59,  1.90it/s][A
Epoch 3 in training:   1%|          | 4/573 [00:02<04:54,  1.93it/s][A
Epoch 3 in training:   1%|          | 5/573 [00:02<04:52,  1.94it/s][A
Epoch 3 in training:   1%|          | 6/573 [00:03<04:54,  1.92it/s][A
Epoch 3 in training:   1%|          | 7/573 [00:03<04:57,  1.91it/s][A
Epoch 3 in training:   1%|▏         | 8/573 [00:04<04:53,  1.92it/s][A
Epoch 3 in training:   2%|▏         | 9/573 [00:04<04:52,  1.93it/s][A
Epoch 3 in training:   2%|▏         | 10/573 [00:05<04:53,  1.92it/s][A
Epoch 3 in training:   2%|▏         | 11/573 [00:05<04:54,  1.91it/s][A
Epoch 3 in training:   2%|▏         | 12/573 [00:06<04:52,  1.92it/s][A
Epoch 3 in training:   2%|▏         | 13/573 [00:06<04:50,  1.93it/s

Epoch 3/3 loss: 2.20
Entering Phas 2


Training:   0%|          | 0/3 [00:00<?, ?it/s]
Epoch 1 in training:   0%|          | 0/469 [00:00<?, ?it/s][A
Epoch 1 in training:   0%|          | 1/469 [00:00<06:47,  1.15it/s][A
Epoch 1 in training:   0%|          | 2/469 [00:01<06:36,  1.18it/s][A
Epoch 1 in training:   1%|          | 3/469 [00:02<06:28,  1.20it/s][A
Epoch 1 in training:   1%|          | 4/469 [00:03<06:17,  1.23it/s][A
Epoch 1 in training:   1%|          | 5/469 [00:03<05:33,  1.39it/s][A
Epoch 1 in training:   1%|▏         | 6/469 [00:04<05:07,  1.50it/s][A
Epoch 1 in training:   1%|▏         | 7/469 [00:04<04:48,  1.60it/s][A
Epoch 1 in training:   2%|▏         | 8/469 [00:05<04:58,  1.54it/s][A
Epoch 1 in training:   2%|▏         | 9/469 [00:06<04:42,  1.63it/s][A
Epoch 1 in training:   2%|▏         | 10/469 [00:06<04:34,  1.67it/s][A
Epoch 1 in training:   2%|▏         | 11/469 [00:07<04:23,  1.74it/s][A
Epoch 1 in training:   3%|▎         | 12/469 [00:07<04:31,  1.69it/s][A
Epoch 1 in training: 

Epoch 1/3 loss: 1.81



Epoch 2 in training:   0%|          | 0/469 [00:00<?, ?it/s][A
Epoch 2 in training:   0%|          | 1/469 [00:00<06:45,  1.16it/s][A
Epoch 2 in training:   0%|          | 2/469 [00:01<05:49,  1.34it/s][A
Epoch 2 in training:   1%|          | 3/469 [00:02<05:08,  1.51it/s][A
Epoch 2 in training:   1%|          | 4/469 [00:02<04:51,  1.59it/s][A
Epoch 2 in training:   1%|          | 5/469 [00:03<04:39,  1.66it/s][A
Epoch 2 in training:   1%|▏         | 6/469 [00:03<04:33,  1.69it/s][A
Epoch 2 in training:   1%|▏         | 7/469 [00:04<04:27,  1.73it/s][A
Epoch 2 in training:   2%|▏         | 8/469 [00:04<04:26,  1.73it/s][A
Epoch 2 in training:   2%|▏         | 9/469 [00:05<04:25,  1.74it/s][A
Epoch 2 in training:   2%|▏         | 10/469 [00:06<04:21,  1.75it/s][A
Epoch 2 in training:   2%|▏         | 11/469 [00:06<04:19,  1.77it/s][A
Epoch 2 in training:   3%|▎         | 12/469 [00:07<04:17,  1.78it/s][A
Epoch 2 in training:   3%|▎         | 13/469 [00:07<04:20,  1.75it/s

Epoch 2/3 loss: 1.68



Epoch 3 in training:   0%|          | 0/469 [00:00<?, ?it/s][A
Epoch 3 in training:   0%|          | 1/469 [00:00<07:04,  1.10it/s][A
Epoch 3 in training:   0%|          | 2/469 [00:01<05:39,  1.38it/s][A
Epoch 3 in training:   1%|          | 3/469 [00:02<05:07,  1.51it/s][A
Epoch 3 in training:   1%|          | 4/469 [00:02<05:26,  1.43it/s][A
Epoch 3 in training:   1%|          | 5/469 [00:03<05:52,  1.32it/s][A
Epoch 3 in training:   1%|▏         | 6/469 [00:04<06:13,  1.24it/s][A
Epoch 3 in training:   1%|▏         | 7/469 [00:05<06:29,  1.19it/s][A
Epoch 3 in training:   2%|▏         | 8/469 [00:06<06:34,  1.17it/s][A
Epoch 3 in training:   2%|▏         | 9/469 [00:07<06:28,  1.19it/s][A
Epoch 3 in training:   2%|▏         | 10/469 [00:07<05:57,  1.28it/s][A
Epoch 3 in training:   2%|▏         | 11/469 [00:08<05:35,  1.37it/s][A
Epoch 3 in training:   3%|▎         | 12/469 [00:09<05:16,  1.44it/s][A
Epoch 3 in training:   3%|▎         | 13/469 [00:09<05:07,  1.48it/s

Epoch 3/3 loss: 1.64





In [None]:
mat_test = scipy.io.loadmat('/content/drive/MyDrive/test_32x32.mat')

Data_test = []
Label_test = []

for i in range(mat_test['X'].T.shape[0]):
    a = mat['X'].T[i]
    a = np.mean(a, axis = 0)
    a = a.reshape((1, 32, 32)) 


    Data_test.append(a)
    Label_test.append(mat_test['y'][i])

Data_test = np.asarray(Data_test)
Label_test = np.asarray(Label_test)

Label_test[Label_test == 10] = 1

print(max(Label_test))

test_x = torch.Tensor(Data_test)
test_y = torch.Tensor(Label_test)

print(train_x[55].shape)
SVHN_dataset_test = utils.data.TensorDataset(test_x, test_y)

test_loader = DataLoader(SVHN_dataset_test, shuffle=False, batch_size=128)

NameError: ignored

In [None]:
with torch.no_grad():
    correct, total = 0, 0
    test_loss = 0.0
    for batch in tqdm(test_loader, desc="Testing"):
        x, y = batch
        x, y = x, y
        y = torch.reshape(y, (-1,))
        y = y.type(torch.LongTensor)
        y_hat = model(x)
        loss = criterion(y_hat, y)
        test_loss += loss.detach().cpu().item() / len(test_loader)

        correct += torch.sum(torch.argmax(y_hat, dim=1) == y).detach().cpu().item()
        total += len(x)
    print(f"Test loss: {test_loss:.2f}")
    print(f"Test accuracy: {correct / total * 100:.2f}%")

Testing: 100%|██████████| 204/204 [01:07<00:00,  3.02it/s]

Test loss: 2.35
Test accuracy: 11.07%





In [None]:
class USPS_Dataset(torch.utils.data.Dataset):
    def __init__(self, path, key):
        self.file_path = path
        self.dataset = None
        with h5py.File(self.file_path, 'r') as file:
            key_set = file.get(key)
            X = key_set.get("data")[:]
            y = key_set.get("target")[:]
        X = X.reshape(X.shape[0], 1, int(np.sqrt(X.shape[1])), int(np.sqrt(X.shape[1])))
        y = torch.from_numpy(y)
        y = y.type(torch.LongTensor)
        self.dataset = [(image, target) for image, target in zip(X, y)]
        self.dataset_len = len(self.dataset)


    def __getitem__(self, index):
        X, y = self.dataset[index]
        tmp = Image.fromarray(X[0])
        tmp = tmp.resize((32, 32), resample=Image.Resampling.BICUBIC)
        tmp = np.array(tmp, dtype=np.uint8)
        tmp[tmp==0] = 1/255
        return (torch.from_numpy(tmp.reshape(1, tmp.shape[0], tmp.shape[1])), y)


    def __len__(self):
        return self.dataset_len

In [None]:
test_set = USPS_Dataset("/content/drive/MyDrive/usps.h5", "train")
test_loader = DataLoader(test_set, shuffle=False, batch_size=128)

In [None]:
with torch.no_grad():
    correct, total = 0, 0
    test_loss = 0.0
    for batch in tqdm(test_loader, desc="Testing"):
        x, y = batch
        x, y = x, y
        y_hat = model(x)
        loss = criterion(y_hat, y)
        test_loss += loss.detach().cpu().item() / len(test_loader)

        correct += torch.sum(torch.argmax(y_hat, dim=1) == y).detach().cpu().item()
        total += len(x)
    print(f"Test loss: {test_loss:.2f}")
    print(f"Test accuracy: {correct / total * 100:.2f}%")

Testing: 100%|██████████| 57/57 [00:20<00:00,  2.85it/s]

Test loss: 2.21
Test accuracy: 24.08%





In [None]:
import torchvision
import tempfile

dataset = torchvision.datasets.MNIST(root=tempfile.gettempdir(), download=True,
    train=False,transform=torchvision.transforms.Compose([
        torchvision.transforms.Resize(32), torchvision.transforms.ToTensor()]
    ),
)
test_loader = DataLoader(dataset, shuffle=False, batch_size=128)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /tmp/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 92028694.41it/s]


Extracting /tmp/MNIST/raw/train-images-idx3-ubyte.gz to /tmp/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /tmp/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 33024998.32it/s]

Extracting /tmp/MNIST/raw/train-labels-idx1-ubyte.gz to /tmp/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /tmp/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 25991383.91it/s]


Extracting /tmp/MNIST/raw/t10k-images-idx3-ubyte.gz to /tmp/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /tmp/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 14080213.43it/s]


Extracting /tmp/MNIST/raw/t10k-labels-idx1-ubyte.gz to /tmp/MNIST/raw



In [None]:
with torch.no_grad():
    correct, total = 0, 0
    test_loss = 0.0
    for batch in tqdm(test_loader, desc="Testing"):
        x, y = batch
        x, y = x, y
        y_hat = model(x)
        loss = criterion(y_hat, y)
        test_loss += loss.detach().cpu().item() / len(test_loader)

        correct += torch.sum(torch.argmax(y_hat, dim=1) == y).detach().cpu().item()
        total += len(x)
    print(f"Test loss: {test_loss:.2f}")
    print(f"Test accuracy: {correct / total * 100:.2f}%")

Testing: 100%|██████████| 79/79 [00:28<00:00,  2.74it/s]

Test loss: 1.98
Test accuracy: 47.92%





In [None]:
import h5py 
from functools import reduce
def hdf5(path, data_key = "data", target_key = "target", flatten = True):

    with h5py.File(path, 'r') as hf:
        train = hf.get('train')
        X_tr = train.get(data_key)[:]
        y_tr = train.get(target_key)[:]
        test = hf.get('test')
        X_te = test.get(data_key)[:]
        y_te = test.get(target_key)[:]
        if flatten:
            X_tr = X_tr.reshape(X_tr.shape[0], reduce(lambda a, b: a * b, X_tr.shape[1:]))
            X_te = X_te.reshape(X_te.shape[0], reduce(lambda a, b: a * b, X_te.shape[1:]))
    
    return X_tr, y_tr, X_te, y_te

In [None]:
X_usps_tr, y_usps_tr, X_usps_te, y_usps_te = hdf5("/content/drive/MyDrive/usps.h5")
X_usps_tr.shape, X_usps_te.shape

((7291, 256), (2007, 256))

In [None]:
num_samples = 10
num_classes = len(set(y_usps_tr))
usps_samples = []

classes = set(y_usps_tr)
num_classes = len(classes)
#fig, ax = plt.subplots(num_samples, num_classes, sharex = True, sharey = True, figsize=(num_classes, num_samples))

for label in range(num_classes):
    class_idxs = np.where(y_usps_tr == 8)
    for i, idx in enumerate(np.random.randint(0, class_idxs[0].shape[0], num_samples)):
        tmp = Image.fromarray(X_usps_tr[class_idxs[0][idx]]*255)
        tmp = tmp.resize((32,32), resample=Image.Resampling.BICUBIC)
        tmp = np.array(tmp, dtype=np.uint8)
        tmp[tmp==0] = 1
        usps_samples.append(tmp)

In [None]:
usps_samples = np.asarray(usps_samples)
usps_samples.shape

(100, 32, 32)

In [None]:
SHVN_samples = []
for i in range(mat['X'].T.shape[0]):
    a = mat['X'].T[i]
    a = np.mean(a, axis = 0)
    a.shape

    if (mat['y'][i][0] == 8):
        SHVN_samples.append(a)

SHVN_samples = np.asarray(SHVN_samples)

In [None]:
import torchvision
import tempfile

MNIST_samples = []

dataset = torchvision.datasets.MNIST(root=tempfile.gettempdir(), download=True,
    train=True,transform=torchvision.transforms.Compose([
        torchvision.transforms.Resize(32), torchvision.transforms.ToTensor()]
    ),
)
test_loader = DataLoader(dataset, shuffle=False, batch_size=1)

for batch in tqdm(test_loader, desc="Testing"):
    x, y = batch
    if(y == 1):
        x = x.detach().numpy()
        x[x==0] = 8
        MNIST_samples.append(x[0][0])
        

Testing: 100%|██████████| 60000/60000 [00:39<00:00, 1522.21it/s]


In [None]:
mnist_and_svhn = []

for i in range(100):
    a = MNIST_samples[i] + SHVN_samples[i]
    mnist_and_svhn.append(a)


mnist_and_svhn = np.asarray(mnist_and_svhn)

In [None]:
len(MNIST_samples)

5421

In [None]:
def D_KL(p, q):
    return np.dot(p/np.sum(p),
        (
            np.log(p/np.sum(p))
        ) - 
        (
            np.log(q/np.sum(q))
        )
    )

In [None]:
for j in range(5):
    a = np.random.permutation(99)
    print("j: ",j, ", D_KL: ", D_KL(SHVN_samples[0].ravel(), SHVN_samples[a[0]].ravel()))

j:  0 , D_KL:  0.05657592523650569
j:  1 , D_KL:  0.08189516157698828
j:  2 , D_KL:  0.052518477778733696
j:  3 , D_KL:  0.07793884591987824
j:  4 , D_KL:  0.023451813131678594


In [None]:
for j in range(5):
    a = np.random.permutation(99)
    print("j: ",j, ", D_KL: ", D_KL(SHVN_samples[0].ravel(), usps_samples[a[0]].ravel()))

j:  0 , D_KL:  0.18032699706744665
j:  1 , D_KL:  0.15732199390189355
j:  2 , D_KL:  0.15429816198340462
j:  3 , D_KL:  0.13712380963081827
j:  4 , D_KL:  0.6869462171453429


In [None]:
for j in range(5):
    a = np.random.permutation(5000)
    print("j: ",j, ", D_KL: ", D_KL(SHVN_samples[0].ravel(), MNIST_samples[a[0]].ravel()))

j:  0 , D_KL:  0.4147882779542582
j:  1 , D_KL:  0.3675923524270448
j:  2 , D_KL:  0.3871402434239573
j:  3 , D_KL:  0.4075098328203004
j:  4 , D_KL:  0.41379941282971844


In [None]:
for j in range(5):
    a = np.random.permutation(100)
    print("j: ",j, ", D_KL: ", D_KL(mnist_and_svhn[0].ravel(), usps_samples[a[0]].ravel()))

j:  0 , D_KL:  0.14980464570514945
j:  1 , D_KL:  0.11204769197058867
j:  2 , D_KL:  0.07836818243454427
j:  3 , D_KL:  0.08433996268387739
j:  4 , D_KL:  0.24996483835599
