In [312]:
import numpy as np
train_npz_file = np.load('../data/processed/train.npz')
X_train, y_train = train_npz_file['arr_0'], train_npz_file['arr_1']

In [315]:
X_train = X_train.reshape(X_train.shape[0], 1, 105, 105)
X_train.shape

(19280, 1, 105, 105)

In [3]:
import pandas as pd
y_train_pd = pd.DataFrame(data=y_train, columns=['Alphabet', 'Character', 'Drawer'])

In [6]:
a = [np.ones(5) for i in range(3)]
np.stack(a)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [8]:
a = [x[0] + '_' + x[1] for x in y_train[:5]]

In [9]:
b = pd.DataFrame(data=a, columns=['Character'])

In [15]:
b.Character.unique()

array(['Gujarati_character42'], dtype=object)

In [20]:
a = [x[0] + '_' + x[1] for x in y_train]
characters = pd.DataFrame(data=a, columns=['Character'])

In [21]:
characters.Character.unique().shape

(964,)

In [23]:
rnd_chars = np.random.choice(characters.Character.unique(), 20, replace=False)

In [25]:
np.random.choice(rnd_chars, 2, replace=False) 

array(['Syriac_(Estrangelo)_character03', 'Burmese_(Myanmar)_character07'],
      dtype=object)

In [30]:
y_train_pd.sample?

[0;31mSignature:[0m [0my_train_pd[0m[0;34m.[0m[0msample[0m[0;34m([0m[0mn[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mfrac[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mreplace[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m [0mweights[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mrandom_state[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0maxis[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Returns a random sample of items from an axis of object.

Parameters
----------
n : int, optional
    Number of items from axis to return. Cannot be used with `frac`.
    Default = 1 if `frac` = None.
frac : float, optional
    Fraction of axis items to return. Cannot be used with `n`.
replace : boolean, optional
    Sample with or without replacement. Default = False.
weights : str or ndarray-like, optional
    Default 'None' results in equal probability weighting.
    If passed a Series, will align with target object on index. Index
    values in weight

In [34]:
y_train_pd[characters.Character == rnd_chars[0]].sample(2).index.values

array([16693, 16696])

In [36]:
rnd_drawers = np.random.choice(y_train_pd.Drawer.unique(), 2, replace=False)

In [49]:
train_inds = y_train_pd[
    (y_train_pd.Drawer == rnd_drawers[1]) &
    (characters.Character.isin(rnd_chars))
].index.values

In [50]:
train_inds

array([  467,  1805,  2902,  4334,  4987,  5046,  6332,  6461,  9057,
        9125, 11985, 12179, 14255, 14314, 15258, 15426, 15446, 16694,
       17497, 18508])

How to get 19 additional characters that are not `Gujarati_character42`

In [58]:
characters[~(characters.Character == 'Gujarati_character42')].sample(19)

Unnamed: 0,Character
2516,Malay_(Jawi_-_Arabic)_character10
16806,Syriac_(Estrangelo)_character23
18259,Bengali_character26
5126,Futurama_character09
11249,Greek_character16
11930,Ojibwe_(Canadian_Aboriginal_Syllabics)_charact...
1125,Korean_character21
19040,Inuktitut_(Canadian_Aboriginal_Syllabics)_char...
5314,N_Ko_character27
12005,Japanese_(katakana)_character45


In [62]:
np.random.choice(characters.Character.unique(), 20, replace=False)

array(['Bengali_character40', 'N_Ko_character02',
       'Asomtavruli_(Georgian)_character01', 'Latin_character22',
       'Asomtavruli_(Georgian)_character16', 'N_Ko_character28',
       'Grantha_character11', 'Balinese_character19',
       'Asomtavruli_(Georgian)_character07', 'Latin_character12',
       'Tifinagh_character43', 'Asomtavruli_(Georgian)_character21',
       'Japanese_(hiragana)_character10', 'Early_Aramaic_character16',
       'Mkhedruli_(Georgian)_character01', 'Tifinagh_character20',
       'Armenian_character01', 'Malay_(Jawi_-_Arabic)_character28',
       'Japanese_(katakana)_character38', 'Grantha_character01'],
      dtype=object)

Seems there are a couple of ways we could generate batches of one-shot trials for training.  Want them all to be based on caching indices so they are fast and memory efficient.  Here they are:
* Just randomly generate a bunch of one-shot tasks by picking characters and two drawers.  Enough of these should do a good job at representing the intricacies of the dataset and comparing a character against a bunch of others.
* Go through and generate a bunch of batches for each specific character by: 
    1. Selecting 19 other characters.
    2. Selecting 2 drawers.
    3. Find the index numbers for the 21 characters that result from assigning 1 drawer
    to the character under consideration and another drawer to both the character under  consideration and the 19 remaining characters.  
    4. Track classes appropriately

An idea for tracking classes is just to give `np.arange(20)` to the train images in turn and then find the corresponding number for the matching test image.  These can be sent to one-hot at the appropriate time.  

For the second method, these batch indices would be stored to a dictionary under the key of the character.  Then to cycle through batches, we could generate a random ordering for unique characters and cycle through the batches in the dictionary items in the same way that we did for the caching of siamese tasks.  

This almost seems to be the best way to do it.  That way we make sure that we get a good amount of practice with each character, so the model will see a real variety of images and 20-way one-shot tasks.    

In [158]:
def get_one_shot_indices(ch, y=y_train_pd, c=characters, bs=32):
    """
    inputs a character and generates a one-shot task for it
    by selecting two random drawers and 19 other characters
    to form a one-shot task around.  This is all done in terms
    of indices.  We can then use this to store a large number
    of one-shot tasks for each character.  
    """
    tst_inds, trn_inds = [], []
    for _ in range(bs):
        rnd_dr = np.random.choice(y.Drawer.unique(),
                                  2, replace=False)
        rnd_ch = np.random.choice(
                    c.Character[c.Character != ch].unique(),
                    19, replace=False)
        rnd_ch = np.append(ch, rnd_ch)
        c_trn_inds = y[(c.Character.isin(rnd_ch)) &
                       (y.Drawer == rnd_dr[0])].index.values
        c_tst_inds = y[(c.Character == ch) &
                       (y.Drawer == rnd_dr[1])].index.values
        cls = np.argmax(
                c.Character.iloc[c_trn_inds].isin([ch]).values
            )
        tst_inds.append((c_tst_inds[0], cls))
        trn_inds.append(c_trn_inds)
        # print(y.iloc[c_tst_inds])
        # print(y.iloc[c_trn_inds[0]]) 
    return np.array(tst_inds), np.array(trn_inds)

In [341]:
import torch
import torch.nn as nn

class BatchingForMatching:
    
    def __init__(self, X, y, mode='cache', cache_size=20,
                       batch_size=32, n_way=20):
        self.X = X
        self.c = pd.DataFrame(
                        data=[ch[0] + '_' + ch[1] for ch in y],
                        columns=['Character']
                    )
        self.unique_characters = np.random.permutation(
                                    self.c.Character.unique())
        self.num_uc = self.unique_characters.shape[0]
        self.batch_num = 0 # keep track of current batch number
        self.curr_cache = 0 # and current position in the cache dict 
        self.y = pd.DataFrame(
                        data=y,
                        columns=['Alphabet', 'Character', 'Drawer']
                    )
        self.mode = mode
        self.n_way = n_way
        self.cache_size = cache_size
        self.batch_size = batch_size
        self.cache = {}
        self.form_cache()
    
        self.n = X.shape[0]
        self.current_task_number = 0
        
    def get_one_shot_indices(self, ch): #,  y=y_train_pd, c=characters, bs=32):
        """
        inputs a character and generates a one-shot task for it
        by selecting two random drawers and 19 other characters
        to form a one-shot task around.  This is all done in terms
        of indices.  We can then use this to store a large number
        of one-shot tasks for each character.  
        """
        tst_inds, trn_inds = [], []
        for _ in range(self.batch_size):
            rnd_dr = np.random.choice(self.y.Drawer.unique(),
                                  2, replace=False)
            rnd_ch = np.random.choice(
                    self.c.Character[self.c.Character != ch].unique(),
                    19, replace=False)
            rnd_ch = np.append(ch, rnd_ch)
            c_trn_inds = self.y[(self.c.Character.isin(rnd_ch)) &
                       (self.y.Drawer == rnd_dr[0])].index.values
            c_tst_inds = self.y[(self.c.Character == ch) &
                       (self.y.Drawer == rnd_dr[1])].index.values
            cls = np.argmax(
                self.c.Character.iloc[c_trn_inds].isin([ch]).values
                )
            tst_inds.append((c_tst_inds[0], cls))
            trn_inds.append(c_trn_inds)
            # print(y.iloc[c_tst_inds])
            # print(y.iloc[c_trn_inds[0]]) 
        return np.array(tst_inds), np.array(trn_inds)
    
    def form_cache(self):
        for ch in self.unique_characters:
            self.cache[ch] = []
            for _ in range(self.cache_size):
                self.cache[ch].append(self.get_one_shot_indices(ch))
                
    def generate_batch(self):
        """
        This is more or less giving what we wanted.  Now, the thing
        to do is to adjust the output so that it is putting out the
        torch tensors that we want for training...  So we need to
        form these from what is currently out, and then output them
        instead.  Cool beans.  
        """
        out = self.cache[
                    self.unique_characters[self.batch_num]
                ][self.curr_cache]
        self.batch_num = (1 + self.batch_num) % self.num_uc
        if self.batch_num == 0:
            self.curr_cache = (1 + self.curr_cache) % self.cache_size
            
        tst_inds, trn_inds = out
        
        # Construct torch tensors for target images and classes
        target_ims = torch.tensor(self.X[tst_inds[:, 0]])
        target_classes = torch.tensor(tst_inds[:, 1])
        # target_classes = np.zeros((self.batch_size, self.n_way))
        # target_classes[np.arange(self.batch_size), tst_inds[:, 1]] = 1
        # target_classes = torch.tensor(target_classes, dtype=torch.int32)
        
        # Construct torch tensors for support set images and classes
        support_set_ims = torch.stack(
                [torch.tensor(self.X[trn_inds[i, :]]) for i in range(self.batch_size)]
            )
        support_set_classes = torch.stack(
                [torch.eye(self.n_way) for i in range(self.batch_size)]
            )
        return target_ims, target_classes, support_set_ims, support_set_classes

In [408]:
%time Data = BatchingForMatching(X_train, y_train, cache_size=4)

CPU times: user 19min 18s, sys: 12.8 s, total: 19min 31s
Wall time: 19min 43s


This initialization is quite slow.  Another way to do it would be to cache things as we generate them.  So each minibatch we generate is cached until the internal dictionary reaches the cache specs required.  Then, after this, we just reference cache on calling generate_batch()...  Let's try this out.

In [409]:
(a, b, c, d) = Test_BatchingForMatching.generate_batch()

In [410]:
b

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2])

In [273]:
import torch
import torch.nn as nn

batch_size, im_dim, n_channels, n_way = 32, 105, 1, 20

x = torch.randn(batch_size, im_dim, im_dim, n_channels)

def flatten(x):
    N = x.shape[0] # read in N, C, H, W
    return x.view(N, -1)

class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)

In [274]:
encoder = torch.nn.Sequential(
                torch.nn.Conv2d(
                    in_channels=1, 
                    out_channels=64,
                    kernel_size=5
                ),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(2),
                torch.nn.Dropout2d(),
                torch.nn.Conv2d(
                    in_channels=64, 
                    out_channels=128,
                    kernel_size=3
                ),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(2),
                torch.nn.Dropout2d(),
                Flatten(),
                torch.nn.Linear(73728, 128) #int(128 * 3 * 3 / 4), 128)
            )

In [356]:
loss = torch.nn.CrossEntropyLoss()

In [276]:
target = torch.randn(batch_size, n_channels, im_dim, im_dim)
support_set = torch.randn(batch_size, n_way, n_channels, im_dim, im_dim)
support_set_classes = torch.randn(batch_size, n_way, n_way)

In [278]:
cos = nn.CosineSimilarity(dim=2)

In [279]:
repeats = [1 for _ in range(128)]
repeats[0] = n_way

target_embedding = encoder(target)

support_set_embeddings = torch.stack(
        [encoder(support_set[:, i, :, :, :]) for i in range(n_way)]
    )

print(support_set_embeddings.size(), 
      target_embedding.repeat([n_way, 1, 1]).size())
similarities = cos(
            target_embedding.repeat([n_way, 1, 1]), 
            support_set_embeddings
  ).t()

torch.Size([20, 32, 128]) torch.Size([20, 32, 128])


In [280]:
similarities.shape

torch.Size([32, 20])

In [281]:
softmax = torch.nn.Softmax(dim=1)
softmax_sims = softmax(similarities)
print(softmax_sims.shape)
preds = softmax_sims.unsqueeze(1).bmm(support_set_classes).squeeze()

torch.Size([32, 20])


In [282]:
preds.shape

torch.Size([32, 20])

In [283]:
target_classes = torch.rand(batch_size, n_way)

In [288]:
torch.tensor(np.zeros((5, 3)))

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float64)

In [287]:
torch.tensor?

[0;31mDocstring:[0m
tensor(data, dtype=None, device=None, requires_grad=False) -> Tensor

Constructs a tensor with :attr:`data`.


    :func:`torch.tensor` always copies :attr:`data`. If you have a Tensor
    ``data`` and want to avoid a copy, use :func:`torch.Tensor.requires_grad_`
    or :func:`torch.Tensor.detach`.
    If you have a NumPy ``ndarray`` and want to avoid a copy, use
    :func:`torch.from_numpy`.

Args:
    data (array_like): Initial data for the tensor. Can be a list, tuple,
        NumPy ``ndarray``, scalar, and other types.
    dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
        Default: if ``None``, infers data type from :attr:`data`.
    device (:class:`torch.device`, optional): the desired device of returned tensor.
        Default: if ``None``, uses the current device for the default tensor type
        (see :func:`torch.set_default_tensor_type`). :attr:`device` will be the CPU
        for CPU tensor types and the current C

In [293]:
torch.tensor(np.eye(2), dtype=torch.int32)

tensor([[1, 0],
        [0, 1]], dtype=torch.int32)

In [294]:
X_train.shape

(19280, 105, 105, 1)

In [295]:
torch.eye(5)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [319]:
USE_GPU = True

dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cpu


In [None]:
# We need to wrap `flatten` function in a module in order to stack it
# in nn.Sequential
class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)

hidden_layer_size = 4000
learning_rate = 1e-2

model = nn.Sequential(
    Flatten(),
    nn.Linear(3 * 32 * 32, hidden_layer_size),
    nn.ReLU(),
    nn.Linear(hidden_layer_size, 10),
)

# you can use Nesterov momentum in optim.SGD
optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                     momentum=0.9, nesterov=True)

train_part34(model, optimizer)


def train_part34(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [354]:
optimizer = torch.optim.Adam(encoder.parameters())

In [322]:
encoder = encoder.to(device=device)

In [400]:
target_ims, target_classes, support_set_ims, support_set_classes = Test_BatchingForMatching.generate_batch()

In [391]:
dtype = torch.float32

In [401]:
target_ims = target_ims.to(device=device, dtype=dtype)
target_classes = target_classes.to(device=device, dtype=torch.long)
support_set_ims = support_set_ims.to(device=device, dtype=dtype)
support_set_classes = support_set_classes.to(device=device, dtype=dtype)

In [402]:
target_embedding = encoder(target_ims)

support_set_embeddings = torch.stack(
        [encoder(support_set_ims[:, i, :, :, :]) for i in range(n_way)]
    )

print(support_set_embeddings.size(), target_embedding.repeat([n_way, 1, 1]).size())
similarities = cos(
            target_embedding.repeat([n_way, 1, 1]), 
            support_set_embeddings
  ).t()

torch.Size([20, 32, 128]) torch.Size([20, 32, 128])


In [403]:
softmax = torch.nn.Softmax(dim=1)
softmax_sims = softmax(similarities)
print(softmax_sims.shape)
preds = softmax_sims.unsqueeze(1).bmm(support_set_classes).squeeze()

torch.Size([32, 20])


In [404]:
lp = loss(preds, target_classes)

In [405]:
optimizer.zero_grad()
lp.backward()
optimizer.step()

In [406]:
torch.argmax(preds, dim=1)

tensor([ 3, 18,  1,  8,  5, 16,  1,  2, 13,  2,  0,  1,  2, 15,  6, 19, 13,  6,
        15,  2, 12, 10, 13,  9,  0,  7, 11,  8,  1, 15, 16, 15])

In [407]:
target_classes

tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10])

In [381]:
support_set_classes.shape

torch.Size([32, 20, 20])

In [399]:
lp

tensor(2.9943, grad_fn=<NllLossBackward>)

In [424]:
for i in range(10):
    # Pull batch from batch generator
    target_ims, target_classes, support_set_ims, support_set_classes = Data.generate_batch()
    
    # Push batch to device
    target_ims = target_ims.to(device=device, dtype=dtype)
    target_classes = target_classes.to(device=device, dtype=torch.long)
    support_set_ims = support_set_ims.to(device=device, dtype=dtype)
    support_set_classes = support_set_classes.to(device=device, dtype=dtype)
    
    # Push batch through the model
    target_embedding = encoder(target_ims)
    support_set_embeddings = torch.stack(
        [encoder(support_set_ims[:, i, :, :, :]) for i in range(n_way)]
        )

    similarities = cos(
            target_embedding.repeat([n_way, 1, 1]), 
            support_set_embeddings
        ).t()
    softmax = torch.nn.Softmax(dim=1)
    softmax_sims = softmax(similarities)
    preds = softmax_sims.unsqueeze(1).bmm(support_set_classes).squeeze()
    
    # Evaluate loss and push gradients back through the graph
    lp = loss(preds, target_classes)
    optimizer.zero_grad()
    lp.backward()
    optimizer.step()

In [427]:
float((torch.argmax(preds, dim=1) == target_classes).sum()) / 32

0.0625

In [428]:
torch.nn.Dropout2d?

[0;31mInit signature:[0m [0mtorch[0m[0;34m.[0m[0mnn[0m[0;34m.[0m[0mDropout2d[0m[0;34m([0m[0mp[0m[0;34m=[0m[0;36m0.5[0m[0;34m,[0m [0minplace[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Randomly zeroes whole channels of the input tensor.
The channels to zero-out are randomized on every forward call.

Usually the input comes from :class:`nn.Conv2d` modules.

As described in the paper
`Efficient Object Localization Using Convolutional Networks`_ ,
if adjacent pixels within feature maps are strongly correlated
(as is normally the case in early convolution layers) then i.i.d. dropout
will not regularize the activations and will otherwise just result
in an effective learning rate decrease.

In this case, :func:`nn.Dropout2d` will help promote independence between
feature maps and should be used instead.

Args:
    p (float, optional): probability of an element to be zero-ed.
    inplace (bool, optional): If set to ``True``, will

In [429]:
encoder.parameters().shape

AttributeError: 'generator' object has no attribute 'shape'

In [430]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [431]:
count_parameters(encoder)

9512832