In [2]:
# Mount Google Drive at /content/gdrive; the label CSVs and image folders used
# by the later cells are read from paths under that mount point.
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime - confirm.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
import os 
import pandas as pd 
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from skimage import io
import torch.nn.functional as F
import torch.nn as nn

from torch.utils.data import DataLoader
from torch.utils.data import Sampler
from torch.nn import Module
import torch.optim 
import random
import numpy as np
import time
from tqdm import tqdm
from time import sleep

In [4]:
class NShotTaskSampler(Sampler):
    """Sampler that yields the sample ids of whole n-shot, k-way, q-query episodes."""

    def __init__(self,
                 dataset: torch.utils.data.Dataset,
                 episodes_per_epoch: int = None,
                 n: int = None,
                 k: int = None,
                 q: int = None,
                 num_tasks: int = 1
    ):
        """PyTorch Sampler subclass that generates batches of n-shot, k-way, q-query tasks.

        Each task contains a "support set" of `k` classes with `n` samples each and a
        "query set" of the same `k` classes with `q` samples each. Within one task the
        first n * k ids belong to the support set and the remaining q * k ids to the
        query set; when `num_tasks > 1` the tasks are concatenated one after another.

        The support and query sets of a task are disjoint, i.e. no id appears in both.

        # Arguments
            dataset: object exposing a pandas DataFrame `df` with the columns
                `class_id` and `id` (both are read by `__iter__`).
            episodes_per_epoch: number of batches yielded by one pass over the sampler.
            n: number of support samples per class.
            k: number of classes per task.
            q: number of query samples per class.
            num_tasks: number of tasks grouped into a single yielded batch.

        # Raises
            ValueError: if `num_tasks` is smaller than 1.
        """
        if num_tasks < 1:
            raise ValueError('num_tasks must be >= 1.')

        # Older torch releases require the (unused) `data_source` argument while
        # newer ones warn about it, so try the argument-free form first.
        try:
            super(NShotTaskSampler, self).__init__()
        except TypeError:
            super(NShotTaskSampler, self).__init__(dataset)

        self.episodes_per_epoch = episodes_per_epoch
        self.dataset = dataset
        self.num_tasks = num_tasks
        # TODO: Raise errors if initialise badly
        self.k = k
        self.n = n
        self.q = q

        self.i_task = 0

    def __len__(self):
        """Number of episodes (batches) produced per epoch."""
        return self.episodes_per_epoch

    def __iter__(self):
        """Yield one 1-D numpy array of sample ids per episode."""
        for _ in range(self.episodes_per_epoch):
            frame = self.dataset.df
            batch = []

            for _task in range(self.num_tasks):
                # Draw k distinct classes for this task.
                episode_classes = np.random.choice(frame['class_id'].unique(), size=self.k, replace=False)

                # Support ids first, class by class. Reading the `id` column directly
                # (instead of iterrows) keeps the ids in their original dtype.
                support_ids = {}
                for cls in episode_classes:
                    chosen = frame.loc[frame['class_id'] == cls, 'id'].sample(self.n)
                    support_ids[cls] = chosen
                    batch.extend(chosen.tolist())

                # Then the query ids, in the same class order, excluding the ids
                # already used as support for that class.
                for cls in episode_classes:
                    candidates = frame.loc[frame['class_id'] == cls, 'id']
                    candidates = candidates[~candidates.isin(support_ids[cls])]
                    batch.extend(candidates.sample(self.q).tolist())

            yield np.asarray(batch)

In [5]:
class SignatureDataset(Dataset):
    """Map-style dataset that serves a whole group of images per `__getitem__` call."""

    def __init__(self, label_file, root_dir, transform = None):
        '''Arguments:
        label_file: path to a csv file; column 0 is read as the image file name
            and column 1 as the integer label
        root_dir: directory that contains the image files
        transform: optional callable applied to every loaded image (default: None)'''
        self.transform = transform
        self.root_dir = root_dir
        self.df = pd.read_csv(label_file)

    def __len__(self):
        '''number of rows in the label table'''
        return self.df.shape[0]

    def __getitem__(self, index):
        '''Load every row position listed in `index` (an iterable of positions).

        Returns a tuple (list of images, 1-D tensor of labels) in the order
        given by `index`.'''
        loaded = []
        targets = []
        for row in index:
            file_name = str(self.df.iloc[row, 0])
            picture = io.imread(os.path.join(self.root_dir, file_name))
            target = torch.tensor(int(self.df.iloc[row, 1]))

            # Apply the optional per-image transformation.
            if self.transform:
                picture = self.transform(picture)

            loaded.append(picture)
            targets.append(target)

        return (loaded, torch.stack(targets))

In [7]:
# Sanity check of the episode pipeline: build the datasets, samplers and loaders,
# then draw one training episode and inspect its labels.
n= 5 #number of support samples per class (shots)
path = '' #change this string to path where we will save the model
k= 5 #number of classes per episode (ways)
q = 5 #number of query samples per class
train_dataset = SignatureDataset('/content/gdrive/My Drive/train_labels.csv', '/content/gdrive/My Drive/full_org')
val_dataset = SignatureDataset('/content/gdrive/My Drive/val_labels.csv','/content/gdrive/My Drive/full_org_val')
fsh_sampler_train = NShotTaskSampler(dataset = train_dataset, episodes_per_epoch=100, n = n, k = k, q = q)
fsh_sampler_val = NShotTaskSampler(dataset = val_dataset, episodes_per_epoch=100, n = n, k = k, q = q)
  #few shot learning samplers: every item they yield is the id array of one whole episode
train_dataloader = DataLoader(train_dataset, sampler=fsh_sampler_train) #training dataloader
val_dataloader = DataLoader(val_dataset, sampler=fsh_sampler_val) #validation dataloader

# device = torch.device('cuda')
# model = Matching(K = 5,input_size = 64, hidden_size=64, layers=5, n_shot= n, k_way=k, q=q, is_full_context_embedding=True) #model for this case
# optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3, momentum = 0.9) #optimizer for training, test ADAM as well
# train(model, optimizer, train_dataloader, val_dataloader, n, k, q, device, 10)
# torch.save(model, path)
# The labels arrive with a leading axis of size 1 (see the printed shape below):
# the first n*k entries are the support labels, the remaining q*k the query labels.
_, labels = next(iter(train_dataloader))
print(labels.shape, labels)

torch.Size([1, 50]) tensor([[12, 12, 12, 12, 12,  5,  5,  5,  5,  5, 23, 23, 23, 23, 23, 24, 24, 24,
         24, 24,  9,  9,  9,  9,  9, 12, 12, 12, 12, 12,  5,  5,  5,  5,  5, 23,
         23, 23, 23, 23, 24, 24, 24, 24, 24,  9,  9,  9,  9,  9]])


In [6]:
# Device shared by the cells below: use the GPU when one is visible and fall
# back to the CPU otherwise, so the notebook also runs on CPU-only runtimes.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class Encoder(nn.Module):
    def __init__(self, input_channels, hidden_channels):
        '''Encoder network to encode the various inputs to embedding space
        Arguments
        ---------
        input_channels: number of channels of the input image
        hidden_channels: number of channels of hidden features; this is also
            the size of the embedding returned for every image'''
        super(Encoder, self).__init__()
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.main = nn.Sequential(
            nn.Conv2d(self.input_channels, self.hidden_channels, kernel_size=3),
            nn.ReLU(),
            nn.BatchNorm2d(self.hidden_channels),
            nn.Conv2d(self.hidden_channels, self.hidden_channels, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(self.hidden_channels),
            nn.Conv2d(self.hidden_channels, self.hidden_channels, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(self.hidden_channels),
            nn.Conv2d(self.hidden_channels, self.hidden_channels, kernel_size=3),
            nn.ReLU(),
            nn.BatchNorm2d(self.hidden_channels),
            # Global max pooling collapses any remaining spatial extent to 1x1.
            nn.AdaptiveMaxPool2d((1,1)),
            nn.Flatten()
        )

    def forward(self, x):
        '''Embed every image of `x` separately and stack the embeddings.

        Arguments
        ---------
        x: sequence (list or tensor) whose items are single images of shape
            (input_channels, height, width)

        Returns
        -------
        tensor of shape (len(x), hidden_channels)'''
        # Follow the device the weights live on instead of a module-level global,
        # so the encoder works wherever the model has been moved to.
        target_device = next(self.parameters()).device
        # Images are encoded one at a time, which lets them differ in size.
        embeddings = [
            self.main(image.unsqueeze(0).to(target_device).float())
            for image in x
        ]
        return torch.cat(embeddings, dim = 0)
class AttLSTM(nn.Module):
    def __init__(self, K, input_size, hidden_size):
        '''attention LSTM with skip connections
        Arguments
        ---------
        K: number of processing steps
        input_size: size of input
        hidden_size: size of hidden features; `forward` feeds the query
            embeddings in as the initial hidden state and adds them back as a
            skip connection, so it only works when hidden_size == input_size'''
        super(AttLSTM, self).__init__()
        self.processing = K #number of times the single LSTM cell is applied
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
        self.softmax = nn.Softmax(dim = 1)

    def forward(self, f_x, g_S):
        '''Refine the query embeddings by repeatedly attending over the support set.

        Arguments
        ---------
        f_x: query embeddings, shape (num_query, feature_size)
        g_S: support embeddings, shape (num_support, feature_size)

        Returns
        -------
        tensor with the same shape as f_x'''
        h = f_x
        # Cell state starts at zero on the same device/dtype as the input, so no
        # module-level device global is needed.
        c = torch.zeros_like(f_x)
        for _ in range(self.processing):
            scores = torch.matmul(h, g_S.T) #(num_query, num_support)
            a = self.softmax(scores) #attention over the support samples
            r = torch.matmul(a, g_S) #attention-weighted read-out of the support set
            h_plus_r = h + r #hidden state and read-out are summed, not concatenated
            h, c = self.lstm(f_x, (h_plus_r, c))
            h = h + f_x #skip connection
        return h

class Matching(nn.Module):# change attention LSTM to transformer.(i.e add transformer option)
    #change bidirectional lstm to one with skip connection.
    '''Matching-network style model: embeds one episode and returns, for every
    query sample, a softmax attention distribution over the support samples.'''

    def __init__(self, K, input_size, hidden_size, layers, n_shot, k_way, q, is_full_context_embedding = False):
        '''Arguments
        ---------
        K: number of processing steps of the attention LSTM
        input_size: embedding size produced by the encoder
        hidden_size: hidden size of the attention LSTM; the bidirectional LSTM
            uses hidden_size//2 per direction
        layers: number of stacked layers of the bidirectional LSTM
        n_shot: support samples per class
        k_way: classes per episode
        q: query samples per class (stored, not used by forward)
        is_full_context_embedding: when True the support set goes through the
            bidirectional LSTM and the query set through the attention LSTM'''
        super(Matching, self).__init__()
        self.layers = layers
        self.processing = K
        self.n_shot = n_shot
        self.k_way  = k_way
        self.q = q
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.is_full_context_embedding = is_full_context_embedding
        self.mapping = Encoder(1, self.input_size)
        self.biLSTM = nn.LSTM(self.input_size, self.hidden_size//2, self.layers, bidirectional = True) #set other arguments as well
        self.attLSTM = AttLSTM(K, self.input_size, self.hidden_size) #set other arguments as well
        self.softmax = nn.Softmax(dim = 1)

    def cosine_similarity_matrix(self, x: torch.Tensor,
                                    y: torch.Tensor,
                                    eps: torch.Tensor = torch.tensor(1e-8)):
        '''Pairwise cosine similarity between the rows of x and the rows of y.

        x: shape (q, e): query set
        y: shape (s, e): support set
        eps: lower bound for the product of norms (numerical stability);
            a python float or a 0-dim tensor
        output: shape (q, s); entry (i, j) is cos(x[i], y[j]), so larger values
            mean more similar samples.

        NOTE: this previously returned 1 - cos (a distance), which made the
        softmax in forward() give the largest weight to the least similar
        support samples. Weights trained against the old sign are presumably
        not compatible with this version.'''
        dot_products = torch.matmul(x, y.T) #(q, e) x (e, s) ==> (q, s)
        norm_x = torch.norm(x, p = 2, dim = 1).unsqueeze(1) #(q, 1) column vector
        norm_y = torch.norm(y, p = 2, dim = 1).unsqueeze(1) #(s, 1) column vector
        # Clamp with a python float so a CPU `eps` never meets tensors that
        # live on another device.
        norm_products = torch.clamp(torch.matmul(norm_x, norm_y.T), min = float(eps)) #(q, s)
        return dot_products/norm_products

    def forward(self, x):
        '''x: one episode of images; the first n_shot*k_way items are the
        support set, the rest the query set.

        Returns the attention matrix of shape (num_query, num_support) whose
        rows sum to 1.'''
        embedded = self.mapping(x) #(n*k + q*k, feature_size)
        n_support = self.n_shot*self.k_way
        support_set = embedded[:n_support]
        query_set = embedded[n_support:]

        if self.is_full_context_embedding:
            # The support set is treated as one sequence (batch size 1) by the biLSTM.
            g_S, _ = self.biLSTM(support_set.unsqueeze(1))
            support_set = g_S.squeeze(1)
            query_set = self.attLSTM(query_set, support_set)

        return self.softmax(self.cosine_similarity_matrix(query_set, support_set))


In [8]:
def train(model: Module,
          optimizer: torch.optim.Optimizer,
          train_dataloader: DataLoader,
          val_dataloader: DataLoader,
          n: int,
          k: int,
          q: int,
          device: torch.device,
          epochs: int):
    '''Train `model` on few-shot episodes and validate it after every epoch.

    Every item of a dataloader is one episode `(x, label)`: `label` holds the raw
    class ids (optionally with a leading axis of size 1), ordered as n*k support
    samples followed by the query samples. `model(x)` must return an attention
    matrix of shape (num_query, n*k) whose rows sum to 1; the class
    probabilities of a query are the attention-weighted sum of the one-hot
    support labels.

    Arguments
    ---------
    model: module mapping an episode to the attention matrix
    optimizer: optimizer over the model parameters
    train_dataloader: iterable of training episodes
    val_dataloader: iterable of validation episodes
    n: support samples per class
    k: classes per episode
    q: query samples per class (kept for interface compatibility, not used)
    device: device the labels are moved to
    epochs: number of passes over train_dataloader

    Progress, loss and accuracy are reported through the tqdm progress bars;
    nothing is returned.'''
    criterion = nn.NLLLoss()
    log_eps = 1e-8 #keeps log() finite when a class receives no attention mass

    def ohe(y: torch.Tensor, k):
        '''Relabel the raw class ids of one episode to 0..C-1, ranked by sorted id.'''
        y = y.squeeze(0).to(device)
        # return_inverse gives every element its index among the sorted unique
        # values; unlike in-place relabelling it never modifies the caller's tensor.
        _, relabelled = torch.unique(y, return_inverse=True)
        return relabelled

    def run_episode(x, label):
        '''Forward one episode; return (loss tensor, query accuracy in [0, 1]).'''
        new_y = ohe(label, k).long() #labels ranging from 0 to k-1
        one_hot_y = F.one_hot(new_y).float()
        one_y_S = one_hot_y[:n*k] #one-hot labels of the support set
        y = new_y[n*k:] #integer labels of the query set
        attention = model(x) #(num_query, n*k)
        y_hat = torch.matmul(attention, one_y_S) #class probabilities of the queries
        # NLLLoss expects log-probabilities, y_hat holds probabilities.
        loss = criterion(torch.log(y_hat + log_eps), y)
        pred = torch.argmax(y_hat, dim = 1)
        acc = (pred == y).sum().item()/y.shape[0]
        return loss, acc

    accuracy = [] #training accuracies of every episode seen so far
    for epoch in range(epochs):
        # Validation below switches to eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        with tqdm(train_dataloader, unit="support") as tepoch:
            for (x, label) in tepoch:
                optimizer.zero_grad()
                loss, acc = run_episode(x, label)
                loss.backward()
                optimizer.step()

                accuracy.append(acc)
                accuracy_total = sum(accuracy)/len(accuracy)
                tepoch.set_postfix(loss=loss.item(), accuracy_total=100. * accuracy_total)

        model.eval()
        valid_loss_total = 0
        valid_accuracy = []
        with torch.no_grad():
            with tqdm(val_dataloader, unit="support") as tepoch:
                for (x, label) in tepoch:
                    loss, valid_acc = run_episode(x, label)
                    valid_loss_total += loss.item()
                    valid_accuracy.append(valid_acc)
                    # Running means over the validation episodes of this epoch.
                    tepoch.set_postfix(loss=valid_loss_total/len(valid_accuracy),
                                       accuracy=100. * sum(valid_accuracy)/len(valid_accuracy))


In [9]:
# Release cached GPU memory and unreachable Python objects left over from earlier runs.
torch.cuda.empty_cache()
import gc
# del variables
gc.collect()
n= 2 #number of support samples per class (shots)
path = '' #change this string to path where we will save the model
k= 2 #number of classes per episode (ways)
q = 3 #number of query samples per class
train_dataset = SignatureDataset('/content/gdrive/My Drive/train_labels.csv', '/content/gdrive/My Drive/full_org')
val_dataset = SignatureDataset('/content/gdrive/My Drive/val_labels.csv','/content/gdrive/My Drive/full_org_val')
fsh_sampler_train = NShotTaskSampler(dataset = train_dataset, episodes_per_epoch=5, n = n, k = k, q = q)
fsh_sampler_val = NShotTaskSampler(dataset = val_dataset, episodes_per_epoch=5, n = n, k = k, q = q)
  #few shot learning samplers: every item they yield is the id array of one whole episode
train_dataloader = DataLoader(train_dataset, sampler=fsh_sampler_train) #training dataloader
val_dataloader = DataLoader(val_dataset, sampler=fsh_sampler_val) #validation dataloader

# Use the GPU when one is visible, otherwise fall back to the CPU so the cell
# does not depend on a CUDA runtime being attached.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Matching(K = 5,input_size = 64, hidden_size=64, layers=5, n_shot= n, k_way=k, q=q, is_full_context_embedding=False).to(device) #model for this case
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3, momentum = 0.9) #optimizer for training, test ADAM as well
train(model, optimizer, train_dataloader, val_dataloader, n, k, q, device, 10)
# torch.save(model, path)

RuntimeError: ignored

In [None]:
# Store only the learned parameters (the state_dict) of `model` on the mounted
# Google Drive; `model` must already exist from the training cell.
model_save_name = 'classifier.pt'
path = F"/content/gdrive/My Drive/{model_save_name}" 
torch.save(model.state_dict(), path)