In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models

import numpy as np

from PIL import Image

import pickle as pkl

import matplotlib.pyplot as plt
%matplotlib inline

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Data parser

In [2]:
class BatchLoader():
    def __init__(self, features, labels):
        self.features = features
        self.reference_features = self.random_pairs(features, labels)
        self.labels = labels
        
    def random_pairs(self, X, labels):
        Y = X.copy()
        for l in range(labels.shape[1]):
            inds = np.where(labels[:, l])[0]
            inds_pairs = np.random.permutation(inds)
            Y[inds, :] = X[inds_pairs, :]
        return Y
    
    def batch_load(self, start, end):
        if start == 0:
            idx = np.r_[:self.features.shape[0]]
            np.random.shuffle(idx)
            self.features = self.features[idx]
            self.reference_features = self.reference_features[idx]
            self.labels = self.labels[idx]
            
        if end > self.features.shape[0]:
            end = self.features.shape[0]
            
        return self.features[start:end], self.reference_features[start:end], self.labels[start:end]

In [3]:
features_train = np.load('Data/DEData/features_train.npy').astype('float32')
labels_train = np.load('Data/DEData/labels_train.npy').astype('float32')
loader = BatchLoader(features_train, labels_train)

# Meta-learning models

In [4]:
class DeltaEncoder(nn.Module):
    def __init__(self, input_size=2048, hidden_size=8192, neck_size=16):
        encoder = nn.Sequential(
            nn.Linear(input_size * 2, hidden_size),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
            
            nn.Linear(hidden_size, neck_size),
        )
        
        decoder = nn.Sequential(
            nn.Linear(input_size + neck_size, hidden_size),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
            
            nn.Linear(hidden_size, input_size),
        )
        dropout = nn.Dropout(0.5)
        
        super(DeltaEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.dropout = dropout

    def forward(self, X1, X2):
        out = self.dropout(X1)
        out = torch.cat((out, X2), dim=1)
        out = self.encoder(out)
        
        out = torch.cat((X2, out), dim=1)
        out = self.decoder(out)
        return out

In [5]:
G = DeltaEncoder(2048, 512, 8).to(device)

# Meta-learning phase

In [6]:
MAE = nn.L1Loss(reduction='none')
MSE = nn.MSELoss(reduction='none')
def weighted_MAE(predict, target):
    batch_size = predict.shape[0]
    feature_size = predict.shape[1]

    substract_norm = MSE(predict, target)
    L2_norms = torch.sum(substract_norm, dim=1) + 10e-7
    weights = substract_norm / L2_norms.reshape((batch_size, 1)).expand((batch_size, feature_size))

    substract = MAE(predict, target)
    losses = torch.sum(substract * weights, dim=1)
    loss = torch.mean(losses)
    return loss

In [7]:
optimizer = torch.optim.Adam(G.parameters(), lr=10e-5)

In [8]:
batch_size = 512
train_size = 48000

for epoch in range(20):
    for i in range(train_size // batch_size):
        features, reference_features, labels = loader.batch_load(i * batch_size, (i + 1) * batch_size)
        features = torch.tensor(features, device=device, dtype=torch.float32, requires_grad=False)
        reference_features = torch.tensor(reference_features, device=device, dtype=torch.float32, requires_grad=False)
        predict = G(features, reference_features)
        
        loss = weighted_MAE(predict, features)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i % 10 == 0):
            print('Epoch {} Loss {}'.format(epoch, loss.detach()))

Epoch 0 Loss 10.37740707397461
Epoch 0 Loss 9.422536849975586
Epoch 0 Loss 8.206025123596191
Epoch 0 Loss 7.442513942718506
Epoch 0 Loss 7.036445617675781
Epoch 0 Loss 6.651961326599121
Epoch 0 Loss 6.4385857582092285
Epoch 0 Loss 6.247102737426758
Epoch 0 Loss 6.1639604568481445
Epoch 0 Loss 6.139676094055176
Epoch 1 Loss 6.125914096832275
Epoch 1 Loss 6.021148681640625
Epoch 1 Loss 6.0706305503845215
Epoch 1 Loss 5.966450214385986
Epoch 1 Loss 5.8865275382995605
Epoch 1 Loss 5.88031005859375
Epoch 1 Loss 5.821693420410156
Epoch 1 Loss 5.85353946685791
Epoch 1 Loss 5.846227645874023
Epoch 1 Loss 5.7451019287109375
Epoch 2 Loss 5.807949542999268
Epoch 2 Loss 5.7820024490356445
Epoch 2 Loss 5.650452136993408
Epoch 2 Loss 5.7722930908203125
Epoch 2 Loss 5.804440498352051
Epoch 2 Loss 5.7323408126831055
Epoch 2 Loss 5.738101959228516
Epoch 2 Loss 5.6783270835876465
Epoch 2 Loss 5.6808061599731445
Epoch 2 Loss 5.676595211029053
Epoch 3 Loss 5.694836616516113
Epoch 3 Loss 5.6591715812683105

# Generation & storing new samples (5 new instances per class)

In [9]:
class DeltaEncoderGenerator(nn.Module):
    def __init__(self, input_size=2048, hidden_size=8192, neck_size=16):
        encoder = nn.Sequential(
            nn.Linear(input_size * 2, hidden_size),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
            
            nn.Linear(hidden_size, neck_size),
        )
        
        decoder = nn.Sequential(
            nn.Linear(input_size + neck_size, hidden_size),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
            
            nn.Linear(hidden_size, input_size),
        )
        dropout = nn.Dropout(0.5)
        
        super(DeltaEncoderGenerator, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.dropout = dropout

    def forward(self, X1, X2, shot):
        out = self.dropout(X1)
        out = torch.cat((out, X2), dim=1)
        out = self.encoder(out)
        
        out = torch.cat((shot, out), dim=1)
        out = self.decoder(out)
        return out

In [10]:
G_trained = DeltaEncoderGenerator(2048, 512, 8).to(device)
G_trained.load_state_dict(G.state_dict())

<All keys matched successfully>

In [11]:
num_shots = 5
episode = torch.zeros(1, 5, num_shots, 2048, device=device, requires_grad=False)

for i in range(5):
    with open('Data/PickledClasses/' + str(95 + i), 'rb') as f:
        data = pkl.load(f)
        
    shot_numbers = np.random.randint(0, 600, size=1)
    episode[0][i][:num_shots] = data['features'][shot_numbers]
    
episode.shape

torch.Size([1, 5, 5, 2048])

In [12]:
# store samples to cpu!

batch_size = 128
gen_size = 1024
train_size = 48000
class_data = {
    'label': 0,
    'features': torch.zeros(gen_size, 2048, device=device, dtype=torch.float32, requires_grad=False)
}

for class_num in range(5):
    indices = np.random.randint(low=0, high=train_size // batch_size, size=gen_size // batch_size)
    j = 0
    for i in indices:
        features, reference_features, labels = loader.batch_load(i * batch_size, (i + 1) * batch_size)
        features = torch.tensor(features, device=device, dtype=torch.float32, requires_grad=False)
        reference_features = torch.tensor(reference_features, device=device, dtype=torch.float32, requires_grad=False)
        
        # shot = episode[0][class_num].expand(batch_size, 2048)
        idx = np.arange(batch_size) % num_shots
        shot = torch.zeros(batch_size, 2048)
        shot[:] = episode[0, class_num, batch_size % num_shots]
        class_data['features'][j * batch_size:(j + 1) * batch_size] = G_trained(features, reference_features, shot.to(device)).detach()
        j += 1
        
    class_data['label'] = class_num
    with open('Data/SynthFeatures/' + str(class_num), 'wb') as f:
        pkl.dump(class_data, f)
    print('Class {} processed.'.format(class_num))

Class 0 processed.
Class 1 processed.
Class 2 processed.
Class 3 processed.
Class 4 processed.


In [13]:
batch_size = 128
class_size = 1024
train_size = 48000
pack_features = np.zeros((5, 2, 1024, 2048), dtype=np.float32)

total_indices = np.random.permutation(train_size // batch_size)
for class_num in range(5):
    indices = total_indices[class_num * (class_size // batch_size):(class_num + 1) * (class_size // batch_size)]
    j = 0
    for i in indices:
        features, reference_features, labels = loader.batch_load(i * batch_size, (i + 1) * batch_size)
        pack_features[class_num][0][j * batch_size:(j + 1) * batch_size] = features
        pack_features[class_num][1][j * batch_size:(j + 1) * batch_size] = reference_features
        j += 1
        
with open('Data/SynthMaterial/0', 'wb') as f:
    pkl.dump({'features': pack_features}, f)
print('Package processed.')

Package processed.


# Training target classyfier

In [14]:
class BatchLoader():
    def __init__(self, class_size, num_classes, first_class, batch_size, batches_in_buff, path):
        self.class_size = class_size
        self.num_classes = num_classes
        self.first_class = first_class
        self.batch_size = batch_size
        self.batches_in_buff = batches_in_buff
        self.path = path
        
        self.indices = np.random.permutation(num_classes * class_size)
        self.buff_size = batches_in_buff * batch_size
        self.buff = [{'label': 0, 'features': torch.zeros(2048, device=device)} for i in range(self.buff_size)]
        self.buff_num = 0
    
    def buff_gen(self, buff_num):
        buff_indices = self.indices[buff_num * self.buff_size:(buff_num + 1) * self.buff_size]

        for i in range(self.num_classes):
            with open(self.path + str(self.first_class + i), 'rb') as f:
                class_data = pkl.load(f)

            class_indices = np.where(((buff_indices < (i + 1) * self.class_size) & (buff_indices >= i * self.class_size)))[0]
            for j in class_indices:
                self.buff[j] = {
                    'label': class_data['label'],
                    'features': class_data['features'][buff_indices[j] % self.class_size]
                }
    
    def batch_load(self, i):
        buff_i = i % self.batches_in_buff
        if (buff_i == 0):
            self.buff_gen(self.buff_num)
            self.buff_num += 1
            
        return self.buff[buff_i * self.batch_size:(buff_i + 1) * self.batch_size]

In [15]:
class Classifier(nn.Module):
    def __init__(self):
        fc_layers = nn.Sequential(
            nn.Linear(2048, 5),
            nn.Softmax(dim=1)
        )
        super(Classifier, self).__init__()
        self.fc = fc_layers
        
    def forward(self, x):
        out = self.fc(x)
        return out

In [19]:
classifier = Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.0001)

In [20]:
class_size = 1024
num_classes = 5
first_class = 0
train_size = class_size * num_classes
batch_size = 128
batches_in_buff = 4
buff_size = batch_size * batches_in_buff

for epoch in range(7):
    loader = BatchLoader(class_size, num_classes, first_class, batch_size, batches_in_buff, 'Data/SynthFeatures/')
    for i in range(train_size // batch_size):
        batch_tuple = loader.batch_load(i)
        images = torch.zeros(batch_size, 2048, device=device, requires_grad=False)
        labels = torch.zeros(batch_size, device=device, requires_grad=False, dtype=int)
        for k in range(batch_size):
            images[k] = batch_tuple[k]['features']
            labels[k] = batch_tuple[k]['label']
        
        predict = classifier(images)
        loss = criterion(predict, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loss_value = loss.detach()
    
    # if (epoch % 10 == 0):
    print('Epoch {} Loss = {}'.format(epoch, loss_value))

Epoch 0 Loss = 1.4712327718734741
Epoch 1 Loss = 1.2442599534988403
Epoch 2 Loss = 1.1964941024780273
Epoch 3 Loss = 1.1142165660858154
Epoch 4 Loss = 1.0954577922821045
Epoch 5 Loss = 1.0322712659835815
Epoch 6 Loss = 1.024875283241272


In [21]:
class_size = 600
num_classes = 5
first_class = 95
train_size = class_size * 5
batch_size = 100
batches_in_buff = 10
buff_size = batch_size * batches_in_buff
loader = BatchLoader(class_size, num_classes, first_class, batch_size, batches_in_buff, 'Data/PickledClasses/')

correct = 0
total = 0
for i in range(train_size // batch_size):
    batch_tuple = loader.batch_load(i)
    images = torch.zeros(batch_size, 2048, device=device, requires_grad=False)
    labels = torch.zeros(batch_size, device=device, requires_grad=False, dtype=int)
    for k in range(batch_size):
        images[k] = batch_tuple[k]['features']
        labels[k] = batch_tuple[k]['label'] - 95 # don't forget about this
        
    predict = classifier(images)
    _, predicted = torch.max(predict.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

accuracy = 100 * correct / total
print('Accuracy on FSL task = {} %'.format(accuracy))

Accuracy on FSL task = 69.19999694824219 %


# Saving and loading models

In [604]:
torch.save(G.to('cpu').state_dict(), 'Models/G')
G.to(device)

DeltaEncoderGenerator(
  (encoder): Sequential(
    (0): Linear(in_features=4096, out_features=512, bias=True)
    (1): LeakyReLU(negative_slope=0.2)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=512, out_features=8, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=2056, out_features=512, bias=True)
    (1): LeakyReLU(negative_slope=0.2)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=512, out_features=2048, bias=True)
  )
  (dropout): Dropout(p=0.5, inplace=False)
)

In [21]:
torch.save(classyfier.to('cpu').state_dict(), 'Models/classyfier')
classyfier.to(device)

Classyfier(
  (fc): Sequential(
    (0): Linear(in_features=2048, out_features=5, bias=True)
    (1): Softmax(dim=1)
  )
)

In [4]:
classyfier.load_state_dict(torch.load("Models/classyfier"))

<All keys matched successfully>

In [35]:
G.load_state_dict(torch.load("Models/G"))

<All keys matched successfully>