# Imports

In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as t
import torch.utils.data as data_utils
import torch.optim as optim

from MobileNetV2 import MobileNetV2

import PIL.Image as Image
import numpy as np

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics  import f1_score

import io
import random
import itertools

import matplotlib.pyplot as plt
%matplotlib inline

# Dataset Loader

In [None]:
class AmazonFashionDatasetLoader(data_utils.Dataset):
    def __init__(self, datapath, transforms, train=True):
        [user_train, user_validation, user_test, Item, self.usernum, self.itemnum] = self.load_numpy(datapath)
        self.categories_dict, self.num_categories = self.count_categories(Item)
        self.item_dict = self.build_item_dict(Item)
        self.transforms = transforms
        random.seed(SEED)
        self.dataset, self.max_length = self.preprocess(user_train) if train else self.preprocess(user_validation)
        
    def load_numpy(self, datapath, encoding='bytes'):
        return np.load(datapath, encoding=encoding)
    
    def process_categories(self, category_list):
        categories = list(itertools.chain.from_iterable(category_list))
        categories = [x.decode('utf8').split(',') for x in categories]
        categories = list(set(itertools.chain.from_iterable(categories)))
        return categories
    
    def count_categories(self, items):
        category_set = set()
        for i in range(self.itemnum):
            item = items[i]
            for category in self.process_categories(item[b'categories']):
                category_set.add(category)
        categories = list(category_set)
        categories = {k: v for v, k in enumerate(categories)}
        return categories, len(categories)
    
    def to_one_hot(self, categories):
        indices = np.array(categories)
        onehot = np.zeros(self.num_categories)
        onehot[indices] = 1
        return onehot
    
    def build_item_dict(self, items):
        asin_to_pic = {}
        for i in range(self.itemnum):
            item = items[i]
            asin = item[b'asin'].decode('utf8')
            asin_to_pic[asin] = {}
            asin_to_pic[asin]['img'] = item[b'imgs']
            category_indices = [self.categories_dict[x] for x in self.process_categories(item[b'categories'])]
            asin_to_pic[asin]['label'] = self.to_one_hot(category_indices)
        return asin_to_pic
    
    def preprocess(self, numpy_dataset):
        max_length = 0
        dataset = []
        for i in range(self.usernum):
            user = numpy_dataset[i]
            items = [entry['asin'] for entry in user]
            if max_length < len(items):
                max_length = len(items)
            imgs = [self.item_dict[asin]['img'] for asin in items]
            labels = [self.item_dict[asin]['label'] for asin in items]
            labels = np.clip(np.sum(np.array(labels), axis=0), a_min=0, a_max=1)
            dataset.append({'imgs': imgs, 'labels': labels})
        
        random.shuffle(dataset)
        return dataset, max_length
    
    def preprocess_img(self, img):
        img = Image.open(io.BytesIO(img)).convert('RGB')
        return img            
        
    def __getitem__(self, index):
        user = self.dataset[index]
        
        X = [self.preprocess_img(entry) for entry in user['imgs']]
        X = [self.transforms(img) for img in X]
        Y = user['labels']
        return torch.stack(X), torch.from_numpy(np.array(Y)).float()
    
    def __len__(self):
        return self.usernum
    

In [2]:
class AmazonFashionExtractedDataset(data_utils.Dataset):
    def __init__(self, datapath_x, datapath_y, train=True):
        self.dataset = np.load(datapath_x)
        self.labels = np.load(datapath_y)
        
        self.dataset = self.dataset[:int(self.dataset.shape[0]*0.85)] if train else self.dataset[int(self.dataset.shape[0]*0.85):]
        self.labels = self.labels[:int(self.labels.shape[0]*0.85)] if train else self.labels[int(self.labels.shape[0]*0.85):]
        print('length of X = %d, length of Y = %d' % (len(self.dataset), len(self.labels)))
        
    def __len__(self):
        return self.dataset.shape[0]
    
    def __getitem__(self, index):
        return torch.from_numpy(self.dataset[index]).float(), torch.from_numpy(self.labels[index]).float()

# Utils

In [16]:
def calculate_metrics(predictions, labels):
    y_hat = np.squeeze(torch.sigmoid(predictions).to('cpu').numpy()) >= 0.5
    y_hat = y_hat.astype('int')
    
    y = np.squeeze(labels.to('cpu').numpy())
    
    precision = precision_score(y, y_hat, average='weighted')
    recall = recall_score(y, y_hat, average='weighted')
    f1 = f1_score(y, y_hat, average='weighted')
    return precision, recall, f1

In [None]:
def extract_and_save_features(extractor, data, length, train):
    model.eval()
    x_file = 'amazon_fashion_x_train' if train else 'amazon_fashion_x_test'
    y_file = 'amazon_fashion_y_train' if train else 'amazon_fashion_y_test'
    with torch.no_grad():
        x_complete = []
        y_complete = []
        for i in range(length):
            X, Y = data[i]
            if X.shape[0] == 1:
                continue
            x_complete.append(extractor(X).numpy())
            y_complete.append(Y.numpy())

            if (i % 10 == 9):
                print('%d / %d' % (i, length))
            if (i % 1000 == 999):
                np.save(x_file, x_complete)
                np.save(y_file, y_complete)
        
        np.save(x_file, x_complete)
        np.save(y_file, y_complete)
        

# Layers

In [4]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x

In [5]:
class SqueezeExpandAttention(nn.Module):
    def __init__(self, embeding_dimension, squeeze_dimension):
        super(SqueezeExpandAttention, self)
        self.squeeze = nn.Linear(embeding_dimension, squeeze_dimension)
        self.q0 = nn.Linear(squeeze_dimension, 1)
        self.W = nn.Linear(squeeze_dimension, squeeze_dimension)
        self.expand = nn.Linear(squeeze_dimension, embeding_dimension)
        
    def forward(self, x):
        # x = b x n x k
        squeezed = self.squeeze(x) # b x n x s
        
        r0 = self.q0(squeezed) # b x n x 1
        r0 = F.softmax(r0, dim=1)
        r0 = torch.matmul(torch.transpose(x, 1, 2), r0) # (b x k x n) times (b x n x 1) = (b x k x 1)
        r0 = torch.squeeze(r0, dim=-1) # b x k
        r0 = self.squeeze(r0) # b x s
        
        q1 = self.W(r0) # b x s
        q1 = torch.squeeze(q1, dim=1)
        q1 = torch.tanh(q1)
        q1 = self.expand(q1) # b x k
        
        r1 = torch.matmul(x, torch.unsqueeze(q1, dim=-1)) # (b x n x k) times (b x k x 1) = (b x n x 1)
        r1 = r1.squeeze(dim=-1)
        r1 = F.softmax(r1, dim=1) # b x n
        r1 = torch.matmul(torch.transpose(x, 1, 2), torch.unsqueeze(r1, dim=-1)) # (b x k x n) times (b x n x 1) = (b x k x 1)
        r1 = torch.squeeze(r1, dim=-1) # b x k
        #r1 = torch.squeeze(r1, dim=1)
        return r1

In [6]:
class FCAttentionModule(nn.Module):
    def __init__(self, embedding_dimension):
        super(FCAttentionModule, self).__init__()
        self.q0 = nn.Linear(embedding_dimension, 1)
        self.W = nn.Linear(embedding_dimension, embedding_dimension)
    
    def forward(self, x):
        r0 = self.q0(x)
        r0 = F.softmax(r0, dim=1)
        r0 = torch.matmul(torch.transpose(x, 1, 2), r0)
        r0 = torch.squeeze(r0, dim=-1)
        #r0 = torch.squeeze(r0, dim=1)
        
        q1 = self.W(r0)
        q1 = torch.squeeze(q1, dim=1)
        q1 = torch.tanh(q1)

        r1 = torch.matmul(x, torch.unsqueeze(q1, dim=-1))
        r1 = r1.squeeze(dim=-1)
        r1 = F.softmax(r1, dim=1)
        r1 = torch.matmul(torch.transpose(x, 1, 2), torch.unsqueeze(r1, dim=-1))
        r1 = torch.squeeze(r1, dim=-1)
        r1 = torch.squeeze(r1, dim=1)
        return r1

In [7]:
class ContextGating(nn.Module):
    def __init__(self, embedding_dimension):
        super(ContextGating, self).__init__()
        self.gate = nn.Linear(embedding_dimension, embedding_dimension)
    
    def forward(self, x):
        context = self.gate(x)
        context = torch.tanh(x)
        return context * x

In [8]:
class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        self.mobilenet = MobileNetV2(n_class=1000)
        state_dict = torch.load('mobilenet_v2.pth.tar', map_location='cpu') # add map_location='cpu' if no gpu
        self.mobilenet.load_state_dict(state_dict)
        self.mobilenet.classifier = Identity()
    
    def forward(self, x):
        return self.mobilenet(x)

# Models

In [None]:
SplitSEAttentionModel(nn.Module):
    

In [9]:
class SplitBaselineModel(nn.Module):
    def __init__(self, num_classes):
        super(SplitBaselineModel, self).__init__()
        self.fc_attention_1 = FCAttentionModule(EMBEDDING_DIMENSION)
        self.context_gating = ContextGating(EMBEDDING_DIMENSION)
        self.classifier = nn.Linear(EMBEDDING_DIMENSION, num_classes)
    
    def calculate_batch_padding(self, batch):
        #print(np.array([x.shape[0] for x in batch]))
        return np.max(np.array([x.shape[0] for x in batch]), axis=0)  
    
    def pad_user(self, user, max_len):
        if user.shape[0] < max_len:
            zero_pad = torch.zeros(*[max_len - user.shape[0], EMBEDDING_DIMENSION])
            tensor_list = [torch.squeeze(user[i, :]) for i in range(user.shape[0])]
            for i in range(zero_pad.shape[0]):
                tensor_list.append(zero_pad[i, :])
            stacked = torch.stack(tensor_list)
            #print(stacked.shape)
            return stacked
        else:
            return user
        
    def forward(self, x):
        #print('~~~~~')
        padding_length = self.calculate_batch_padding(x)
        padded_shape = [len(x), padding_length, EMBEDDING_DIMENSION]
        padded_features = torch.empty(*padded_shape)  # batch x max_length x embedding_dim
        for i in range(len(x)):
            padded_features[i] = self.pad_user(x[i], padding_length)
        #print(padded_features.shape)
        aggregated = self.fc_attention_1(padded_features)
        #print(aggregated.shape)
        gated = self.context_gating(aggregated)
        #print(gated.shape)
        prediction = self.classifier(gated)
        #print(prediction.shape)
        return prediction

In [None]:
class EndToEndModel():
    def __init__(self, num_classes, finetune=False):
        #super(BaselineModel, self).__init__()
        self.feature_extractor = FeatureExtractor()
        if not finetune:
            disable_base_training()
        self.fc_attention_1 = FCAttentionModule(EMBEDDING_DIMENSION)
        self.context_gating = ContextGating(EMBEDDING_DIMENSION)
        self.classifier = nn.Linear(EMBEDDING_DIMENSION, num_classes)
    
    def disable_base_training(self):
        for param in self.feature_extractor.parameters():
            param.requires_grad  = False
            
    def calculate_batch_padding(self, batch):
        return np.max(batch, axis=1)  
    
    def pad_user(self, user, max_len):
        if user.shape[0] < max_len:
            zero_pad = torch.zeros(max_len - user.shape[0], EMBEDDING_DIMENSION)
            
            return torch.stack(user, zero_pad)
        else:
            return user
        
            
    def forward(self, x):
        padding_length = calculate_batch_padding(x)
        padded_shape = [len(x), padding_length, EMBEDDING_DIMENSION]
        padded_features = torch.empty(*padded_shape)  # batch x max_length x embedding_dim
        for i in range(len(x)):
            imgs = x[i]
            extracted_features = self.feature_extractor(imgs)
            padded_features[i] = pad_user(extracted_features, padding_length)
        aggregated = self.fc_attention_1(padded_features)
        gated = self.context_gating(aggregated)
        prediction = self.classifier(gated)
        return prediction

# Setup Environment

In [10]:
# device = 'cuda' if torch.cuda 
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

BATCH_SIZE = 32
EMBEDDING_DIMENSION = 1280
NUMBER_OF_EPOCHS = 50
INPUT_SIZE = 224
SEED = 5046
TRANSFORMS = t.Compose([
    t.Resize((INPUT_SIZE, INPUT_SIZE)),
    t.ToTensor(),
    t.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Experiments :

In [11]:
def train_one_epoch(epoch_number):
    SCHEDULER.step()
    MODEL.train()
    running_loss = 0
    for i in range(TRAINSET_BATCHED_LENGTH):
        X = []
        Y = []
        for j in range(BATCH_SIZE):
            x, y = TRAINSET[BATCH_SIZE * i + j]
            X.append(x.to(DEVICE))
            Y.append(y)
        Y = torch.stack(Y).to(DEVICE)
        
        OPTIMIZER.zero_grad()
        
        outputs = MODEL.forward(X)
        loss = CRITERION(outputs, Y)
        loss.backward()
        OPTIMIZER.step()
        
        running_loss += loss.item()
        if i % 500 == 499:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch_number + 1, i + 1, running_loss / 2000))
            running_loss = 0

In [12]:
def eval_after_epoch(epoch_number, best_loss):
    MODEL.eval()
    loss = 0
    average_precision = 0
    average_recall = 0
    average_f1 = 0
    with torch.no_grad():
        for i in range(TESTSET_BATCHED_LENGTH):
            X = []
            Y = []
            for j in range(BATCH_SIZE):
                x, y = TESTSET[BATCH_SIZE * i + j]
                X.append(x.to(DEVICE))
                Y.append(y)
            Y = torch.stack(Y).to(DEVICE)
            
            outputs = MODEL.forward(X)
            loss += CRITERION(outputs, Y).item()
            precision, recall, f1 = calculate_metrics(outputs, Y)
            average_precision += precision
            average_recall += recall
            average_f1 += f1
        
        average_precision = average_precision / TESTSET_BATCHED_LENGTH
        average_recall = average_recall / TESTSET_BATCHED_LENGTH
        average_f1 = average_f1 / TESTSET_BATCHED_LENGTH
        loss = loss / TESTSET_BATCHED_LENGTH
        print('[%d] current loss: %.3f, best loss: %.3f | p=%.3f, r=%.3f, f1=%.3f' %
                  (epoch_number + 1, loss, best_loss, average_precision, average_recall, average_f1))
        if loss < best_loss:
            torch.save(MODEL.state_dict(), CHECKPOINT_PATH)
            print('Saving updated Model with loss %.3f' % loss)
            return loss
        else:
            return best_loss
                

### 1

In [None]:

# load data
CHECKPOINT_PATH = './models/FC_Attention_chkpt.pth.tar'
DATA_PATH = './data/AmazonFashion6ImgPartitioned.npy'

TRAINSET = AmazonFashionDatasetLoader(DATA_PATH, TRANSFORMS, train=True)
TRAINSET_BATCHED_LENGTH = int(len(TRAINSET) / BATCH_SIZE)
CLASS_WEIGHTS = np.full(TRAINSET.num_categories, 30)

TESTSET = AmazonFashionDatasetLoader(DATA_PATH, TRANSFORMS, train=False)
TESTSET_BATCHED_LENGTH = int(len(TESTSET) / BATCH_SIZE)

# Create Model
MODEL = BaselineModel(TRAINSET.num_categories)
MODEL.to(DEVICE)

# Create Optimizer and Loss
OPTIMIZER = optim.Adam(model.parameters(), lr=0.1)
SCHEDULER = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

CRITERION = nn.BCEWithLogitsLoss(pos_weight=CLASS_WEIGHTS)

### 2

In [13]:
CHECKPOINT_PATH = './models/FC_Attention_split_chkpt.pth.tar'
NUM_CATEGORIES = 1270

DATA_PATH_X = './data/amazon_fashion_x_train.npy'
DATA_PATH_Y = './data/amazon_fashion_y_train.npy'

TRAINSET = AmazonFashionExtractedDataset(DATA_PATH_X, DATA_PATH_Y, train=True)
TRAINSET_BATCHED_LENGTH = int(len(TRAINSET) / BATCH_SIZE)
#print(CLASS_WEIGHTS)

TESTSET = AmazonFashionExtractedDataset(DATA_PATH_X, DATA_PATH_Y, train=False)
TESTSET_BATCHED_LENGTH = int(len(TESTSET) / BATCH_SIZE)

length of X = 38406, length of Y = 38406
length of X = 6778, length of Y = 6778


In [17]:
# Create Model

MODEL = SplitBaselineModel(NUM_CATEGORIES)
MODEL.to(DEVICE)

# Create Optimizer and Loss
OPTIMIZER = optim.Adam(MODEL.parameters(), lr=0.1)
SCHEDULER = optim.lr_scheduler.StepLR(OPTIMIZER, step_size=10, gamma=0.5)

CLASS_WEIGHTS = torch.from_numpy(np.full(NUM_CATEGORIES, 30)).float()

CRITERION = nn.BCEWithLogitsLoss(pos_weight=CLASS_WEIGHTS)

### train

In [None]:
best_loss = 100000
for ep in range(NUMBER_OF_EPOCHS):
    train_one_epoch(ep)
    best_loss = eval_after_epoch(ep, best_loss)

[1,   500] loss: 0.301
[1,  1000] loss: 0.327


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


[1] current loss: 1.432, best loss: 100000.000 | p=0.355, r=0.813, f1=0.427
Saving updated Model with loss 1.432
[2,   500] loss: 0.212
[2,  1000] loss: 0.222
[2] current loss: 1.600, best loss: 1.432 | p=0.353, r=0.810, f1=0.425
[3,   500] loss: 0.199


# Pre-extract features

In [None]:
train = AmazonFashionDatasetLoader(DATA_PATH, TRANSFORMS, train=True)
#test = AmazonFashionDatasetLoader(DATA_PATH, TRANSFORMS, train=False)

In [None]:
model = FeatureExtractor()
model.eval()
extract_and_save_features(model, train, len(train), train=True)
#extract_and_save_features(model, test, len(test), train=False)

In [None]:
x_train = np.load('x_train.npy')

In [None]:
x_train[0].shap

In [None]:
test_feat = FeatureExtractorWithMaps()
test_feat(torch.zeros(*[1, 3, 224, 224])).shape

In [None]:
test_feat