In [22]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torch.autograd import Variable
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [23]:
def load_word_embeddings(emb_file, vocab):
    """Load the word vectors for ``vocab`` from a GloVe-style text file.

    Every line of ``emb_file`` is read as a token followed by its
    space-separated float components. Only the words in ``vocab`` are parsed
    and kept, so the rest of the file is never held in memory.

    Args:
        emb_file: Path of the UTF-8 encoded embedding text file.
        vocab: Sequence of words; each one is lower-cased before the lookup.

    Returns:
        A tensor with one row per entry of ``vocab`` (same order), stacked
        from the per-word vectors.

    Raises:
        KeyError: If at least one word of ``vocab`` has no line in ``emb_file``.
    """
    vocab = [v.lower() for v in vocab]
    wanted = set(vocab)

    embeds = {}
    # The context manager closes the file even if parsing a line fails.
    with open(emb_file, 'r', encoding="utf8") as handle:
        for line in handle:
            fields = line.strip().split(' ')
            key = fields[0]
            if key not in wanted:
                # Skip tokens nobody asked for instead of parsing their floats.
                continue
            embeds[key] = torch.Tensor([float(value) for value in fields[1:]])

    missing = [word for word in vocab if word not in embeds]
    if missing:
        raise KeyError('words not found in {}: {}'.format(emb_file, missing))

    embeds = torch.stack([embeds[word] for word in vocab])
    print('loaded embeddings', embeds.size())

    return embeds

In [24]:
# os.path.join picks the separator of the host OS; a hand-written backslash
# only yields a valid path on Windows and is not a valid string escape.
data_dir = os.path.join(os.getcwd(), "Dataset")
print(data_dir)

C:\Users\Raven\Desktop\Masters-Projects\Visual Computing Lab\ResNetTest\AttrOpClassification\Dataset


In [25]:
# Each operator folder gets its own (identical) preprocessing pipeline:
# resize, random horizontal flip, tensor conversion and normalisation.
# RandomResizedCrop is intentionally left out: it is not well suited for
# this database.
data_transforms = {
    operator_name: transforms.Compose([
        transforms.Resize(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    for operator_name in ('Seasoned', 'Sliced', 'Whole')
}

In [26]:
operators = ['Seasoned', 'Sliced', 'Whole']

# One ImageFolder per operator sub-directory of data_dir, using that
# operator's transform pipeline.
image_datasets = dict(
    (op_name, datasets.ImageFolder(os.path.join(data_dir, op_name), data_transforms[op_name]))
    for op_name in operators
)

# Shuffled loaders that yield batches of 10 images from each folder.
dataloaders = dict(
    (op_name, torch.utils.data.DataLoader(folder, batch_size=10, shuffle=True, num_workers=1))
    for op_name, folder in image_datasets.items()
)

# Number of images found for each operator.
dataset_sizes = dict((op_name, len(folder)) for op_name, folder in image_datasets.items())

In [27]:
# Vocabulary of the task. The list positions matter: they are used as
# embedding / operator indices via objects.index(...) and operators.index(...).
objects = [
    'Meat', 'Potato', 'Eggs', 'Carrot',
    'Apple', 'Onion', 'Cauliflower',
]
operators = [
    'Seasoned',
    'Sliced',
    'Whole',
]

In [28]:
# A forward slash works on every OS; a backslash separator is Windows-only
# and "\g" is not a valid string escape.
objEmb = load_word_embeddings("glove/glove.6B.300d.txt", objects)

loaded embeddings torch.Size([7, 300])


In [29]:
# A forward slash works on every OS; a backslash separator is Windows-only
# and "\g" is not a valid string escape.
opEmb = load_word_embeddings("glove/glove.6B.300d.txt", operators)

loaded embeddings torch.Size([3, 300])


In [30]:
####################################
# Problems Start
####################################

In [31]:
class MLP(nn.Module):
    """Small fully connected network stored as ``self.mod``.

    The stack is ``num_layers - 1`` blocks of ``Linear(inp_dim, inp_dim)``
    followed by an in-place ReLU, then a final ``Linear(inp_dim, out_dim)``.
    When ``relu`` is true the final layer is followed by an in-place ReLU as
    well, so the output is non-negative. ``bias`` is forwarded to every
    linear layer.
    """

    def __init__(self, inp_dim, out_dim, num_layers=1, relu=True, bias=True):
        super().__init__()

        layers = []
        for _ in range(num_layers - 1):
            # Hidden blocks keep the input width.
            layers += [nn.Linear(inp_dim, inp_dim, bias=bias), nn.ReLU(True)]

        # Projection to the requested output width.
        layers.append(nn.Linear(inp_dim, out_dim, bias=bias))
        if relu:
            layers.append(nn.ReLU(True))

        self.mod = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.mod(x)


In [32]:
class AttrOpModel(nn.Module):
    """Embeds image features and (operator, object) compositions in one space.

    An image feature is projected by ``image_embedder``; an object word
    embedding is transformed by the operator's learnable square matrix. The
    two are compared with a triplet margin loss in ``train_forward``.

    The class reads the module-level ``objects`` and ``operators`` lists and
    uses ``MLP`` and ``load_word_embeddings`` defined elsewhere in this file.

    Args:
        feat_dim: Size of the incoming image feature vector (default 512).
        emb_dim: Size of the shared embedding space (default 300). It must
            match the vector length stored in ``emb_file``.
        emb_file: Path of the GloVe text file used to initialise the object
            embeddings.
    """

    def __init__(self, feat_dim=512, emb_dim=300, emb_file='glove/glove.6B.300d.txt'):
        super(AttrOpModel, self).__init__()

        self.image_embedder = MLP(feat_dim, emb_dim)  # image features -> embedding space
        # One operator matrix per entry of `operators`, initialised to identity.
        self.attr_ops = nn.ParameterList([nn.Parameter(torch.eye(emb_dim)) for _ in range(len(operators))])
        self.obj_embedder = nn.Embedding(len(objects), emb_dim)

        # Start the object embeddings from the pretrained word vectors.
        pretrained_weight = load_word_embeddings(emb_file, objects)
        self.obj_embedder.weight.data.copy_(pretrained_weight)

        # Not read by any method of this class; kept so existing code that
        # accesses the attribute keeps working.
        self.inverse_cache = {}

    def apply_op(self, obj, op):
        """Apply operator matrix ``op`` to object embedding ``obj``.

        Args:
            obj: Tensor holding ``dim`` values (one object embedding).
            op: Square ``(dim, dim)`` operator matrix.

        Returns:
            1-D tensor of length ``dim``: ``relu(obj @ op)``.
        """
        dim = op.size(-1)  # taken from the operator instead of a fixed 300
        out = torch.matmul(obj.view(1, dim), op.view(dim, dim))
        return F.relu(out).view(dim)

    def _compose(self, obj_label, op_label):
        """Return the embedding of ``op_label`` applied to ``obj_label``."""
        # Build the index on the same device as the embedding table so the
        # lookup also works after the model has been moved off the CPU.
        index = torch.tensor(objects.index(obj_label), dtype=torch.long,
                             device=self.obj_embedder.weight.device)
        return self.apply_op(self.obj_embedder(index), self.attr_ops[operators.index(op_label)])

    def train_forward(self, img, obj_label, pos_op_label, neg_obj, neg_op_label):
        """Triplet loss for one image against a positive and a negative pair.

        Args:
            img: Image feature tensor fed to ``image_embedder``.
            obj_label: Object name of the positive pair (entry of ``objects``).
            pos_op_label: Operator name of the positive pair (entry of ``operators``).
            neg_obj: Object name of the negative pair.
            neg_op_label: Operator name of the negative pair.

        Returns:
            Scalar loss tensor (margin 0.5, Euclidean distance, mean reduction).
            The value is exactly zero whenever the negative is already at
            least the margin farther from the anchor than the positive; no
            gradient flows for such a triplet.

        Raises:
            ValueError: If a label is not contained in ``objects`` / ``operators``.
        """
        anchor = self.image_embedder(img)
        if anchor.dim() == 1:
            anchor = anchor.unsqueeze(0)

        # The loss expects anchor, positive and negative with the same number
        # of dimensions, so give the composed vectors the anchor's batch shape.
        positive = self._compose(obj_label, pos_op_label).unsqueeze(0).expand_as(anchor)
        negative = self._compose(neg_obj, neg_op_label).unsqueeze(0).expand_as(anchor)

        return F.triplet_margin_loss(anchor, positive, negative, margin=0.5, p=2)
  

In [33]:
####################################
# Problems End
####################################

In [45]:
# Build the model; its constructor loads the GloVe vectors for `objects`,
# which is what prints the "loaded embeddings" line.
model = AttrOpModel()

loaded embeddings torch.Size([7, 300])


In [46]:
def get_neg_pairs(pp, operator_names=None, object_names=None):
    """List the negative [operator, object] pairs for each positive pair.

    A candidate is a negative for a positive pair only when BOTH its operator
    and its object differ from the positive's.

    Args:
        pp: Sequence of positive pairs, each indexable as ``[operator, object]``.
        operator_names: Operators to combine; defaults to the module-level
            ``operators`` list.
        object_names: Objects to combine; defaults to the module-level
            ``objects`` list.

    Returns:
        A list with one entry per positive pair (same order). Each entry is a
        list of ``[operator, object]`` lists, ordered by operator first and
        object second.
    """
    if operator_names is None:
        operator_names = operators
    if object_names is None:
        object_names = objects

    return [
        [
            [op_name, obj_name]
            for op_name in operator_names
            for obj_name in object_names
            if op_name != pos_pair[0] and obj_name != pos_pair[1]
        ]
        for pos_pair in pp
    ]

In [47]:
# Two sample positive [operator, object] pairs to eyeball get_neg_pairs.
pp = [
    ['Seasoned', 'Meat'],
    ['Sliced', 'Apple'],
]
print(get_neg_pairs(pp))

[[['Sliced', 'Potato'], ['Sliced', 'Eggs'], ['Sliced', 'Carrot'], ['Sliced', 'Apple'], ['Sliced', 'Onion'], ['Sliced', 'Cauliflower'], ['Whole', 'Potato'], ['Whole', 'Eggs'], ['Whole', 'Carrot'], ['Whole', 'Apple'], ['Whole', 'Onion'], ['Whole', 'Cauliflower']], [['Seasoned', 'Meat'], ['Seasoned', 'Potato'], ['Seasoned', 'Eggs'], ['Seasoned', 'Carrot'], ['Seasoned', 'Onion'], ['Seasoned', 'Cauliflower'], ['Whole', 'Meat'], ['Whole', 'Potato'], ['Whole', 'Eggs'], ['Whole', 'Carrot'], ['Whole', 'Onion'], ['Whole', 'Cauliflower']]]


In [48]:
# Trainable parameters are split by name: those containing 'attr_op' (the
# operator matrices) get their own learning rate, the rest use the
# optimizer's default.
attr_params = [
    weight
    for param_name, weight in model.named_parameters()
    if weight.requires_grad and 'attr_op' in param_name
]
other_params = [
    weight
    for param_name, weight in model.named_parameters()
    if weight.requires_grad and 'attr_op' not in param_name
]
optim_params = [
    dict(params=attr_params, lr=1e-05),
    dict(params=other_params),
]

In [49]:
optimizer = optim.Adam(optim_params, lr=1e-04, weight_decay=5e-5)

# Pretrained ResNet-18 used as a fixed feature extractor: replacing the
# classifier head with an empty Sequential makes the network return the
# features that used to feed `fc`.
feat_extractor = models.resnet18(pretrained=True)
feat_extractor.fc = nn.Sequential()

# The extractor is never optimised (its parameters are not in `optim_params`),
# so freeze it and switch to inference mode. In training mode the BatchNorm
# layers would normalise with per-batch statistics and keep updating their
# running averages on every forward pass.
for _frozen in feat_extractor.parameters():
    _frozen.requires_grad = False
feat_extractor.eval()

In [50]:
# Collect at least two images of class index 0 from the 'Whole' loader.
# NOTE(review): index 0 is assumed to be the apple class - confirm against
# image_datasets['Whole'].classes.
inApple = []
while len(inApple) < 2:
    inputs, classes = next(iter(dataloaders['Whole']))
    for i in range(len(inputs)):
        if int(classes[i]) == 0:
            inApple.append(inputs[i])

# Apple features. unsqueeze (not unsqueeze_) adds the batch dimension without
# modifying the tensors stored in inApple, and no_grad avoids building an
# autograd graph through the feature extractor, which is not being trained.
with torch.no_grad():
    af = [feat_extractor(apple_img.unsqueeze(0)) for apple_img in inApple]

In [55]:
# Inputs of a single hand-picked triplet.
# detach() replaces the deprecated Variable wrapper and cuts the feature off
# from the feature extractor's autograd graph, so backward() only reaches the
# model being trained.
img = af[0].detach()
obj_label = "Apple"
pos_op_label = "Whole"
neg_obj = "Meat"
neg_op_label = "Sliced"

In [56]:
# "Training" On a single item to test if the functions actually work
# Twelve optimisation steps on the same triplet; the loss is printed before
# each update so its evolution can be inspected.
model.train()
loss = 0.0
for i in range(12):
    loss = model.train_forward(
        img=img,
        obj_label=obj_label,
        pos_op_label=pos_op_label,
        neg_obj=neg_obj,
        neg_op_label=neg_op_label,
    )
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)


In [None]:
# One optimisation step per operator, on a batch of `batch_size` images.
batch_size = 11
for op in operators:
    # NOTE(review): the folder names in `classes` are passed to the model as
    # object labels, so they are assumed to match entries of `objects`.
    class_names = image_datasets[op].classes
    imgs = []  # extracted feature tensor of each sampled image
    pp = []    # matching positive [operator, object] pair of each image

    while len(imgs) < batch_size:
        inputs, classes = next(iter(dataloaders[op]))  # Gather a set of images and classes from them

        # Walk images and labels together so imgs[k] and pp[k] stay aligned.
        for image, class_idx in zip(inputs, classes):
            if len(imgs) == batch_size:
                break
            # The extractor is not trained: no graph needed, and the
            # non-in-place unsqueeze leaves the loaded batch untouched.
            with torch.no_grad():
                imgs.append(feat_extractor(image.unsqueeze(0)))
            pp.append([op, class_names[int(class_idx)]])

    # Named neg_pairs (not `np`) so the numpy alias is not overwritten.
    neg_pairs = get_neg_pairs(pp)

    # One triplet loss per (image, negative pair) combination.
    triplet_losses = [
        model.train_forward(feat, pos_obj, pos_op, neg_obj_name, neg_op_name)
        for feat, (pos_op, pos_obj), negatives in zip(imgs, pp, neg_pairs)
        for neg_op_name, neg_obj_name in negatives
    ]
    if not triplet_losses:
        # No negatives available for this batch: nothing to optimise.
        continue

    loss = torch.stack(triplet_losses).mean()
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()