In [2]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [3]:
def load_word_embeddings(emb_file, vocab):

    vocab = [v.lower() for v in vocab]

    embeds = {}
    for line in open(emb_file, 'r', encoding="utf8"):
        line = line.strip().split(' ')
        
        key = line[0]
        
        line = [float(i) for i in line[1:len(line)]]
        
        wvec = torch.Tensor(line)#map(float, line[1:]))
        embeds[key] = wvec
        
    embeds = [embeds[k] for k in vocab]
    embeds = torch.stack(embeds)
    print('loaded embeddings', embeds.size())

    return embeds

In [4]:
data_dir = os.getcwd() + "\Dataset"
print(data_dir)

C:\Users\Raven\Desktop\Masters-Projects\Visual Computing Lab\ResNetTest\AttrOpClassification\Dataset


In [5]:
data_transforms = {
    'Seasoned': transforms.Compose([ # Dataset for Training
        transforms.Resize(224),
        #transforms.RandomResizedCrop(224), # Random Resized Crop is not well suited for this database
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'Sliced': transforms.Compose([ # Dataset for Training
        transforms.Resize(224),
        #transforms.RandomResizedCrop(224), # Random Resized Crop is not well suited for this database
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'Whole': transforms.Compose([ # Dataset for Training
        transforms.Resize(224),
        #transforms.RandomResizedCrop(224), # Random Resized Crop is not well suited for this database
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

In [6]:
operators = ['Seasoned', 'Sliced', 'Whole']
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in operators}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=10,
                                             shuffle=True, num_workers=10)
              for x in operators}
dataset_sizes = {x: len(image_datasets[x]) for x in operators}

In [7]:
objects = []
for i in operators:
    class_names = image_datasets[i].classes
    print(class_names)
    for j in class_names:
        objects.append(j)

objects = list(set(objects))
print(objects)

['Meat', 'Potato']
['Apple', 'Carrot', 'Meat', 'Onion', 'Potato']
['Apple', 'Carrot', 'Cauliflower', 'Eggs', 'Meat', 'Potato']
['Potato', 'Onion', 'Eggs', 'Carrot', 'Cauliflower', 'Apple', 'Meat']


In [8]:
# Load Base Objects
# Apply Operator To Them
# Compare the distance between that object-attr pair and the generated one,
#      vs the generated one and an unrelated object-attr pair

In [9]:
# Loading Whole Apple for test

In [10]:
inApple = []
while len(inApple) < 5:
    inputs, classes = next(iter(dataloaders['Whole']))
    #print(classes)
    #print(classes.data[0])
    for i in range(len(inputs)):
        if classes.data[i] == 0:
            inApple.append(inputs[i])
#print(inApple)

In [11]:
# Loading Sliced Apple for test

In [12]:
inSApple = []
while len(inSApple) < 5:
    inputs, classes = next(iter(dataloaders['Sliced']))
    #print(classes)
    #print(classes.data[0])
    for i in range(len(inputs)):
        if classes.data[i] == 0:
            inSApple.append(inputs[i])
#print(inSApple)

In [13]:
# Loading Seasoned Meat for test

In [14]:
inSeMeat = []
while len(inSeMeat) < 5:
    inputs, classes = next(iter(dataloaders['Seasoned']))
    #print(classes)
    #print(classes.data[0])
    for i in range(len(inputs)):
        if classes.data[i] == 0:
            inSeMeat.append(inputs[i])
#print(inSeMeat)

In [15]:
feat_extractor = models.resnet18(pretrained=True)
feat_extractor.fc = nn.Sequential()

In [16]:
af = [] # Apple Features
for i in range(len(inApple)):
    af.append(feat_extractor(inApple[i].unsqueeze_(0)))

In [17]:
asf = [] # Apple Sliced Features
for i in range(len(inSApple)):
    asf.append(feat_extractor(inSApple[i].unsqueeze_(0)))

In [18]:
msef = [] # Meat Seasoned Features
for i in range(len(inSeMeat)):
    msef.append(feat_extractor(inSeMeat[i].unsqueeze_(0)))

In [19]:
print(msef[0].shape)

torch.Size([1, 512])


In [20]:


# Triplet Loss = Max(0, (distance between resnet and Attribute applied to object), 
# (distance between resnet and negative Attribute applied to negative object))



# Need to embed object words
# Need to embed attributes
# use bmm to multiply the two
# Compare distance between ... ^^^




In [21]:
objEmb = load_word_embeddings("glove\glove.6B.300d.txt", objects)

loaded embeddings torch.Size([7, 300])


In [22]:
opEmb = load_word_embeddings("glove\glove.6B.300d.txt", operators)

loaded embeddings torch.Size([3, 300])


In [23]:
test = torch.randn(300, 300)
out = torch.bmm(objEmb[0].view(1,1,300), test.view(1,300,300))
out = F.relu(out).view(300)
print(out.size())

torch.Size([300])


In [24]:
objects = ['Meat', 'Potato', 'Eggs', 'Carrot', 'Apple', 'Onion', 'Cauliflower']
operators = ['Seasoned', 'Sliced', 'Whole']

In [25]:
class MLP(nn.Module):
    def __init__(self, inp_dim, out_dim, num_layers=1, relu=True, bias=True):
        super(MLP, self).__init__()
        mod = []
        for L in range(num_layers-1):
            mod.append(nn.Linear(inp_dim, inp_dim, bias=bias))
            mod.append(nn.ReLU(True))

        mod.append(nn.Linear(inp_dim, out_dim, bias=bias))
        if relu:
            mod.append(nn.ReLU(True))

        self.mod = nn.Sequential(*mod)

    def forward(self, x):
        output = self.mod(x)
        return output


In [26]:
class AttrOpModel(nn.Module):
    def __init__(self):
        super(AttrOpModel, self).__init__()
        self.image_embedder = MLP(512, 300) # 512 image features embedded into 50
        self.attr_ops = nn.ParameterList([nn.Parameter(torch.eye(300)) for _ in range(len(operators))])
        self.obj_embedder = nn.Embedding(len(objects), 300)     
        
        pretrained_weight = load_word_embeddings('glove/glove.6B.300d.txt', objects)
        self.obj_embedder.weight.data.copy_(pretrained_weight)

        self.inverse_cache = {}
        
        
    def apply_op(self, obj, op):
        out = torch.bmm(obj.view(1,1,300), op.view(1,300,300))
        out = F.relu(out).view(300)
        return out
        
        
    def train_forward(self, img, obj_label, pos_op_label, neg_obj, neg_op_label):
        anchor = self.image_embedder(img)

        obj_emb = self.obj_embedder(torch.tensor(objects.index(obj_label), dtype=torch.long))
        pos_op = self.attr_ops[operators.index(pos_op_label)]
        positive = self.apply_op(obj_emb, pos_op)

        neg_obj_emb = self.obj_embedder(torch.tensor(objects.index(neg_obj), dtype=torch.long))
        neg_op = self.attr_ops[operators.index(neg_op_label)]
        negative = self.apply_op(neg_obj_emb, neg_op)

        loss_triplet = F.triplet_margin_loss(anchor, positive, negative, margin=0.5)
        return loss_triplet
        
    #def forward(self, img, obj_label, pos_op_label, neg_obj, neg_op_label):
    #    if self.training:
    #        loss, pred = self.train_forward(img, obj_label, pos_op_label, neg_obj, neg_op_label)
    #    else:
    #       print("potato") ## Val forward
    #    self.inverse_cache = {}
    #    return loss, pred

        

In [27]:
model = AttrOpModel()

loaded embeddings torch.Size([7, 300])


In [28]:
print(operators)
print(objects)

['Seasoned', 'Sliced', 'Whole']
['Meat', 'Potato', 'Eggs', 'Carrot', 'Apple', 'Onion', 'Cauliflower']


In [29]:
model.train_forward(img, obj_label, pos_op_label, neg_obj, neg_op_label)

NameError: name 'img' is not defined

In [30]:
def get_neg_pairs(pp):
    np = []
    for i in pp:
        ls = []
        for j in operators:
            for k in objects:
                if j != i[0] and k != i[1]:
                    ls.append([j,k])
        np.append(ls)
    return np

In [31]:
pp = [['Seasoned', 'Meat'], ['Sliced', 'Apple']]
print(get_neg_pairs(pp))

[[['Sliced', 'Potato'], ['Sliced', 'Eggs'], ['Sliced', 'Carrot'], ['Sliced', 'Apple'], ['Sliced', 'Onion'], ['Sliced', 'Cauliflower'], ['Whole', 'Potato'], ['Whole', 'Eggs'], ['Whole', 'Carrot'], ['Whole', 'Apple'], ['Whole', 'Onion'], ['Whole', 'Cauliflower']], [['Seasoned', 'Meat'], ['Seasoned', 'Potato'], ['Seasoned', 'Eggs'], ['Seasoned', 'Carrot'], ['Seasoned', 'Onion'], ['Seasoned', 'Cauliflower'], ['Whole', 'Meat'], ['Whole', 'Potato'], ['Whole', 'Eggs'], ['Whole', 'Carrot'], ['Whole', 'Onion'], ['Whole', 'Cauliflower']]]


In [32]:
attr_params = [param for name, param in model.named_parameters() if 'attr_op' in name and param.requires_grad]
other_params = [param for name, param in model.named_parameters() if 'attr_op' not in name and param.requires_grad]
optim_params = [{'params':attr_params, 'lr':1e-05}, {'params':other_params}]

In [33]:
 optimizer = optim.Adam(optim_params, lr=1e-04, weight_decay=5e-5)

In [34]:
batch_size = 11
loss = 0
for op in operators: # for each operator...
    class_names = image_datasets[op].classes
    imgs = []
    pp = []
    np = []
    
    while len(imgs) < batch_size:
        inputs, classes = next(iter(dataloaders[op]))  # Gather a set of images and classes from them
        
        for i in inputs:
            imgs.append(feat_extractor(i.unsqueeze_(0)))
            if(len(imgs) == batch_size):
                break
        for i in classes:
            pp.append([op, class_names[i]])
            if(len(pp) == batch_size):
                break

    np = get_neg_pairs(pp)
    #for i in range(len(imgs)):
    #    for j in np[i]:               model.train_forward(imgs[i], pp[i][1], pp[i][0], j[1], j[0])
    loss = model.train_forward(imgs[i], pp[i][1], pp[i][0], j[1], j[0])
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

ValueError: 'o' is not in list

In [None]:
attr_params = [param for name, param in model.named_parameters() if 'attr_op' in name and param.requires_grad]
other_params = [param for name, param in model.named_parameters() if 'attr_op' not in name and param.requires_grad]
optim_params = [{'params':attr_params, 'lr':1e-05}, {'params':other_params}]

In [None]:
optimizer = optim.Adam(model.parameters(), lr=1e-04, weight_decay=5e-5)

In [None]:
img = Variable(af[0])
obj_label = "Apple"
pos_op_label = "Whole"
neg_obj = "Meat"
neg_op_label = "Sliced"

In [None]:
model.train()
model.zero_grad()
loss = model.train_forward(img, obj_label, pos_op_label, neg_obj, neg_op_label)
print(loss)
optimizer.zero_grad()
loss.backward()
optimizer.step()

In [None]:
inApple = []
while len(inApple) < 5:
    inputs, classes = next(iter(dataloaders['Whole']))
    #print(classes)
    #print(classes.data[0])
    for i in range(len(inputs)):
        if classes.data[i] == 0:
            inApple.append(inputs[i])
#print(inApple)

In [None]:
af = [] # Apple Features
for i in range(len(inApple)):
    af.append(feat_extractor(inApple[i].unsqueeze_(0)))