In [4]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [5]:
def load_word_embeddings(emb_file, vocab):

    vocab = [v.lower() for v in vocab]

    embeds = {}
    for line in open(emb_file, 'r', encoding="utf8"):
        line = line.strip().split(' ')
        
        key = line[0]
        
        line = [float(i) for i in line[1:len(line)]]
        
        wvec = torch.Tensor(line)#map(float, line[1:]))
        embeds[key] = wvec
        
    embeds = [embeds[k] for k in vocab]
    embeds = torch.stack(embeds)
    print('loaded embeddings', embeds.size())

    return embeds

In [6]:
data_dir = os.getcwd() + "\Dataset"
print(data_dir)

C:\Users\Raven\Desktop\Masters-Projects\Visual Computing Lab\ResNetTest\AttrOpClassification\Dataset


In [7]:
data_transforms = {
    'Seasoned': transforms.Compose([ # Dataset for Training
        transforms.Resize(224),
        #transforms.RandomResizedCrop(224), # Random Resized Crop is not well suited for this database
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'Sliced': transforms.Compose([ # Dataset for Training
        transforms.Resize(224),
        #transforms.RandomResizedCrop(224), # Random Resized Crop is not well suited for this database
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'Whole': transforms.Compose([ # Dataset for Training
        transforms.Resize(224),
        #transforms.RandomResizedCrop(224), # Random Resized Crop is not well suited for this database
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

In [95]:
operators = ['Seasoned', 'Sliced', 'Whole']
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in operators}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=10,
                                             shuffle=True, num_workers=10)
              for x in operators}
dataset_sizes = {x: len(image_datasets[x]) for x in operators}

In [96]:
objects = []
for i in operators:
    class_names = image_datasets[i].classes
    print(class_names)
    for j in class_names:
        objects.append(j)

objects = list(set(objects))
print(objects)

['Meat', 'Potato']
['Apple', 'Carrot', 'Meat', 'Onion', 'Potato']
['Apple', 'Carrot', 'Cauliflower', 'Eggs', 'Meat', 'Potato']
['Onion', 'Potato', 'Carrot', 'Meat', 'Eggs', 'Cauliflower', 'Apple']


In [10]:
# Load Base Objects
# Apply Operator To Them
# Compare the distance between that object-attr pair and the generated one,
#      vs the generated one and an unrelated object-attr pair

In [11]:
# Loading Whole Apple for test

In [12]:
inApple = []
while len(inApple) < 5:
    inputs, classes = next(iter(dataloaders['Whole']))
    #print(classes)
    #print(classes.data[0])
    for i in range(len(inputs)):
        if classes.data[i] == 0:
            inApple.append(inputs[i])
#print(inApple)

In [13]:
# Loading Sliced Apple for test

In [14]:
inSApple = []
while len(inSApple) < 5:
    inputs, classes = next(iter(dataloaders['Sliced']))
    #print(classes)
    #print(classes.data[0])
    for i in range(len(inputs)):
        if classes.data[i] == 0:
            inSApple.append(inputs[i])
#print(inSApple)

In [15]:
# Loading Seasoned Meat for test

In [16]:
inSeMeat = []
while len(inSeMeat) < 5:
    inputs, classes = next(iter(dataloaders['Seasoned']))
    #print(classes)
    #print(classes.data[0])
    for i in range(len(inputs)):
        if classes.data[i] == 0:
            inSeMeat.append(inputs[i])
#print(inSeMeat)

In [17]:
feat_extractor = models.resnet18(pretrained=True)
feat_extractor.fc = nn.Sequential()

In [18]:
af = [] # Apple Features
for i in range(len(inApple)):
    af.append(feat_extractor(inApple[i].unsqueeze_(0)))

In [19]:
asf = [] # Apple Sliced Features
for i in range(len(inSApple)):
    asf.append(feat_extractor(inSApple[i].unsqueeze_(0)))

In [20]:
msef = [] # Meat Seasoned Features
for i in range(len(inSeMeat)):
    msef.append(feat_extractor(inSeMeat[i].unsqueeze_(0)))

In [21]:
print(msef[0].shape)

torch.Size([1, 512])


In [22]:


# Triplet Loss = Max(0, (distance between resnet and Attribute applied to object), 
# (distance between resnet and negative Attribute applied to negative object))



# Need to embed object words
# Need to embed attributes
# use bmm to multiply the two
# Compare distance between ... ^^^




In [23]:
objEmb = load_word_embeddings("glove\glove.6B.300d.txt", objects)

loaded embeddings torch.Size([7, 300])


In [24]:
opEmb = load_word_embeddings("glove\glove.6B.300d.txt", operators)

loaded embeddings torch.Size([3, 300])


In [25]:
test = torch.randn(300, 300)
out = torch.bmm(objEmb[0].view(1,1,300), test.view(1,300,300))
out = F.relu(out).view(300)
print(out.size())

torch.Size([300])


In [26]:
objects = ['Meat', 'Potato', 'Eggs', 'Carrot', 'Apple', 'Onion', 'Cauliflower']
operators = ['Seasoned', 'Sliced', 'Whole']

In [27]:
class MLP(nn.Module):
    def __init__(self, inp_dim, out_dim, num_layers=1, relu=True, bias=True):
        super(MLP, self).__init__()
        mod = []
        for L in range(num_layers-1):
            mod.append(nn.Linear(inp_dim, inp_dim, bias=bias))
            mod.append(nn.ReLU(True))

        mod.append(nn.Linear(inp_dim, out_dim, bias=bias))
        if relu:
            mod.append(nn.ReLU(True))

        self.mod = nn.Sequential(*mod)

    def forward(self, x):
        output = self.mod(x)
        return output


In [68]:
class AttrOpModel(nn.Module):
    def __init__(self):
        super(AttrOpModel, self).__init__()
        self.image_embedder = MLP(512, 300) # 512 image features embedded into 50
        self.attr_ops = nn.ParameterList([nn.Parameter(torch.eye(300)) for _ in range(len(operators))])
        self.obj_embedder = nn.Embedding(len(objects), 300)     
        
        pretrained_weight = load_word_embeddings('glove/glove.6B.300d.txt', objects)
        self.obj_embedder.weight.data.copy_(pretrained_weight)

        self.inverse_cache = {}
        
        
    def apply_op(self, obj, op):
        out = torch.bmm(obj.view(1,1,300), op.view(1,300,300))
        out = F.relu(out).view(300)
        return out
        
        
    def train_forward(self, img, obj_label, pos_op_label, neg_obj, neg_op_label):
        anchor = self.image_embedder(img)

        obj_emb = self.obj_embedder(torch.tensor(objects.index(obj_label), dtype=torch.long))
        pos_op = self.attr_ops[operators.index(pos_op_label)]
        positive = self.apply_op(obj_emb, pos_op)

        neg_obj_emb = self.obj_embedder(torch.tensor(objects.index(neg_obj), dtype=torch.long))
        neg_op = self.attr_ops[operators.index(neg_op_label)]
        negative = self.apply_op(neg_obj_emb, neg_op)

        loss_triplet = F.triplet_margin_loss(anchor, positive, negative, margin=0.5)
        print(loss_triplet)
        
    def forward(self, img, obj_label, pos_op_label, neg_obj, neg_op_label):
        if self.training:
            loss, pred = self.train_forward(img, obj_label, pos_op_label, neg_obj, neg_op_label)
        else:
           print("potato") ## Val forward
        self.inverse_cache = {}
        return loss, pred

        

In [69]:
model = AttrOpModel()

loaded embeddings torch.Size([7, 300])


In [70]:
print(operators)
print(objects)

['Seasoned', 'Sliced', 'Whole']
['Meat', 'Potato', 'Eggs', 'Carrot', 'Apple', 'Onion', 'Cauliflower']


In [81]:
img = af[0]
obj_label = "Apple"
pos_op_label = "Whole"
neg_obj = "Meat"
neg_op_label = "Sliced"

In [82]:
model.train_forward(img, obj_label, pos_op_label, neg_obj, neg_op_label)

tensor(0.6139)


In [None]:
def train():
    

In [116]:
def get_neg_pairs(pp):
    np = []
    for i in pp:
        ls = []
        for j in operators:
            for k in objects:
                if j != i[0] and k != i[1]:
                    ls.append([j,k])
        np.append(ls)
    return np

In [117]:
pp = [['Seasoned', 'Meat'], ['Sliced', 'Apple']]
print(get_neg_pairs(pp))

[[['Sliced', 'Onion'], ['Sliced', 'Potato'], ['Sliced', 'Carrot'], ['Sliced', 'Eggs'], ['Sliced', 'Cauliflower'], ['Sliced', 'Apple'], ['Whole', 'Onion'], ['Whole', 'Potato'], ['Whole', 'Carrot'], ['Whole', 'Eggs'], ['Whole', 'Cauliflower'], ['Whole', 'Apple']], [['Seasoned', 'Onion'], ['Seasoned', 'Potato'], ['Seasoned', 'Carrot'], ['Seasoned', 'Meat'], ['Seasoned', 'Eggs'], ['Seasoned', 'Cauliflower'], ['Whole', 'Onion'], ['Whole', 'Potato'], ['Whole', 'Carrot'], ['Whole', 'Meat'], ['Whole', 'Eggs'], ['Whole', 'Cauliflower']]]


In [122]:
batch_size = 11
for op in operators: # for each operator...
    class_names = image_datasets[op].classes
    imgs = []
    pp = []
    np = []
    
    
    
    while len(imgs) < batch_size:
        inputs, classes = next(iter(dataloaders[op]))  # Gather a set of images and classes from them
        
        for i in inputs:
            imgs.append(feat_extractor(i.unsqueeze_(0)))
            if(len(imgs) == batch_size):
                break
        for i in classes:
            pp.append([op, class_names[i]])
            if(len(pp) == batch_size):
                break

    np = get_neg_pairs(pp)
    for i in range(len(imgs)):
        for j in np[i]:
            model.train_forward(imgs[i], pp[i][1], pp[i][0], j[1], j[0])


['Sliced', 'Onion']
tensor(0.2666)
['Sliced', 'Potato']
tensor(0.3822)
['Sliced', 'Carrot']
tensor(0.3019)
['Sliced', 'Eggs']
tensor(0.1418)
['Sliced', 'Cauliflower']
tensor(0.)
['Sliced', 'Apple']
tensor(0.3815)
['Whole', 'Onion']
tensor(0.2666)
['Whole', 'Potato']
tensor(0.3822)
['Whole', 'Carrot']
tensor(0.3019)
['Whole', 'Eggs']
tensor(0.1418)
['Whole', 'Cauliflower']
tensor(0.)
['Whole', 'Apple']
tensor(0.3815)
['Sliced', 'Onion']
tensor(0.2499)
['Sliced', 'Potato']
tensor(0.3808)
['Sliced', 'Carrot']
tensor(0.2926)
['Sliced', 'Eggs']
tensor(0.1324)
['Sliced', 'Cauliflower']
tensor(0.)
['Sliced', 'Apple']
tensor(0.3801)
['Whole', 'Onion']
tensor(0.2499)
['Whole', 'Potato']
tensor(0.3808)
['Whole', 'Carrot']
tensor(0.2926)
['Whole', 'Eggs']
tensor(0.1324)
['Whole', 'Cauliflower']
tensor(0.)
['Whole', 'Apple']
tensor(0.3801)
['Sliced', 'Onion']
tensor(0.3621)
['Sliced', 'Carrot']
tensor(0.3969)
['Sliced', 'Meat']
tensor(0.6041)
['Sliced', 'Eggs']
tensor(0.2508)
['Sliced', 'Cauliflow

tensor(0.7206)
['Whole', 'Eggs']
tensor(0.3600)
['Whole', 'Cauliflower']
tensor(0.1018)
['Whole', 'Apple']
tensor(0.5945)
['Seasoned', 'Onion']
tensor(0.4627)
['Seasoned', 'Potato']
tensor(0.5903)
['Seasoned', 'Meat']
tensor(0.7092)
['Seasoned', 'Eggs']
tensor(0.3653)
['Seasoned', 'Cauliflower']
tensor(0.1022)
['Seasoned', 'Apple']
tensor(0.5802)
['Whole', 'Onion']
tensor(0.4627)
['Whole', 'Potato']
tensor(0.5903)
['Whole', 'Meat']
tensor(0.7092)
['Whole', 'Eggs']
tensor(0.3653)
['Whole', 'Cauliflower']
tensor(0.1022)
['Whole', 'Apple']
tensor(0.5802)
['Seasoned', 'Onion']
tensor(0.4717)
['Seasoned', 'Potato']
tensor(0.5787)
['Seasoned', 'Meat']
tensor(0.6898)
['Seasoned', 'Eggs']
tensor(0.3452)
['Seasoned', 'Cauliflower']
tensor(1.00000e-02 *
       8.2921)
['Seasoned', 'Apple']
tensor(0.5831)
['Whole', 'Onion']
tensor(0.4717)
['Whole', 'Potato']
tensor(0.5787)
['Whole', 'Meat']
tensor(0.6898)
['Whole', 'Eggs']
tensor(0.3452)
['Whole', 'Cauliflower']
tensor(1.00000e-02 *
       8.2921

In [None]:
inApple = []
while len(inApple) < 5:
    inputs, classes = next(iter(dataloaders['Whole']))
    #print(classes)
    #print(classes.data[0])
    for i in range(len(inputs)):
        if classes.data[i] == 0:
            inApple.append(inputs[i])
#print(inApple)

In [None]:
af = [] # Apple Features
for i in range(len(inApple)):
    af.append(feat_extractor(inApple[i].unsqueeze_(0)))