# Basic Imports and paths

In [1]:
import torch
from fastai.distributed import *
from fastai.vision.all import *
from pathlib import Path


module_path = os.path.abspath(os.path.join('../../baselines'))
if module_path not in sys.path: sys.path.append(module_path)

In [2]:
device = torch.device('cuda:1')
core_pth = Path('/home/agrawalp2/prepositions'); assert core_pth.exists()
spatialsenses_pth = core_pth/Path('real_world_data/spatialsense'); assert spatialsenses_pth.exists()
encoder_path = core_pth/Path('experiments/baselines/models/encoder/GoogleNews-vectors-negative300.bin.gz'); assert encoder_path.exists()

# Language only Model

## imports

In [3]:
from dataloaders.spatialsense.languageOnlyDataset import languageOnlyDataset
from dataloaders.spatialsense.utils import  map_spatialsenses_to_stupd
from models.static.language_only import SimpleLanguageOnlyModel

## data

In [4]:
train_ds = languageOnlyDataset(annotations_path = spatialsenses_pth/'annotations.json',
                               split = 'train',
                               encoder_path = encoder_path,
                               x_tfms = None, 
                               y_tfms = [map_spatialsenses_to_stupd],)

valid_ds = languageOnlyDataset(annotations_path = spatialsenses_pth/'annotations.json',
                               split = 'valid',
                               encoder_path = encoder_path,
                               x_tfms = None,
                               y_tfms = [map_spatialsenses_to_stupd])

len(train_ds),len(valid_ds)

(5619, 1319)

In [5]:
train_dl = DataLoader(train_ds, batch_size =64 , shuffle = True, drop_last = True)
valid_dl = DataLoader(valid_ds, batch_size = 128 , shuffle = True, drop_last = True)

## training

In [6]:
dls = DataLoaders(train_dl, valid_dl)
dls.n_inp = 2
model = SimpleLanguageOnlyModel(word_embedding_dim=300, feature_dim=512, c=train_ds.c).cuda()
learn = Learner(dls, model = model, loss_func = CrossEntropyLossFlat(), metrics = [accuracy,BalancedAccuracy()])

In [7]:
learn.fit(5)

epoch,train_loss,valid_loss,accuracy,balanced_accuracy_score,time
0,1.607804,1.551673,0.407813,0.317455,00:01
1,1.403382,1.562047,0.433594,0.333861,00:01
2,1.280012,1.645923,0.428906,0.359693,00:01
3,1.180857,1.705735,0.421875,0.339253,00:01
4,1.097332,1.816023,0.420312,0.342171,00:01


# 2D Only

In [8]:
from dataloaders.spatialsense.coordinate2dOnlyDataset import coordinate2D_OnlyDataset
from dataloaders.spatialsense.utils import  map_spatialsenses_to_stupd
from models.static.coordinate_2d_only import coordinateOnlyModel

In [9]:
train_ds = coordinate2D_OnlyDataset(annotations_path = spatialsenses_pth/'annotations.json',
                                    split = 'train',
                                    x_tfms = None, 
                                    y_tfms = [map_spatialsenses_to_stupd])

valid_ds = coordinate2D_OnlyDataset(annotations_path = spatialsenses_pth/'annotations.json',
                                    split = 'valid',
                                    x_tfms = None, 
                                    y_tfms = [map_spatialsenses_to_stupd])

len(train_ds),len(valid_ds)

(5619, 1319)

In [10]:
train_dl = DataLoader(train_ds, batch_size =16, shuffle = True, drop_last = True)
valid_dl = DataLoader(valid_ds, batch_size = 256, shuffle = True, drop_last = True)

## model

In [11]:
dls = DataLoaders(train_dl, valid_dl)
model = coordinateOnlyModel(10, 64, train_ds.c).cuda()
learn = Learner(dls, model = model, loss_func = CrossEntropyLossFlat(), metrics = [accuracy,BalancedAccuracy()])

In [12]:
learn.fit(5)

epoch,train_loss,valid_loss,accuracy,balanced_accuracy_score,time
0,1.556026,1.502632,0.453906,0.344798,00:02
1,1.555371,1.497625,0.461719,0.358842,00:02
2,1.538176,1.451486,0.4875,0.373809,00:02
3,1.517611,1.467898,0.478906,0.353542,00:02
4,1.516191,1.436629,0.492969,0.37548,00:02


# DRNet

In [13]:
from dataloaders.spatialsense.drnetDataset import drnetDataset
from dataloaders.spatialsense.utils import  map_spatialsenses_to_stupd

from models.static.drnet import DRNet

import torchvision.transforms as transforms


In [14]:
train_ds = drnetDataset(annotations_path = spatialsenses_pth/'annotations.json',
                        image_path = spatialsenses_pth/'images',
                        encoder_path = encoder_path,
                        split = 'train',
                        y_category_tfms = [map_spatialsenses_to_stupd],
                        x_img_tfms =     [transforms.ToPILImage("RGB"),
                                        transforms.RandomResizedCrop(224, scale=(0.75, 0.85)),
                                        transforms.ColorJitter(0.1, 0.1, 0.1, 0.05)],
                        bbox_mask_tfms = [transforms.ToPILImage("RGB"),
                                            transforms.Pad(4, padding_mode="edge"),
                                            transforms.RandomResizedCrop(32, scale=(0.75, 0.85))]
                         )

valid_ds = drnetDataset(annotations_path = spatialsenses_pth/'annotations.json',
                        image_path = spatialsenses_pth/'images',
                        encoder_path = encoder_path, 
                        split = 'valid',
                        y_category_tfms = [map_spatialsenses_to_stupd],
                        x_img_tfms =     [transforms.ToPILImage("RGB"),
                                            transforms.CenterCrop(224)],
                        
                        bbox_mask_tfms = [transforms.ToPILImage("RGB"),
                                            transforms.Pad(4, padding_mode="edge"),
                                            transforms.CenterCrop(32)]
                         )

len(train_ds),len(valid_ds)

(5619, 1319)

In [15]:
train_dl = DataLoader(train_ds, batch_size =64 , shuffle = True, num_workers = 0)
valid_dl = DataLoader(valid_ds, batch_size = 128 , shuffle = True, num_workers = 0)

In [16]:
dls = DataLoaders(train_dl, valid_dl)
dls.n_inp = 4

model = DRNet(word_embedding_dim = 300, 
              feature_dim = 512, 
              num_classes = train_ds.c, 
              num_layers = 3,
              imagenet_pretrained = False).cuda()

learn = Learner(dls, model = model, loss_func = CrossEntropyLossFlat(), metrics = [accuracy,BalancedAccuracy()])

In [17]:
learn.fit(5)

epoch,train_loss,valid_loss,accuracy,balanced_accuracy_score,time
0,1.590085,1.561749,0.408643,0.295358,01:06
1,1.453299,1.419746,0.488249,0.38018,01:05
2,1.370737,1.418625,0.460197,0.372836,01:05
3,1.310594,1.393764,0.491281,0.420533,01:05
4,1.25017,1.404946,0.503412,0.405813,01:05


# ViPCNN

In [18]:
from dataloaders.spatialsense.vipcnnDataset import vipcnnDataset
from dataloaders.spatialsense.utils import  map_spatialsenses_to_stupd
from models.static.vipcnn import VipCNN
import torchvision.transforms as transforms

In [19]:
train_ds = vipcnnDataset(annotations_path = spatialsenses_pth/'annotations.json',
                         image_path = spatialsenses_pth/'images',
                         split='train',
                         x_tfms = [transforms.ToPILImage("RGB"),
                                   transforms.ColorJitter(0.1, 0.1, 0.1, 0.05),
                                  ],
                         y_category_tfms = [map_spatialsenses_to_stupd],
                        )

valid_ds = vipcnnDataset(annotations_path = spatialsenses_pth/'annotations.json',
                         image_path = spatialsenses_pth/'images',
                         split='valid',
                         x_tfms = [transforms.ToPILImage("RGB")],
                         y_category_tfms = [map_spatialsenses_to_stupd])

len(train_ds), len(valid_ds)

(5619, 1319)

In [20]:
train_dl = DataLoader(train_ds, batch_size =64 , shuffle = True)
valid_dl = DataLoader(valid_ds, batch_size = 128 , shuffle = True)

In [21]:
%time x = next(iter(train_dl))

CPU times: user 8.13 s, sys: 3.7 ms, total: 8.14 s
Wall time: 1.21 s


In [22]:
dls = DataLoaders(train_dl, valid_dl)
dls.n_inp = 3

model = VipCNN(roi_size = 6, num_classes = train_ds.c, imagenet_pretrained = False).cuda()

learn = Learner(dls, model = model, loss_func = CrossEntropyLossFlat(), metrics = [accuracy,BalancedAccuracy()])

In [23]:
learn.fit(5)

epoch,train_loss,valid_loss,accuracy,balanced_accuracy_score,time
0,1.758997,1.774824,0.345716,0.207544,02:10
1,1.657693,1.688529,0.373768,0.24165,02:10
2,1.605509,1.626857,0.39348,0.284832,02:11
3,1.554458,1.596805,0.415466,0.298099,02:11
4,1.509943,1.60327,0.411676,0.290196,02:10


# PPRFCN

In [24]:
from dataloaders.spatialsense.pprfcnDataset import pprfcnDataset
from dataloaders.spatialsense.utils import  map_spatialsenses_to_stupd
from models.static.pprfcn import PPRFCN
import torchvision.transforms as transforms

In [25]:
train_ds = pprfcnDataset(annotations_path = spatialsenses_pth/'annotations.json',
                         image_path = spatialsenses_pth/'images',
                         split='train',
                         x_tfms = [transforms.ToPILImage("RGB"),
                                   transforms.ColorJitter(0.1, 0.1, 0.1, 0.05),
                                  ],
                         y_category_tfms = [map_spatialsenses_to_stupd],
                        )

valid_ds = pprfcnDataset(annotations_path = spatialsenses_pth/'annotations.json',
                         image_path = spatialsenses_pth/'images',
                         split='valid',
                         x_tfms = [transforms.ToPILImage("RGB")],
                         y_category_tfms = [map_spatialsenses_to_stupd])

len(train_ds), len(valid_ds)

(5619, 1319)

In [26]:
train_dl = DataLoader(train_ds, batch_size =64 , shuffle = True)
valid_dl = DataLoader(valid_ds, batch_size = 128 , shuffle = True)

In [27]:
dls = DataLoaders(train_dl, valid_dl)
dls.n_inp = 3

model = PPRFCN(train_ds.c, imagenet_pretrained = False).cuda()
learn = Learner(dls, model = model, loss_func = CrossEntropyLossFlat(), metrics = [accuracy,BalancedAccuracy()])

In [28]:
learn.fit(5)

epoch,train_loss,valid_loss,accuracy,balanced_accuracy_score,time
0,1.830299,1.724167,0.360121,0.248109,06:55
1,1.702148,1.648282,0.392722,0.302793,06:56
2,1.621165,1.591578,0.404094,0.302426,06:54
3,1.566635,1.590252,0.426839,0.313275,06:56
4,1.534065,1.586482,0.423048,0.315444,06:56


# VTransE

In [29]:
from dataloaders.spatialsense.vtranseDataset import vtranseDataset
from dataloaders.spatialsense.utils import  map_spatialsenses_to_stupd
from models.static.vtranse import VtransE
import torchvision.transforms as transforms

In [30]:
train_ds = vtranseDataset(annotations_path = spatialsenses_pth/'annotations.json',
                         image_path = spatialsenses_pth/'images',
                          encoder_path = encoder_path, 
                         split='train',
                         x_tfms = [transforms.ToPILImage("RGB"),
                                   transforms.ColorJitter(0.1, 0.1, 0.1, 0.05),
                                  ],
                         y_category_tfms = [map_spatialsenses_to_stupd],
                        )

valid_ds = vtranseDataset(annotations_path = spatialsenses_pth/'annotations.json',
                         image_path = spatialsenses_pth/'images',
                          encoder_path = encoder_path, 
                         split='valid',
                          x_category_tfms = None,
    
                         x_tfms = [transforms.ToPILImage("RGB")],
                         y_category_tfms = [map_spatialsenses_to_stupd])

len(train_ds), len(valid_ds)

(5619, 1319)

In [31]:
train_dl = DataLoader(train_ds, batch_size =64 , shuffle = True)
valid_dl = DataLoader(valid_ds, batch_size = 128 , shuffle = True)

In [32]:
dls = DataLoaders(train_dl, valid_dl)
dls.n_inp = 7

model = VtransE(word_embedding_dim = 300, num_classes = train_ds.c, imagenet_pretrained = False).cuda()
learn = Learner(dls, model = model, loss_func = CrossEntropyLossFlat(), metrics = [accuracy,BalancedAccuracy()])

In [33]:
learn.fit(5)

epoch,train_loss,valid_loss,accuracy,balanced_accuracy_score,time
0,1.541802,1.447526,0.457165,0.336466,01:10
1,1.395959,1.362635,0.492798,0.405751,01:10
2,1.305874,1.321776,0.5163,0.436651,01:10
3,1.2313,1.351207,0.517817,0.439933,01:09
4,1.162749,1.35108,0.507961,0.43557,01:09
