In [5]:
import numpy as np
from PIL import Image

In [6]:
image = np.load("image.npy") 
print(image.shape)

(210, 160, 3)


In [7]:
img = Image.fromarray(image, 'RGB')  # image should be (h, w, 3)
img.save('monte_stair.png')
#img.show()


In [158]:
import os
import cv2
import numpy as np
from PIL import Image
import numpy as np
import os
import pandas as pd
import torch
import torch.utils.data
import argparse
import time
import csv
import torch.nn as nn
from torch import optim
import math
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from collections import namedtuple
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence, PackedSequence
from inferno.trainers.basic import Trainer
from inferno.trainers.callbacks.base import Callback
import torch.nn.functional as F
import time


def tuple_tostring(tuple):
	return ','.join(tuple[1:-2].split(" "))

def parse_txt(full_textname): 
	file_ = open(full_textname)
	parsed_text = list(filter(None, [tuple_tostring(line) for line in file_]))
	file_.close()	
	return parsed_text

def parse_annotation(text_dir, img_dir, label_file):
    '''
    new parse_annotation code
    example use:
    text_dir = '/Users/sw/programming/10703/project/yolo-boundingbox-labeler-GUI/bbox_txt'
    img_dir = '/Users/sw/programming/10703/project/yolo-boundingbox-labeler-GUI/images'
    '''
    # LABELS dict
    labels_ = parse_txt(label_file)
    LABELS = {}
    IDX_TO_LABELSTR = {}
    i = 0
    for label in labels_:
        LABELS[label] = i
        IDX_TO_LABELSTR[i] = label
        i += 1
    CLASS = len(LABELS)

    all_imgs = []
    all_labels = []

    file_names = [name for name in os.listdir(text_dir) if name.endswith('.txt')] # based on those with bboxes

    for file_name in file_names:
        img = {'object':[]}

        full_textname = text_dir + '/' + file_name
        full_imgname = img_dir + '/' + file_name.replace('txt', 'png') # TODO

        img_label = parse_txt(full_textname)

        label_encode = np.zeros((1,CLASS)).astype(int)
        for label in img_label:
            label_encode[0, LABELS[label]] = 1

        im = Image.open(full_imgname)
        image = np.array(im)
        im.close()
        
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        
        #img_ = Image.fromarray(image, 'L')
        #img_.show()
        
        image = cv2.resize(image, (84, 84), interpolation = cv2.INTER_CUBIC)[np.newaxis,np.newaxis,:,:]
        
        all_labels.append(label_encode)
        all_imgs.append(image)

    all_labels = np.concatenate(all_labels)	
    all_imgs = np.concatenate(all_imgs)	

    return all_imgs, all_labels, LABELS, IDX_TO_LABELSTR, CLASS



In [183]:
class CNNModel(torch.nn.Module):
    def __init__(self, args, dataloader, valid_dataloader, CLASSES, IDX_TO_LABELSTR, dropout=0.5):
        super(CNNModel, self).__init__()
        self.dropout = dropout
        self.IDX_TO_LABELSTR = IDX_TO_LABELSTR
        self.CLASSES = CLASSES
        print(CLASSES)

        # input (1, 84, 84)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=8, stride=4)
        self.relu1 = nn.ReLU()
        #self.bn1 = nn.BatchNorm1d(256)
        #self.drop1 = nn.Dropout(dropout)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2)
        self.relu2 = nn.ReLU()
        #self.bn2 = nn.BatchNorm1d(384)
        #self.drop2 = nn.Dropout(dropout)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1)
        self.relu3 = nn.ReLU()
        #self.bn3 = nn.BatchNorm1d(512)
        #self.drop3 = nn.Dropout(dropout)

        # flatten

        self.linear1 = nn.Linear(in_features=3136,
                                    out_features=256)
        self.relulinear1 = nn.ReLU()
        #self.droplinear1 = nn.Dropout(0.15)

        for i, class_num in enumerate(CLASSES):
            setattr(self, 'projection_{}'.format(i), nn.Linear(in_features=256,
                                        out_features=class_num))
            #self.projection = nn.Linear(in_features=256,
            #                            out_features=CLASSES[0])
        self.sigmoid = nn.Sigmoid()

        # initialization
        #self.apply(wsj_initializer)

        self.args = args
        self.dataloader = dataloader
        self.valid_dataloader = valid_dataloader
        self.criterion = nn.BCELoss()
        self.best_validation_acc = 0
        self.model_param_str = 'weights'

        self.optimizer = optim.Adam(self.parameters(), lr=args.init_lr)
        #self.optimizer = optim.RMSprop(self.parameters(), lr=args.init_lr)
        #self.optimizer = optim.SGD(self.parameters(), lr=args.init_lr)
        if torch.cuda.is_available():
            self.cuda()

    def forward(self, x):
        h = self.relu1(self.conv1(x))
        h = self.relu2(self.conv2(h))
        h = self.relu3(self.conv3(h))

        # flatten
        h_size = h.size()
        h = h.view(h_size[0], -1)

        h = self.relulinear1(self.linear1(h))

        #output = self.sigmoid(self.projection(h))
        outputs = []
        for i in range(len(CLASSES)):
            outputs.append(self.sigmoid(getattr(self, 'projection_{}'.format(i))(h)))
            
        outputs = torch.cat(outputs, 1)
        
        return outputs

    def model_train(self):
        for i in range(self.args.epochs):
            print("---------epoch {}---------".format(i))
            start_time = time.time()
            self.train()  # right place

            losses = 0
            total_cnt = 0

            for input_x, label in self.dataloader:
                self.zero_grad()

                output = self.forward(to_variable(input_x)) 

                loss = self.criterion(output, to_variable(label))

                total_cnt += 1
                losses += loss.data[0]

                loss.backward()
                self.optimizer.step()
            print("training loss: {}".format(losses / total_cnt / self.args.batch_size))
            validation_acc = self.evaluate()
            if validation_acc > self.best_validation_acc:
                print("--------saving best model--------")
                self.model_param_str = \
                    'parser_epoch_{}_loss_{}_valacc_{}'.format(
                        i, losses / total_cnt / self.args.batch_size, validation_acc)
                torch.save(self.state_dict(), self.model_param_str + '.t7')
                self.best_validation_acc = validation_acc

            print("--- %s seconds ---" % (time.time() - start_time))	

        return self.model_param_str		

    def evaluate(self):
        self.eval()

        losses = 0
        total_cnt = 0
        validation_accs = []
        for input_x, label in self.valid_dataloader:
            total_cnt += 1
            output = self.forward(to_variable(input_x))

            loss = self.criterion(output, to_variable(label))
            losses += loss.data[0]
            
            output = output.data
            cond1 = output < 0.5
            cond2 = output >= 0.5
            output[cond1] = 0
            output[cond2] = 1
            shape = output.shape
            #print("shape:", shape) # (B, Class_num)
            #print("output", output)
            #print("label", label)
            validation_accs.append(torch.sum(output == label) / shape[0] / shape[1])

        losses /= total_cnt * self.args.batch_size
        print("validation loss: {}".format(losses))
        validation_acc = np.mean(validation_accs)
        print("validation accuracy: {}".format(validation_acc))
        return validation_acc
    
    def decode_state(self, input_x):
        self.eval()
        
        output = self.forward(to_variable(input_x))

        output = output.data.numpy()[0] # TODO
        decoded_state = get_state(output, self.IDX_TO_LABELSTR)
        
        return decoded_state

def get_state(img_label, IDX_TO_LABELSTR):
    labels = []
    for i, logit in enumerate(img_label):
        if logit >= 0.5:
            labels.append(IDX_TO_LABELSTR[i])
    return labels

In [25]:
def to_tensor(numpy_array, datatype):
	# Numpy array -> Tensor
	if datatype == 'int':
		return torch.from_numpy(numpy_array).int()
	elif datatype == 'long':
		return torch.from_numpy(numpy_array).long()
	else:
		return torch.from_numpy(numpy_array).float()


def to_variable(tensor, cpu=False):
	# Tensor -> Variable (on GPU if possible)
	if torch.cuda.is_available() and not cpu:
		# Tensor -> GPU Tensor
		tensor = tensor.cuda()
	return torch.autograd.Variable(tensor)

In [78]:
class myDataset(torch.utils.data.Dataset):
	def __init__(self, input_x, labels, test=False):
		self.input_x = torch.from_numpy(input_x).float()
		self.labels = torch.from_numpy(labels).float()

	def __getitem__(self, index):
		return self.input_x[index], self.labels[index]

	def __len__(self):
		return len(self.input_x)

In [6]:
from sklearn.model_selection import train_test_split

In [159]:

batch_size = 8

all_imgs, all_labels, LABELS, IDX_TO_LABELSTR, CLASS = parse_annotation('lev1_labeled/imgLevel1Label', 'lev1_labeled/imgLevel1Label', 'lev1_labeled/0_allpossible.txt')

X_train, X_valid, y_train, y_valid = train_test_split(
        all_imgs, all_labels, random_state=6060, train_size=0.75)


In [89]:
args = namedtuple('args',
                  [
                      'batch_size',
                      'save_directory',
                      'epochs',
                      'init_lr',
                      'cuda'])(
    32,
    'save_weights/',
    40,
    1e-4,
    False)

kwargs = {'num_workers': 2, 'pin_memory': True} if args.cuda else {}
train_loader = DataLoader(
    myDataset(X_train, y_train), shuffle=True,
    batch_size=args.batch_size, **kwargs)
valid_loader = DataLoader(
    myDataset(X_valid, y_valid), shuffle=True,
    batch_size=args.batch_size, **kwargs)


In [187]:
CLASSES = [15, 3]
model = CNNModel(args, train_loader, valid_loader, CLASSES, IDX_TO_LABELSTR, dropout=0.5)


[15, 3]


In [188]:
model_param_str = 'parser_epoch_23_loss_0.0002506485904053658_valacc_0.99765625'
pretrained_dict = torch.load(model_param_str + '.t7')
model_dict = model.state_dict()

# 1. filter out unnecessary keys
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict) 
# 3. load the new state dict
model.load_state_dict(model_dict)

In [168]:
model_param_str = 'parser_epoch_23_loss_0.0002506485904053658_valacc_0.99765625'
model.load_state_dict(torch.load(model_param_str + ".t7"))

KeyError: 'unexpected key "projection.weight" in state_dict'

In [165]:
for x in valid_loader:
    #print(x)
    model.decode_state(x[0][1:2])
    break

Variable containing:
(0 ,0 ,.,.) = 
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
      ...         ⋱        ...      
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
[torch.FloatTensor of size 1x1x84x84]

Variable containing:

Columns 0 to 5 
 1.0000e+00  3.0032e-08  1.6336e-02  6.1793e-03  7.0580e-04  2.4750e-05

Columns 6 to 11 
 9.9755e-01  2.1443e-03  3.0528e-04  2.5332e-06  1.0000e+00  8.4269e-06

Columns 12 to 14 
 9.9999e-01  1.0000e+00  9.9827e-01
[torch.FloatTensor of size 1x15]

[9.9999976e-01 3.0031810e-08 1.6336488e-02 6.1792973e-03 7.0579525e-04
 2.4749941e-05 9.9754673e-01 2.1443346e-03 3.0527558e-04 2.5331933e-06
 9.9999750e-01 8.4269141e-06 9.9998593e-01 9.9999976e-01 9.9826652e-01]
['actorInRoom,room_1', 'actorOnSpot,room_1,conveyor_1', 'keyExists,room_1,key_1', 'doorExists,room_1,door_1', 'doorExists,room_1,door_2', 'monsterExists,room_1,sku

In [156]:
LABELS

{'actorInRoom,room_1': 0,
 'actorInRoom,room_2': 1,
 'actorOnSpot,room_1,chain_1': 5,
 'actorOnSpot,room_1,conveyor_1': 6,
 'actorOnSpot,room_1,conveyor_2': 7,
 'actorOnSpot,room_1,entrance_1': 8,
 'actorOnSpot,room_1,entrance_2': 9,
 'actorOnSpot,room_1,ladder_1': 2,
 'actorOnSpot,room_1,ladder_2': 3,
 'actorOnSpot,room_1,ladder_3': 4,
 'actorWithKey': 11,
 'doorExists,room_1,door_1': 12,
 'doorExists,room_1,door_2': 13,
 'keyExists,room_1,key_1': 10,
 'monsterExists,room_1,skull_1': 14}

In [None]:
state = torch.Tensor([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1])
model.decode_state(state)

In [185]:
model.evaluate()

validation loss: 0.02394154667854309
validation accuracy: 0.49291666666666667


0.49291666666666667

In [186]:
model.model_train()

---------epoch 0---------
training loss: 0.00879058318043297
validation loss: 0.006028550677001476
validation accuracy: 0.9573437499999999
--------saving best model--------
--- 1.3628971576690674 seconds ---
---------epoch 1---------
training loss: 0.004899389925412834
validation loss: 0.004130142246140167
validation accuracy: 0.9674479166666666
--------saving best model--------
--- 1.3731269836425781 seconds ---
---------epoch 2---------
training loss: 0.0033585435012355447
validation loss: 0.0028550749411806464
validation accuracy: 0.9769791666666667
--------saving best model--------
--- 1.3687889575958252 seconds ---
---------epoch 3---------
training loss: 0.0025308078408918596
validation loss: 0.002140058742952533
validation accuracy: 0.9864583333333333
--------saving best model--------
--- 1.3870577812194824 seconds ---
---------epoch 4---------
training loss: 0.0019815760632892225
validation loss: 0.0019965337560279295
validation accuracy: 0.9819270833333333
--- 1.37889170646667

KeyboardInterrupt: 

In [27]:
model.train()

CNNModel(
  (conv1): Conv2d (1, 32, kernel_size=(8, 8), stride=(4, 4))
  (relu1): ReLU()
  (conv2): Conv2d (32, 64, kernel_size=(4, 4), stride=(2, 2))
  (relu2): ReLU()
  (conv3): Conv2d (64, 64, kernel_size=(3, 3), stride=(1, 1))
  (relu3): ReLU()
  (linear1): Linear(in_features=3136, out_features=256)
  (relulinear1): ReLU()
  (projection): Linear(in_features=256, out_features=15)
  (criterion): BCELoss(
  )
)

In [33]:
for x in valid_loader:
    print(np.max(x[0]))
    break

TypeError: max received an invalid combination of arguments - got (out=NoneType, axis=NoneType, ), but expected one of:
 * no arguments
 * (torch.ByteTensor other, *, torch.ByteTensor out)
      didn't match because some of the keywords were incorrect: axis
 * (int dim, *, tuple[torch.ByteTensor, torch.LongTensor] out)
 * (int dim, bool keepdim, *, tuple[torch.ByteTensor, torch.LongTensor] out)
