In [1]:
import os
import torch
import numpy as np
from PIL import Image
import pickles
from torch.autograd import Variable
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import random
import pandas as pd

ModuleNotFoundError: No module named 'pickles'

In [None]:
class Text2ImageDataset(Dataset):
    """Dataset yielding (matching image, mismatching image, caption embedding) samples.

    Images are converted to single-channel tensors normalised with mean 0.5 and
    std 0.5.  The caption map and the pre-computed caption embeddings are read
    from pickle files under ``C:\\Base`` when the dataset is constructed.

    Parameters
    ----------
    data_dir :
        Stored on the instance as ``self.data_dir``.  The loading code below
        uses fixed paths and does not read this value.
    """

    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.image_transform = transforms.Compose([
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5])])

        self.load_flower_dataset()

    def load_flower_dataset(self):
        """Populate ``img_files``, ``img_captions`` and ``encoded_captions``.

        ``img_files`` is an array of the keys of the caption map;
        ``img_captions`` is the caption map itself; ``encoded_captions`` is the
        object stored in the embedding pickle (indexed by image file name in
        ``__getitem__``).
        """
        # The module-level name is still published for any notebook cell that
        # reads it, but the class itself no longer depends on it.
        global img_files

        print ("------------------  Loading images  ------------------")
        filepath = os.path.join('C:\\Base\\', 'file_caption_map.pickle')
        # NOTE(review): unpickling can execute arbitrary code; only load files
        # that were produced by a trusted source.
        with open(filepath, 'rb') as fileObject:
            filenames = pd.read_pickle(fileObject)
        img_files = np.array(list(filenames.keys()))
        self.img_files = img_files

        print('Load filenames from: %s (%d)' % (filepath, img_files.size))

        print ("------------------  Loading captions  ----------------")

        self.img_captions = filenames

        print ("---------------  Loading Skip-thought Model  ---------------")
        embedding_filename = '/file_caption_embedding.pickle'

        with open('C:\\Base' + embedding_filename, 'rb') as f:
            self.encoded_captions = pd.read_pickle(f)

        print ("-------------  Encoding of image captions DONE  -------------")

    def read_image(self, image_file_name):
        """Open ``image_file_name`` from the images folder and return it resized to 64x64."""
        image_path = 'C:\\Base\\images\\' + image_file_name
        # resize() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the resized copy exists.
        with Image.open(image_path) as image:
            return image.resize((64, 64))

    def get_false_img(self, index):
        """Return the file name of a randomly chosen image whose position differs from ``index``.

        Raises
        ------
        ValueError
            If the dataset holds a single image and ``index`` refers to it, in
            which case no mismatching image exists.
        """
        num_images = len(self.img_files)
        if num_images == 1 and index == 0:
            raise ValueError('cannot draw a mismatching image from a one-image dataset')

        # Rejection sampling in a loop instead of unbounded recursion.
        while True:
            false_img_id = np.random.randint(num_images)
            if false_img_id != index:
                return self.img_files[false_img_id]

    def __len__(self):
        """Number of images known to this dataset instance."""
        return self.img_files.size

    def __getitem__(self, index):
        """Build one training sample.

        Returns a dict with keys ``'true_imgs'`` (transformed image at
        ``index``), ``'false_imgs'`` (transformed image from another position)
        and ``'true_embed'`` (one randomly selected row of the embeddings
        stored for the image at ``index``, as a ``torch.FloatTensor``).
        """
        image_name = self.img_files[index]

        sample = {}
        sample['true_imgs'] = self.image_transform(self.read_image(image_name))
        sample['false_imgs'] = self.image_transform(self.read_image(self.get_false_img(index)))

        embeddings = self.encoded_captions[image_name]
        # Pick one of the available caption embeddings uniformly at random.
        embedding_ix = random.randint(0, embeddings.shape[0]-1)
        sample['true_embed'] = torch.FloatTensor(embeddings[embedding_ix, :])

        return sample

Cleaning data


In [2]:
import os

import theano
import theano.tensor as tensor

import pickle as pkl
import numpy
import copy
import nltk

from collections import OrderedDict, defaultdict
from scipy.linalg import norm
from nltk.tokenize import word_tokenize
from numba import jit

# Passed as the `profile` argument of theano.scan in gru_layer.
profile = False

#-----------------------------------------------------------------------------#
# Specify model and table locations here
#-----------------------------------------------------------------------------#
# Backslashes are escaped explicitly: '\B' is not a valid escape sequence and
# only worked because Python leaves unknown escapes untouched (with a warning).
path_to_models = 'C:\\Base\\'
path_to_tables = 'C:\\Base\\'
#-----------------------------------------------------------------------------#

# Parameter archives; load_model also reads '<archive>.pkl' next to each one.
path_to_umodel = path_to_models + 'uni_skip.npz'
path_to_bmodel = path_to_models + 'bi_skip.npz'

def load_model():
    """
    Assemble the uni- and bi-directional encoders together with their word tables.

    Returns a dict with the keys 'uoptions', 'boptions', 'utable', 'btable',
    'f_w2v' and 'f_w2v2' (the last two are compiled Theano functions).
    """
    # Option dictionaries are pickled next to each parameter archive.
    print('Loading model parameters...')
    loaded_options = []
    for archive_path in (path_to_umodel, path_to_bmodel):
        with open('%s.pkl'%archive_path, 'rb') as f:
            loaded_options.append(pkl.load(f))
    uoptions, boptions = loaded_options

    # Initialise parameters, overwrite them from disk, wrap as shared variables.
    utparams = init_tparams(load_params(path_to_umodel, init_params(uoptions)))
    btparams = init_tparams(load_params(path_to_bmodel, init_params_bi(boptions)))

    # Compile one extractor function per encoder.
    print('Compiling encoders...')
    uni_embedding, uni_mask, uni_ctx = build_encoder(utparams, uoptions)
    f_w2v = theano.function([uni_embedding, uni_mask], uni_ctx, name='f_w2v')
    bi_embedding, bi_mask, bi_ctx = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([bi_embedding, bi_mask], bi_ctx, name='f_w2v2')

    print('Loading tables...')
    utable, btable = load_tables()

    # Bundle everything the encoder needs, echoing each key as it is stored.
    print('Packing up...')
    model = {}
    for entry_name, entry_value in (('uoptions', uoptions),
                                    ('boptions', boptions),
                                    ('utable', utable),
                                    ('btable', btable),
                                    ('f_w2v', f_w2v),
                                    ('f_w2v2', f_w2v2)):
        model[entry_name] = entry_value
        print(entry_name)

    return model


def load_tables():
    """
    Load the word-embedding tables.

    Reads 'utable.npy', 'btable.npy' and 'dictionary.txt' from
    ``path_to_tables`` and returns ``(utable, btable)``: two OrderedDicts that
    pair the i-th dictionary line (UTF-8 decoded, stripped) with the i-th entry
    of the corresponding array.
    """
    utable = numpy.load(path_to_tables + 'utable.npy', allow_pickle = True, encoding='bytes')
    btable = numpy.load(path_to_tables + 'btable.npy', allow_pickle = True, encoding='bytes')
    # The context manager closes the dictionary file even if decoding fails.
    with open(path_to_tables + 'dictionary.txt', 'rb') as f:
        words = [line.decode('utf-8').strip() for line in f]
    utable = OrderedDict(zip(words, utable))
    btable = OrderedDict(zip(words, btable))
    return utable, btable

class Encoder(object):
    """
    Thin object wrapper that binds a loaded model to the module-level ``encode``.
    """

    def __init__(self, model):
        # Model dictionary as produced by load_model().
        self._model = model

    def encode(self, X, use_norm=True, verbose=True, batch_size=128, use_eos=False):
        """Encode the sentences in ``X`` with the wrapped model; see ``encode``."""
        return encode(self._model, X,
                      use_norm=use_norm,
                      verbose=verbose,
                      batch_size=batch_size,
                      use_eos=use_eos)
    
def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False):
    """
    Encode a list of sentences into feature vectors.

    model: dict with 'utable', 'btable', 'uoptions', 'boptions', 'f_w2v', 'f_w2v2'
    X: list of sentences (strings); passed through ``preprocess`` first
    use_norm: divide every encoder output row by its norm
    verbose: print each sentence length as it is processed
    batch_size: maximum number of equal-length sentences encoded per call
    use_eos: append the '<eos>' embedding as an extra final time step

    Returns an array with one row per sentence: the uni-directional features
    followed by the bi-directional features.
    """
    X = preprocess(X)

    utable = model['utable']
    btable = model['btable']
    udim_word = model['uoptions']['dim_word']
    bdim_word = model['boptions']['dim_word']
    # batch_size used to be ignored (one sentence per encoder call); values
    # below 1 are clamped so such calls keep working.
    batch_size = max(1, int(batch_size))

    ufeatures = numpy.zeros((len(X), model['uoptions']['dim']), dtype='float32')
    bfeatures = numpy.zeros((len(X), 2 * model['boptions']['dim']), dtype='float32')

    # length dictionary: number of tokens -> indices of sentences of that length
    ds = defaultdict(list)
    captions = [s.split() for s in X]
    for i, s in enumerate(captions):
        ds[len(s)].append(i)

    # Get features. This encodes by length, in order to avoid wasting computation
    for k in ds.keys():
        if verbose:
            print(k)
        # Ceiling division: enough batches to hold every sentence, never an empty one.
        numbatches = (len(ds[k]) + batch_size - 1) // batch_size
        # Every sentence in this group has exactly k tokens (plus optional <eos>).
        nsteps = k + 1 if use_eos else k
        for minibatch in range(numbatches):
            caps = ds[k][minibatch::numbatches]

            uembedding = numpy.zeros((nsteps, len(caps), udim_word), dtype='float32')
            bembedding = numpy.zeros((nsteps, len(caps), bdim_word), dtype='float32')
            for ind, c in enumerate(caps):
                for j, word in enumerate(captions[c]):
                    # Out-of-vocabulary tokens fall back to the 'UNK' entry.
                    key = word if word in utable else 'UNK'
                    uembedding[j, ind] = utable[key]
                    bembedding[j, ind] = btable[key]
                if use_eos:
                    uembedding[-1, ind] = utable['<eos>']
                    bembedding[-1, ind] = btable['<eos>']

            # All positions are real tokens, so the mask is all ones.
            mask = numpy.ones((nsteps, len(caps)), dtype='float32')
            uff = model['f_w2v'](uembedding, mask)
            bff = model['f_w2v2'](bembedding, mask)
            if use_norm:
                for j in range(len(uff)):
                    uff[j] /= norm(uff[j])
                    bff[j] /= norm(bff[j])
            for ind, c in enumerate(caps):
                ufeatures[c] = uff[ind]
                bfeatures[c] = bff[ind]
    features = numpy.c_[ufeatures, bfeatures]
    return features


def preprocess(text):
    """
    Tokenize every entry of ``text`` for the encoder.

    Each entry is split into sentences, each sentence into word tokens, and the
    result is one string per entry in which every sentence contributes a
    leading space followed by its space-joined tokens.
    """
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    processed = []
    for entry in text:
        pieces = [' ' + ' '.join(word_tokenize(sentence))
                  for sentence in sent_detector.tokenize(entry)]
        processed.append(''.join(pieces))
    return processed


def nn(model, text, vectors, query, k=5):
    """
    Print the k sentences of ``text`` whose vectors score highest against ``query``.

    text: list of sentences
    vectors: the corresponding representations for text
    query: a string to search

    NOTE(review): a later cell runs ``import torch.nn as nn``, which rebinds
    the name ``nn`` and hides this function from that point on.
    """
    query_features = encode(model, [query])
    query_features /= norm(query_features)
    similarity = numpy.dot(query_features, vectors.T).flatten()
    # Highest score first.
    ranking = numpy.argsort(similarity)[::-1]
    nearest = [text[position] for position in ranking[:k]]
    print('QUERY: ' + query)
    print('NEAREST: ')
    for sentence, position in zip(nearest, ranking):
        print(sentence, position)


def word_features(table):
    """
    Extract word features into a normalized matrix.

    table: mapping from word to 1-D vector
    Returns a float32 array with one row per entry of ``table`` (in iteration
    order), each row being the vector divided by its norm.  The number of
    columns is taken from the first vector; an empty table yields shape (0, 620).
    """
    # Materialise the values: dict views cannot be indexed on Python 3.
    vectors = list(table.values())
    width = len(vectors[0]) if vectors else 620
    features = numpy.zeros((len(vectors), width), dtype='float32')
    for i, f in enumerate(vectors):
        features[i] = f / norm(f)
    return features


def nn_words(table, wordvecs, query, k=10):
    """
    Print the k words whose rows in ``wordvecs`` score highest against ``query``.

    table: mapping from word to vector; row i of ``wordvecs`` must correspond
           to the i-th key of ``table``
    wordvecs: matrix of word vectors, one row per word
    query: a word present in ``table`` (KeyError otherwise)
    """
    # A list is required here: dict views do not support indexing on Python 3.
    keys = list(table.keys())
    qf = table[query]
    scores = numpy.dot(qf, wordvecs.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]
    words = [keys[a] for a in sorted_args[:k]]
    print('QUERY: ' + query)
    print('NEAREST: ')
    for w in words:
        print(w)


def _p(pp, name):
    """
    make prefix-appended name
    """
    return '%s_%s'%(pp, name)


def init_tparams(params):
    """
    Wrap every entry of ``params`` in a Theano shared variable of the same name.

    Returns an OrderedDict with the same keys, in the same order.
    """
    return OrderedDict(
        (key, theano.shared(value, name=key)) for key, value in params.items()
    )


def load_params(path, params):
    """
    Overwrite the entries of ``params`` with the arrays stored in an archive.

    path: location of the archive opened with ``numpy.load``
    params: mapping whose keys name the arrays to fetch; updated in place

    Keys that are absent from the archive keep their current value and trigger
    a warning.  Returns ``params``.
    """
    # Imported locally: the module never imported `warnings`, so the
    # missing-key branch used to fail with NameError instead of warning.
    import warnings

    # The context manager releases the archive's file handle when done.
    with numpy.load(path) as pp:
        for kk in params.keys():
            if kk not in pp:
                warnings.warn('%s is not in the archive'%kk)
                continue
            params[kk] = pp[kk]
    return params


# Registry of layer types: 'name' -> ('parameter initializer', 'feedforward'),
# both given as the names of functions defined in this module.
layers = {'gru': ('param_init_gru', 'gru_layer')}

def get_layer(name):
    """Return the (initializer, feedforward) callables registered under ``name``."""
    init_name, forward_name = layers[name]
    # The registry holds only the literal names above; they are resolved here.
    return (eval(init_name), eval(forward_name))


def init_params(options):
    """
    Create the initial parameters of the uni-directional encoder.

    Reads 'n_words_src', 'dim_word', 'encoder' and 'dim' from ``options`` and
    returns an OrderedDict holding the word embedding plus the encoder weights.
    """
    params = OrderedDict()

    # word embedding matrix
    params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word'])

    # encoder layer (selected by options['encoder'])
    param_initializer = get_layer(options['encoder'])[0]
    return param_initializer(options, params, prefix='encoder',
                             nin=options['dim_word'], dim=options['dim'])


def init_params_bi(options):
    """
    Create the initial parameters of the bidirectional encoder.

    Same as ``init_params`` but with two encoder parameter sets, stored under
    the prefixes 'encoder' and 'encoder_r'.
    """
    params = OrderedDict()

    # word embedding matrix
    params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word'])

    # one set of encoder weights per prefix, created in this order
    for direction_prefix in ('encoder', 'encoder_r'):
        param_initializer = get_layer(options['encoder'])[0]
        params = param_initializer(options, params, prefix=direction_prefix,
                                   nin=options['dim_word'], dim=options['dim'])
    return params


def build_encoder(tparams, options):
    """
    Build the symbolic graph of the encoder on pre-computed word embeddings.

    Returns ``(embedding, x_mask, ctx)``: the two symbolic inputs and the last
    element of the layer's first output.
    """
    # symbolic inputs
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    # run the configured layer over the whole sequence
    feedforward = get_layer(options['encoder'])[1]
    proj = feedforward(tparams, embedding, options, prefix='encoder', mask=x_mask)

    return embedding, x_mask, proj[0][-1]


def build_encoder_bi(tparams, options):
    """
    Build the symbolic graph of the bidirectional encoder.

    The same layer type is applied to the input sequence and to its reversal
    (with the mask reversed likewise); the final element of each pass is
    concatenated along axis 1.  Returns ``(embedding, x_mask, ctx)``.
    """
    # symbolic inputs and their reversed views
    embedding = tensor.tensor3('embedding', dtype='float32')
    embeddingr = embedding[::-1]
    x_mask = tensor.matrix('x_mask', dtype='float32')
    xr_mask = x_mask[::-1]

    # first pass over the sequence as given
    feedforward = get_layer(options['encoder'])[1]
    proj = feedforward(tparams, embedding, options, prefix='encoder', mask=x_mask)
    # second pass over the reversed sequence, with its own parameters
    feedforward_r = get_layer(options['encoder'])[1]
    projr = feedforward_r(tparams, embeddingr, options, prefix='encoder_r', mask=xr_mask)

    final_states = [proj[0][-1], projr[0][-1]]
    ctx = tensor.concatenate(final_states, axis=1)

    return embedding, x_mask, ctx


# some utilities
def ortho_weight(ndim):
    """Return the left singular vectors of a random ndim x ndim Gaussian matrix as float32."""
    gaussian = numpy.random.randn(ndim, ndim)
    left_vectors = numpy.linalg.svd(gaussian)[0]
    return left_vectors.astype('float32')


def norm_weight(nin,nout=None, scale=0.1, ortho=True):
    """
    Create a float32 weight matrix of shape (nin, nout).

    ``nout`` defaults to ``nin``.  A square matrix with ``ortho`` set comes
    from ``ortho_weight``; anything else is drawn uniformly from
    [-scale, scale).
    """
    if nout is None:
        nout = nin
    if ortho and nout == nin:
        return ortho_weight(nin).astype('float32')
    uniform = numpy.random.uniform(low=-scale, high=scale, size=(nin, nout))
    return uniform.astype('float32')


def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Add freshly initialised GRU weights to ``params`` and return it.

    ``nin`` and ``dim`` fall back to ``options['dim_proj']`` when omitted.  The
    entries created are ``<prefix>_W``, ``<prefix>_b``, ``<prefix>_U``,
    ``<prefix>_Wx``, ``<prefix>_Ux`` and ``<prefix>_bx``, in that order.
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # Two input-to-hidden blocks stacked side by side, with a matching bias.
    input_blocks = [norm_weight(nin,dim), norm_weight(nin,dim)]
    params[_p(prefix,'W')] = numpy.concatenate(input_blocks, axis=1)
    params[_p(prefix,'b')] = numpy.zeros((2 * dim,)).astype('float32')

    # Two hidden-to-hidden blocks stacked side by side.
    recurrent_blocks = [ortho_weight(dim), ortho_weight(dim)]
    params[_p(prefix,'U')] = numpy.concatenate(recurrent_blocks, axis=1)

    # Separate weights and bias used for the candidate state.
    params[_p(prefix,'Wx')] = norm_weight(nin, dim)
    params[_p(prefix,'Ux')] = ortho_weight(dim)
    params[_p(prefix,'bx')] = numpy.zeros((dim,)).astype('float32')

    return params


def gru_layer(tparams, state_below, options, prefix='gru', mask=None, **kwargs):
    """
    Forward pass through GRU layer

    tparams: mapping of shared variables; the entries <prefix>_W, <prefix>_b,
             <prefix>_Wx, <prefix>_bx, <prefix>_U and <prefix>_Ux are read
    state_below: symbolic input sequence, scanned along its first axis
    options: not read by this function
    mask: optional symbolic mask; a step whose mask is 0 keeps the previous state

    Returns a one-element list holding the scan output (the state after every step).
    """
    nsteps = state_below.shape[0]
    # A 3-D input carries the number of samples on axis 1; otherwise a single sample is assumed.
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    dim = tparams[_p(prefix,'Ux')].shape[1]

    # NOTE(review): `mask == None` relies on default equality; `mask is None` would be the usual spelling.
    if mask == None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    def _slice(_x, n, dim):
        # Select the n-th block of width `dim` along the last axis.
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # Input projections are computed once for the whole sequence, outside the scan:
    # state_below_ feeds the two gates, state_belowx feeds the candidate state.
    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]
    # These two locals are not used below; the scan reads the same entries from tparams directly.
    U = tparams[_p(prefix, 'U')]
    Ux = tparams[_p(prefix, 'Ux')]

    def _step_slice(m_, x_, xx_, h_, U, Ux):
        # m_: mask at this step, x_/xx_: projected inputs at this step, h_: previous state.
        preact = tensor.dot(h_, U)
        preact += x_

        # First block of the pre-activation -> gate r, second block -> gate u.
        r = tensor.nnet.sigmoid(_slice(preact, 0, dim))
        u = tensor.nnet.sigmoid(_slice(preact, 1, dim))

        # Candidate state: recurrent contribution scaled by r, plus the input projection.
        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)

        # Blend previous state and candidate using gate u.
        h = u * h_ + (1. - u) * h
        # Where the mask is 0 the previous state is carried over unchanged.
        h = m_[:,None] * h + (1. - m_)[:,None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]
    _step = _step_slice

    # Initial state is all zeros of shape (n_samples, dim); `updates` is not used afterwards.
    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info = [tensor.alloc(0., n_samples, dim)],
                                non_sequences = [tparams[_p(prefix, 'U')],
                                                 tparams[_p(prefix, 'Ux')]],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps,
                                profile=profile,
                                strict=True)
    # Wrapped in a list so callers can index the result as proj[0].
    rval = [rval]
    return rval



In [5]:
# model = load_model()
# encoder = Encoder(model)

In [4]:
# Build the model dictionary once and wrap it in an Encoder; the
# caption-embedding cell further down calls `encoder.encode`.
model = load_model()
print('load model')
encoder = Encoder(model)
print('model done')
# `defaultdict` and `pickle` are used by the caption-embedding cell below.
from collections import defaultdict
import pickle

# file_caption = defaultdict(list)
# with open('C:\Base\caption.txt') as fp:
#     line = fp.readline()
#     # you may also want to remove whitespace characters like `\n` at the end of each line
#     #line = [x.strip() for x in line]
#     while(line):
#         line = line.strip()
#         print(line)
#         file_name, caption =  line.split(',')
#         file_caption[file_name].append(caption)
#         line = fp.readline()

# print(len(file_caption))
# print(file_caption['1.png'])
# print(file_caption['3.png'])
# file_Name = "file_caption_map.pickle"
# # open the file for writing
# fileObject = open(file_Name,'wb') 
# pickle.dump(file_caption,fileObject)   
# # here we close the fileObject
# fileObject.close()



Loading model parameters...
Compiling encoders...
Loading tables...
Packing up...
uoptions
boptions
utable
btable
f_w2v
f_w2v2
load model
model done


In [None]:
from numba import jit, cuda
import nltk
nltk.download('punkt')

# Encode every caption of every image and store the result on disk.
# Relies on `pickle`, `defaultdict` and `encoder` from the model-loading cell.

file_Name = "file_caption_map.pickle"
# Read the image -> captions map.  The context manager closes the file, which
# the previous open()/load sequence never did.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(file_Name, 'rb') as fileObject:
    file_caption = pickle.load(fileObject)

# image file name -> array of caption embeddings (one row per caption)
file_emedding = defaultdict(list)
for key, value in file_caption.items():
    file_emedding[key] = encoder.encode(value, verbose=False)
    print('-----------------------')

print(len(file_emedding))
print(file_emedding['1.png'])
print(file_emedding['3.png'])

file_Name = "file_caption_embedding.pickle"
# Write the embeddings; the file is closed even if pickling fails.
with open(file_Name, 'wb') as fileObject:
    pickle.dump(file_emedding, fileObject)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\praja\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
-----------------------
----------------

In [3]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F


class Generator(nn.Module):
    """Conditional generator: caption embedding + noise -> single-channel image.

    Parameters
    ----------
    batch_size : accepted for interface compatibility; not used by the module
    img_size : stored as ``self.img_size``; the layer sizes below are fixed
    z_dim : length of the noise vector
    text_embed_dim : length of the caption embedding
    reduced_text_dim : length the caption embedding is projected to
    """

    def __init__(self, batch_size, img_size, z_dim, text_embed_dim, reduced_text_dim):
        super(Generator, self).__init__()

        self.img_size = img_size
        self.z_dim = z_dim
        self.text_embed_dim = text_embed_dim

        # `reduced_text_dim` is the projection layer itself.  The integer that
        # used to be stored under this name was overwritten immediately, so
        # that assignment has been dropped; parameter names are unchanged.
        self.reduced_text_dim = nn.Linear(text_embed_dim, reduced_text_dim)
        self.concat = nn.Linear(z_dim + reduced_text_dim, 64 * 8 * 4 * 4)

        # Defining the generator network architecture: four stride-2
        # transposed convolutions, each doubling the spatial size (4 -> 64).
        self.d_net = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 1, 4, 2, 1),
            nn.Tanh()
        )

    def forward(self, text, z):
        """ Given a caption embedding and latent variable z(noise), generate an image
        Arguments
        ---------
        text : torch.FloatTensor
            Output of the skipthought embedding model for the caption
            text.size() = (batch_size, text_embed_dim)
        z : torch.FloatTensor
            Latent variable or noise
            z.size() = (batch_size, z_dim)
        --------
        Returns
        --------
        output : torch.FloatTensor
            Generated images, output.size() = (batch_size, 1, 64, 64), with
            values rescaled from the tanh range [-1, 1] to [0, 1]
        """
        reduced_text = self.reduced_text_dim(text)  # (batch_size, reduced_text_dim)
        concat = torch.cat((reduced_text, z), 1)  # (batch_size, reduced_text_dim + z_dim)
        concat = self.concat(concat)  # (batch_size, 64*8*4*4)
        concat = concat.view(-1, 64 * 8, 4, 4)  # (batch_size, 512, 4, 4)

        d_net_out = self.d_net(concat)  # (batch_size, 1, 64, 64), values in [-1, 1]
        # NOTE(review): Text2ImageDataset normalises real images with mean 0.5
        # and std 0.5, which yields [-1, 1]; confirm that the [0, 1] range
        # produced here is what the discriminator should see.
        output = d_net_out / 2. + 0.5   # (batch_size, 1, 64, 64), values in [0, 1]

        return output

In [4]:
class Discriminator(nn.Module):
    """Conditional discriminator scoring an (image, caption embedding) pair.

    Parameters
    ----------
    batch_size : stored as ``self.batch_size``; not used in the computation
    img_size : stored as ``self.img_size``; the layer sizes below are fixed
    text_embed_dim : length of the caption embedding
    text_reduced_dim : length the caption embedding is projected to
    """

    def __init__(self, batch_size, img_size, text_embed_dim, text_reduced_dim):
        super(Discriminator, self).__init__()

        self.batch_size = batch_size
        self.img_size = img_size
        self.in_channels = 1
        self.text_embed_dim = text_embed_dim
        self.text_reduced_dim_val = text_reduced_dim

        # Defining the discriminator network architecture: four stride-2
        # convolutions, each halving the spatial size.
        self.d_net = nn.Sequential(
            nn.Conv2d(self.in_channels, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 256, 4, 2, 1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(256, 512, 4, 2, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True))

        # Linear layer reducing the caption embedding from text_embed_dim to
        # text_reduced_dim.
        self.text_reduced_dim = nn.Linear(self.text_embed_dim, self.text_reduced_dim_val)

        # Joint image/text block applied after channel-wise concatenation.
        self.cat_net = nn.Sequential(
            nn.Conv2d(512 + self.text_reduced_dim_val, 512, 4, 2, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True))

        self.linear = nn.Linear(2 * 2 * 512, 1)

    def forward(self, image, text):
        """ Given the image and its caption embedding, predict whether the image
        is real or fake.
        Arguments
        ---------
        image : torch.FloatTensor
            image.size() = (batch_size, 1, 64, 64)
        text : torch.FloatTensor
            Output of the skipthought embedding model for the caption
            text.size() = (batch_size, text_embed_dim)
        --------
        Returns
        --------
        output : sigmoid score per sample, output.size() = (batch_size,)
        logit : flattened features fed to the final linear layer,
            logit.size() = (batch_size, 512*2*2).  Despite its name this is
            not the pre-sigmoid score.
        """
        d_net_out = self.d_net(image)  # (batch_size, 512, 4, 4)
        text_reduced = self.text_reduced_dim(text)  # (batch_size, text_reduced_dim)

        # Tile the text vector over the spatial grid of the image features.
        text_reduced = text_reduced.unsqueeze(2).unsqueeze(3)  # (batch_size, text_reduced_dim, 1, 1)
        text_reduced = text_reduced.expand(-1, -1, d_net_out.shape[2], d_net_out.shape[3])

        concat_out = torch.cat((d_net_out, text_reduced), 1)  # (batch_size, 512+text_reduced_dim, 4, 4)

        logit = self.cat_net(concat_out)  # (batch_size, 512, 2, 2)
        logit = logit.view(-1, 512*2*2)
        # torch.sigmoid replaces the deprecated F.sigmoid.
        output = torch.sigmoid(self.linear(logit))
        output = output.view(-1, 1).squeeze(1)
        return output, logit
        

In [5]:
import numpy as np
from torch import nn
from torch import  autograd
import torch
import os

class Concat_embed(nn.Module):
    """Project a text embedding and append it to a 4x4 feature map as extra channels."""

    def __init__(self, embed_dim, projected_embed_dim):
        super(Concat_embed, self).__init__()
        # Linear -> BatchNorm -> LeakyReLU projection of the embedding.
        projection_layers = [
            nn.Linear(embed_dim, projected_embed_dim),
            nn.BatchNorm1d(projected_embed_dim),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        self.projection = nn.Sequential(*projection_layers)

    def forward(self, inp, embed):
        """Return ``inp`` with the projected ``embed`` tiled over a 4x4 grid and concatenated on dim 1."""
        projected = self.projection(embed)
        # (batch, features) -> (4, 4, batch, features) -> (batch, features, 4, 4)
        tiled = projected.repeat(4, 4, 1, 1)
        tiled = tiled.permute(2, 3, 0, 1)
        return torch.cat([inp, tiled], 1)


class minibatch_discriminator(nn.Module):
    """Append per-sample statistics comparing each sample with the rest of the batch.

    ``num_channels`` is the channel count of the 4x4 input feature map;
    ``B_dim`` and ``C_dim`` give the shape each sample is projected to.
    """

    def __init__(self, num_channels, B_dim, C_dim):
        super(minibatch_discriminator, self).__init__()
        self.B_dim = B_dim
        self.C_dim =C_dim
        self.num_channels = num_channels
        # Learnable projection from flattened features to B_dim * C_dim values.
        self.T_tensor = nn.Parameter(
            torch.randn(num_channels * 4 * 4, B_dim * C_dim) * 0.1,
            requires_grad=True)

    def forward(self, inp):
        """Return the flattened input concatenated with ``B_dim`` batch-similarity features."""
        flat = inp.view(-1, self.num_channels * 4 * 4)
        M = torch.mm(flat, self.T_tensor).view(-1, self.B_dim, self.C_dim)

        # L1 distance between every pair of samples, per row of M: (N, B_dim, N)
        pairwise_l1 = (M.unsqueeze(3) - M.permute(1, 2, 0).unsqueeze(0)).abs().sum(2)
        # Sum of exp(-distance) over the batch: (N, B_dim)
        closeness = torch.exp(-pairwise_l1).sum(2).view(M.size(0), -1)

        return torch.cat((flat, closeness), 1)


class Utils(object):
    """Stateless helpers for GAN training."""

    @staticmethod
    def smooth_label(tensor, offset):
        """Return ``tensor`` shifted by ``offset`` (label smoothing)."""
        return tensor + offset

    @staticmethod
    # based on:  https://github.com/caogang/wgan-gp/blob/master/gan_cifar10.py
    def compute_GP(netD, real_data, real_embed, fake_data, LAMBDA):
        """Gradient penalty on random interpolates between real and fake samples.

        netD: callable ``netD(images, embed)`` returning ``(scores, _)``
        real_data, fake_data: tensors of identical shape, batch on dim 0
        real_embed: passed through to ``netD`` unchanged
        LAMBDA: multiplier applied to the penalty

        Returns ``LAMBDA * mean((||grad|| - 1) ** 2)``, where the norm is taken
        over all non-batch dimensions of the gradient of the scores with
        respect to the interpolates.
        """
        batch_size = real_data.size(0)
        # Work on whatever device the real batch lives on rather than forcing
        # CUDA, and take the layout from the data rather than assuming
        # (3, 64, 64) - the images in this notebook are single-channel.
        device = real_data.device
        fake_data = fake_data.to(device)

        # One mixing coefficient per sample, broadcast over the other dims.
        alpha_shape = (batch_size,) + (1,) * (real_data.dim() - 1)
        alpha = torch.rand(alpha_shape, device=device)

        interpolates = alpha * real_data + ((1 - alpha) * fake_data)
        # Cut the graph back to the generator and track gradients w.r.t. the
        # interpolated images only.
        interpolates = interpolates.detach().requires_grad_(True)

        disc_interpolates, _ = netD(interpolates, real_embed)

        gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                  grad_outputs=torch.ones_like(disc_interpolates),
                                  create_graph=True, retain_graph=True)[0]

        # Flatten so the norm is per sample; on a 4-D gradient, norm(dim=1)
        # would only reduce over the channel axis.
        gradients = gradients.reshape(batch_size, -1)
        gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA

        return gradient_penalty

    @staticmethod
    def save_checkpoint(netD, netG, dir_path, subdir_path, epoch):
        """Save both state dicts as ``disc_<epoch>.pth`` / ``gen_<epoch>.pth`` under dir_path/subdir_path."""
        path =  os.path.join(dir_path, subdir_path)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(path, exist_ok=True)

        torch.save(netD.state_dict(), '{0}/disc_{1}.pth'.format(path, epoch))
        torch.save(netG.state_dict(), '{0}/gen_{1}.pth'.format(path, epoch))

    @staticmethod
    def weights_init(m):
        """Initialise Conv* weights from N(0, 0.02) and BatchNorm* weights from N(1, 0.02) with zero bias.

        Intended for ``module.apply(Utils.weights_init)``; modules of other
        classes, and modules without a weight tensor, are left untouched.
        """
        classname = m.__class__.__name__
        if getattr(m, 'weight', None) is None:
            # e.g. BatchNorm built with affine=False has nothing to initialise.
            return
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

In [6]:
import os
import argparse
import time
import datetime
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision.utils import save_image

import numpy as np
import itertools
import torchvision.utils as vutils

class GAN_CLS(object):
    """Trainer for a text-conditioned GAN.

    Owns the generator/discriminator pair, their optimizers and loss
    functions, and runs the training loop with periodic logging, sample
    dumps and per-epoch checkpoints.
    """

    def __init__(self, args, data_loader, SUPERVISED=True):
        """
        Arguments :
        ----------
        args : Arguments defined in Argument Parser
        data_loader : iterable of batches; each batch is a dict with the keys
                      'true_imgs', 'true_embed' and 'false_imgs'
        SUPERVISED : stored on the instance; not read anywhere in this class
        """
        self.data_loader = data_loader
        self.num_epochs = args.num_epochs
        self.batch_size = args.batch_size

        self.log_step = args.log_step
        self.sample_step = args.sample_step

        self.log_dir = args.log_dir
        self.checkpoint_dir = args.checkpoint_dir
        self.sample_dir = args.sample_dir
        self.final_model = args.final_model

        self.dataset = args.dataset
        #self.model_name = args.model_name

        self.img_size = args.img_size
        self.z_dim = args.z_dim
        self.text_embed_dim = args.text_embed_dim
        self.text_reduced_dim = args.text_reduced_dim
        self.learning_rate = args.learning_rate
        self.beta1 = args.beta1
        self.beta2 = args.beta2
        self.l1_coeff = args.l1_coeff
        self.resume_epoch = args.resume_epoch
        self.SUPERVISED = SUPERVISED

        self._setup_logger()
        self.build_model()

    def _setup_logger(self):
        """Attach a file logger writing to ``<log_dir>/file.log``.

        The logger is looked up by the module's real ``__name__`` (the former
        code passed the literal string '__name__').  An existing handler for
        the same file is reused, so creating several trainers in one kernel
        session does not duplicate every log line.
        """
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        self.formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s')

        log_path = os.path.abspath(os.path.join(self.log_dir, 'file.log'))
        for handler in self.logger.handlers:
            if isinstance(handler, logging.FileHandler) and handler.baseFilename == log_path:
                self.file_handler = handler
                break
        else:
            self.file_handler = logging.FileHandler(log_path)
            self.logger.addHandler(self.file_handler)
        self.file_handler.setFormatter(self.formatter)

    def build_model(self):
        """ A function of defining following instances :
        -----  Generator
        -----  Discriminator
        -----  Optimizer for Generator
        -----  Optimizer for Discriminator
        -----  Defining Loss functions

        Called once from ``__init__``; calling it again replaces the networks
        and optimizers with freshly initialised ones.
        """

        # ---------------------------------------------------------------------
        #						1. Network Initialization
        # ---------------------------------------------------------------------
        self.gen = Generator(batch_size=self.batch_size,
            img_size=self.img_size,
            z_dim=self.z_dim,
            text_embed_dim=self.text_embed_dim,
            reduced_text_dim=self.text_reduced_dim)

        self.disc = Discriminator(batch_size=self.batch_size,
                                  img_size=self.img_size,
                                  text_embed_dim=self.text_embed_dim,
                                  text_reduced_dim=self.text_reduced_dim)

        self.gen_optim = optim.Adam(self.gen.parameters(),
                                    lr=self.learning_rate,
                                    betas=(self.beta1, self.beta2))

        self.disc_optim = optim.Adam(self.disc.parameters(),
                                     lr=self.learning_rate,
                                     betas=(self.beta1, self.beta2))

        # Joint optimizer over both networks; kept as an attribute but not
        # stepped anywhere in train_model.
        self.cls_gan_optim = optim.Adam(itertools.chain(self.gen.parameters(),
                                                        self.disc.parameters()),
                                        lr=self.learning_rate,
                                        betas=(self.beta1, self.beta2))

        print ('-------------  Generator Model Info  ---------------')
        self.print_network(self.gen, 'G')
        print ('------------------------------------------------')

        print ('-------------  Discriminator Model Info  ---------------')
        self.print_network(self.disc, 'D')
        print ('------------------------------------------------')

        self.gen.cuda()
        self.disc.cuda()
        self.criterion = nn.BCELoss().cuda()
        self.l1loss = nn.L1Loss().cuda()
        self.l2loss = nn.MSELoss().cuda()
        # self.CE_loss = nn.CrossEntropyLoss().cuda()
        # self.MSE_loss = nn.MSELoss().cuda()
        self.gen.train()
        self.disc.train()

    def print_network(self, model, name):
        """ A function for printing total number of model parameters """
        num_params = sum(p.numel() for p in model.parameters())

        print(model)
        print(name)
        print("Total number of parameters: {}".format(num_params))

    def load_checkpoints(self, resume_epoch):
        """Restore the trained generator and discriminator.

        Reads ``<resume_epoch>-G.ckpt`` and ``<resume_epoch>-D.ckpt`` from
        ``checkpoint_dir``; tensors are first mapped to CPU storage and then
        copied into the existing modules.
        """
        print('Loading the trained models from step {}...'.format(resume_epoch))
        G_path = os.path.join(self.checkpoint_dir, '{}-G.ckpt'.format(resume_epoch))
        D_path = os.path.join(self.checkpoint_dir, '{}-D.ckpt'.format(resume_epoch))
        self.gen.load_state_dict(torch.load(G_path, map_location=lambda storage, loc: storage))
        self.disc.load_state_dict(torch.load(D_path, map_location=lambda storage, loc: storage))

    def save_img_results(self, data_img, fake, epoch, image_dir):
        """Write image grids for at most 64 real and generated samples.

        With ``data_img`` given, both ``real_samples_epoch_XXX.png`` and
        ``fake_samples_epoch_XXX.png`` are written; with ``data_img`` None
        only ``lr_fake_samples_epoch_XXX.png`` is written.  File names depend
        on ``epoch`` only, so later calls within one epoch overwrite earlier
        ones.
        """
        num = 64
        fake = fake[0:num]
        # data_img is changed to [0,1]
        if data_img is not None:
            data_img = data_img[0:num]
            vutils.save_image(data_img, '%s/real_samples_epoch_%03d.png' % (image_dir, epoch), normalize=True)
            # fake.data is still [-1, 1]
            vutils.save_image(fake.data, '%s/fake_samples_epoch_%03d.png' %(image_dir, epoch), normalize=True)
        else:
            vutils.save_image(
                fake.data, '%s/lr_fake_samples_epoch_%03d.png' %
                (image_dir, epoch), normalize=True)

    def train_model(self):
        """Run the adversarial training loop.

        For every batch the discriminator is updated on (real image, text),
        (mismatched image, text) and (generated image, text); then the
        generator is updated with an adversarial term, an L1 term against the
        real images and a feature-matching term on the discriminator's second
        output.  Progress is logged every ``log_step`` batches, samples are
        written every ``sample_step`` batches, and checkpoints are written
        after every epoch and once more at the end.
        """
        # Noise reused for every sample dump.  Sized to cover a full batch and
        # sliced to the current batch below, so a batch size other than 64
        # (or a short last batch) no longer mismatches the embeddings.
        fixed_noise = torch.randn(max(64, self.batch_size), self.z_dim).cuda()
        data_loader = self.data_loader

        start_epoch = 0
        if self.resume_epoch:
            # NOTE(review): the checkpoint named N is written after epoch N
            # finishes, so resuming at N repeats that epoch - confirm intent.
            start_epoch = self.resume_epoch
            self.load_checkpoints(self.resume_epoch)

        print ('---------------  Model Training Started  ---------------')
        start_time = time.time()
        log = ""
        for epoch in range(start_epoch, self.num_epochs):
            start_t = time.time()
            for idx, batch in enumerate(data_loader):
                true_imgs = batch['true_imgs']
                true_embed = batch['true_embed']
                false_imgs = batch['false_imgs']

                real_labels = torch.ones(true_imgs.size(0))
                fake_labels = torch.zeros(true_imgs.size(0))

                smooth_real_labels = torch.FloatTensor(Utils.smooth_label(real_labels.numpy(), -0.1))

                true_imgs = true_imgs.float().cuda()
                true_embed = true_embed.float().cuda()
                false_imgs = false_imgs.float().cuda()

                real_labels = real_labels.cuda()
                smooth_real_labels = smooth_real_labels.cuda()
                fake_labels = fake_labels.cuda()

                # ---------------------------------------------------------------
                #                   2. Training the discriminator
                # ---------------------------------------------------------------
                self.disc.zero_grad()
                true_out, true_logit = self.disc(true_imgs, true_embed)
                false_out, false_logit = self.disc(false_imgs, true_embed)
                disc_loss = self.criterion(true_out, smooth_real_labels) + self.criterion(false_out, fake_labels)

                noise = torch.randn(true_imgs.size(0), self.z_dim).cuda()
                fake_imgs = self.gen(true_embed, noise)
                # detach(): the discriminator update must not back-propagate
                # into the generator (those gradients were discarded anyway).
                false_out, _ = self.disc(fake_imgs.detach(), true_embed)
                disc_loss = disc_loss + self.criterion(false_out, fake_labels)

                disc_loss.backward()
                self.disc_optim.step()


                # ---------------------------------------------------------------
                # 					  3. Training the generator
                # ---------------------------------------------------------------
                self.gen.zero_grad()

                z = torch.randn(true_imgs.size(0), self.z_dim).cuda()
                fake_imgs = self.gen(true_embed, z)
                fake_out, fake_logit = self.disc(fake_imgs, true_embed)
                true_out, true_logit = self.disc(true_imgs, true_embed)

                activation_fake = torch.mean(fake_logit, 0)
                # The real-image statistics are a fixed target for the generator.
                activation_real = torch.mean(true_logit, 0).detach()

                gen_loss = self.criterion(fake_out, real_labels)
                gen_loss = gen_loss + self.l1_coeff * self.l1loss(fake_imgs, true_imgs) + self.l2loss(activation_fake, activation_real)

                gen_loss.backward()
                self.gen_optim.step()

                # self.cls_gan_optim.step()

                # Logging
                loss = {}
                loss['G_loss'] = gen_loss.item()
                loss['D_loss'] = disc_loss.item()

                # ---------------------------------------------------------------
                # 					4. Logging INFO into log_dir
                # ---------------------------------------------------------------
                if idx % self.log_step == 0:
                    end_time = time.time() - start_time
                    end_time = datetime.timedelta(seconds=end_time)
                    log = "Elapsed [{}], Epoch [{}/{}], Idx [{}/{}]".format(end_time, epoch,
                                                                         self.num_epochs, idx, len(data_loader))
                    for net, loss_value in loss.items():
                        log += ", {}: {:.4f}".format(net, loss_value)
                    print (log)
                    self.logger.info(log)

                # ---------------------------------------------------------------
                # 					5. Saving generated images
                # ---------------------------------------------------------------
                if (idx + 1) % self.sample_step == 0:
                    # Sampling needs no gradients; no_grad avoids building a
                    # graph that would only be thrown away.
                    with torch.no_grad():
                        sample_imgs = self.gen(true_embed, fixed_noise[:true_embed.size(0)])
                    self.save_img_results(true_imgs, sample_imgs, epoch, self.sample_dir)

                    print ('Saved real and fake images into {}...'.format(self.sample_dir))

            # -------------------------------------------------------------------
            # 				6. Saving the checkpoints & final model
            # -------------------------------------------------------------------
            end_t = time.time()
            G_path = os.path.join(self.checkpoint_dir, '{}-G.ckpt'.format(epoch))
            D_path = os.path.join(self.checkpoint_dir, '{}-D.ckpt'.format(epoch))
            torch.save(self.gen.state_dict(), G_path)
            torch.save(self.disc.state_dict(), D_path)
            print(log)
            print('Total Time: {:.2f} sec and Saved model checkpoints into {}...'.format((end_t - start_t), self.checkpoint_dir))

        print ('---------------  Model Training Completed  ---------------')
        # Saving final model into final_model directory
        G_path = os.path.join(self.final_model, '{}-G.pth'.format('final'))
        D_path = os.path.join(self.final_model, '{}-D.pth'.format('final'))
        torch.save(self.gen.state_dict(), G_path)
        torch.save(self.disc.state_dict(), D_path)
        print('Saved final model into {}...'.format(self.final_model))


In [7]:
%tb
import os
import time
import argparse

import torch
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision.utils import save_image



def test():
    """Generate an image for one caption with the final trained generator.

    Parses the command line, loads ``<final_model>/final-G.pth`` into a
    ``Generator``, encodes ``--text`` with skip-thoughts and saves the
    generated image to ``--save_img``.  When ``--save_img`` has no file
    extension it is treated as a directory and the image is written to
    ``<save_img>/output.png``.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('-f')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='Batch Size')
    parser.add_argument('--img_size', type=int, default=64,
                        help='Size of the image')
    parser.add_argument('--z_dim', type=int, default=100,
                        help='Size of the latent variable')
    parser.add_argument('--final_model', type=str, default='final_model',
                        help='Directory containing the final trained model')
    parser.add_argument('--save_img', type=str, default='test',
                        help='Save predicted images')
    parser.add_argument('--text_embed_dim', type=int, default=4800,
                        help='Size of the embedding for the captions')
    parser.add_argument('--text_reduced_dim', type=int, default=1024,
                        help='Reduced dimension of the caption encoding')
    parser.add_argument('--text', type=str, help='Input text to be converted into image')

    config = parser.parse_args()
    if not config.text:
        # Fail early with a usage message instead of deep inside the encoder.
        parser.error('--text is required: the caption to be converted into an image')

    start_time = time.time()
    gen = Generator(batch_size=config.batch_size,
                    img_size=config.img_size,
                    z_dim=config.z_dim,
                    text_embed_dim=config.text_embed_dim,
                    reduced_text_dim=config.text_reduced_dim)

    # Loading the trained model
    G_path = os.path.join(config.final_model, '{}-G.pth'.format('final'))
    gen.load_state_dict(torch.load(G_path))
    # The noise below lives on the GPU, so the generator has to as well.
    gen.cuda()
    gen.eval()

    z = torch.randn(config.batch_size, config.z_dim).cuda()
    model = load_model()
    # The encoder takes a list of sentences (a bare string would be iterated
    # character by character) and returns a numpy array.
    text_embed = skipthoughts.encode(model, [config.text])
    text_embed = torch.from_numpy(text_embed).float().cuda()
    # Same caption for every noise vector in the batch.
    text_embed = text_embed.repeat(config.batch_size, 1)

    with torch.no_grad():
        output_img = gen(text_embed, z)

    save_path = config.save_img
    if not os.path.splitext(save_path)[1]:
        # No extension: the image writer could not infer a format, so treat
        # the value as an output directory.
        os.makedirs(save_path, exist_ok=True)
        save_path = os.path.join(save_path, 'output.png')
    # normalize=True matches how samples are written elsewhere in this
    # notebook (generator output is treated as [-1, 1] there).
    save_image(output_img.cpu(), save_path, nrow=1, padding=0, normalize=True)

    print ('Generated image save to {}'.format(save_path))
    print ('Time taken for the task : {}'.format(time.time() - start_time))


ModuleNotFoundError: No module named 'pickles'

In [8]:
from visdom import Visdom
import numpy as np
import torchvision
from PIL import ImageDraw, Image, ImageFont
import torch
import pdb

class VisdomPlotter(object):

    """Plots to Visdom

    Keeps one Visdom window per ``var_name`` in ``self.plots`` so that later
    calls update the existing window instead of opening a new one.
    """

    def __init__(self, env_name='gan'):
        """Connect to the Visdom server and remember the environment name."""
        self.viz = Visdom()
        self.env = env_name
        self.plots = {}

    def plot(self, var_name, split_name, x, y, xlabel='epoch'):
        """Add the point ``(x, y)`` to trace ``split_name`` of plot ``var_name``.

        The first call for a ``var_name`` creates the window; subsequent calls
        append to it.
        """
        if var_name not in self.plots:
            self.plots[var_name] = self.viz.line(X=np.array([x,x]), Y=np.array([y,y]), env=self.env, opts=dict(
                legend=[split_name],
                title=var_name,
                xlabel=xlabel,
                ylabel=var_name
            ))
        else:
            # Visdom.updateTrace was removed from visdom; appending through
            # line(..., update='append') is the supported replacement.
            self.viz.line(X=np.array([x]), Y=np.array([y]), env=self.env,
                          win=self.plots[var_name], name=split_name, update='append')

    def draw(self, var_name, images):
        """Show ``images`` in the window for ``var_name``, creating it on first use."""
        if var_name not in self.plots:
            self.plots[var_name] = self.viz.images(images, env=self.env)
        else:
            self.viz.images(images, env=self.env, win=self.plots[var_name])

In [8]:
%tb

import os
import torch
import argparse
import numpy as np
from torch.utils.data import DataLoader



def check_dir(dir_name):
    """Make sure the directory ``dir_name`` exists, creating parents as needed.

    Prints '<dir_name> created' only when the directory was actually created;
    an already existing directory is reported as such.
    """
    if os.path.isdir(dir_name):
        print ('{} already exists'.format(dir_name))
        return

    # exist_ok guards against the directory appearing between check and create.
    os.makedirs(dir_name, exist_ok=True)
    print ('{} created'.format(dir_name))


def check_args(args):
    """Prepare output directories and sanity-check numeric arguments.

    Calls ``check_dir`` on the checkpoint, sample, log and final-model
    directories (in that order), then asserts that the epoch count, batch
    size and noise dimension are strictly positive.  Returns ``args``
    unchanged.
    """
    # Output locations: --checkpoint_dir, --sample_dir, --log_dir, --final_model
    for dir_option in ('checkpoint_dir', 'sample_dir', 'log_dir', 'final_model'):
        check_dir(getattr(args, dir_option))

    # Numeric options that must be strictly positive, with their error text.
    positive_options = (
        ('num_epochs', 'Number of epochs must be greater than 0'),
        ('batch_size', 'Batch size must be greater than zero'),
        ('z_dim', 'Size of the noise vector must be greater than zero'),
    )
    for option, complaint in positive_options:
        assert getattr(args, option) > 0, complaint

    return args


def main():
    """Parse the training options, build the dataset/loader and train the GAN."""

    parser = argparse.ArgumentParser()
    # Presumably absorbs the '-f <connection file>' argument a Jupyter kernel
    # is started with, so parse_args() does not reject it - TODO confirm.
    parser.add_argument('-f')

    # Options are registered on the groups themselves; calling
    # add_argument_group() without using its return value leaves it empty.
    data_group = parser.add_argument_group('Dataset related arguments')
    data_group.add_argument('--data_dir', type=str, default="data",
                            help='Data Directory')

    data_group.add_argument('--dataset', type=str, default="flowers",
                            help='Dataset to train')

    save_group = parser.add_argument_group('Model saving path and steps related arguments')
    save_group.add_argument('--log_step', type=int, default=1,
                            help='Save INFO into logger after every x iterations')

    save_group.add_argument('--sample_step', type=int, default=100,
                            help='Save generated image after every x iterations')

    save_group.add_argument('--checkpoint_dir', type=str, default='checkpoints',
                            help='Directory in which model checkpoints are saved')

    save_group.add_argument('--sample_dir', type=str, default='sample',
                            help='Directory in which generated images are saved')

    save_group.add_argument('--log_dir', type=str, default='logs',
                            help='Directory in which the training log is written')

    save_group.add_argument('--final_model', type=str, default='final_model',
                            help='Directory in which the final model is saved')

    train_group = parser.add_argument_group('Model training related arguments')
    train_group.add_argument('--num_epochs', type=int, default=200,
                             help='Total number of epochs to train')

    train_group.add_argument('--batch_size', type=int, default=64,
                             help='Batch Size')

    train_group.add_argument('--img_size', type=int, default=64,
                             help='Size of the image')

    train_group.add_argument('--z_dim', type=int, default=100,
                             help='Size of the latent variable')

    train_group.add_argument('--text_embed_dim', type=int, default=4800,
                             help='Size of the embedding for the captions')

    train_group.add_argument('--text_reduced_dim', type=int, default=1024,
                             help='Reduced dimension of the caption encoding')

    train_group.add_argument('--learning_rate', type=float, default=0.0002,
                             help='Learning Rate')

    train_group.add_argument('--beta1', type=float, default=0.5,
                             help='Hyperparameter of the Adam optimizer')

    train_group.add_argument('--beta2', type=float, default=0.999,
                             help='Hyperparameter of the Adam optimizer')

    train_group.add_argument('--l1_coeff', type=float, default=50,
                             help='Coefficient for the L1 Loss')

    train_group.add_argument('--l2_coeff', type=float, default=100,
                             help='Coefficient for the L2 Loss')

    train_group.add_argument('--resume_epoch', type=int, default=0,
                             help='Resume epoch to resume training')

    args = parser.parse_args()

    check_args(args)

    dataset = Text2ImageDataset(args.data_dir)
    data_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)

    # The GAN_CLS constructor already builds the networks and optimizers;
    # calling build_model() again here would only re-create them.
    gan = GAN_CLS(args, data_loader)
    gan.train_model()


# Start training when this cell/script is executed directly.
if __name__ == '__main__':
    main()

No traceback available to show.


checkpoints created
sample created
logs created
final_model created
------------------  Loading images  ------------------
Load filenames from: C:\Base\file_caption_map.pickle (18818)
------------------  Loading captions  ----------------
---------------  Loading Skip-thought Model  ---------------


KeyboardInterrupt: 

In [None]:
from __future__ import print_function
import sys
import os
import time
import argparse

import torch
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision.utils import save_image
import torchvision.utils as vutils



import tkinter

from tkinter import *
from PIL import Image, ImageTk
import numpy
import random


# Put the project directory on the import path.
# A raw string is used because '\B' is not a valid escape sequence; the path
# itself is unchanged.  realpath() already returns an absolute, normalised path.
dir_path = os.path.realpath(r'C:\Base')
print(dir_path)
# Re-running the cell must not append the same entry again.
if dir_path not in sys.path:
    sys.path.append(dir_path)


def parse_args():
    """Parse the command-line options of the image-generation demo.

    Returns the ``argparse.Namespace`` holding ``f``, ``batch_size``,
    ``img_size``, ``z_dim``, ``final_model``, ``save_img``,
    ``text_embed_dim``, ``text_reduced_dim`` and ``text``.
    """
    # (flag, type, default, help) for every option that has a default value.
    option_table = (
        ('--batch_size', int, 1, 'Batch Size'),
        ('--img_size', int, 64, 'Size of the image'),
        ('--z_dim', int, 100, 'Size of the latent variable'),
        ('--final_model', str, 'final_model', 'Save INFO into logger after every x iterations'),
        ('--save_img', str, '.', 'Save predicted images'),
        ('--text_embed_dim', int, 4800, 'Size of the embeddding for the captions'),
        ('--text_reduced_dim', int, 1024, 'Reduced dimension of the caption encoding'),
    )

    cli = argparse.ArgumentParser()
    cli.add_argument('-f')
    for flag, value_type, fallback, description in option_table:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)
    cli.add_argument('--text', type=str, help='Input text to be converted into image')

    return cli.parse_args()


config = parse_args()


# Text encoder used to turn the typed caption into an embedding.
print('------------------------SKIP THOUGHT LOADING-----------------------')
model = load_model()
encoder = Encoder(model)
print('------------------------SKIP THOUGHT LOADING FINISHED-----------------------')

print('------------------------GENERATOR LOADING-----------------------')

gen = Generator(batch_size=config.batch_size,
                    img_size=config.img_size,
                    z_dim=config.z_dim,
                    text_embed_dim=config.text_embed_dim,
                    reduced_text_dim=config.text_reduced_dim)

gen.cuda()
# Loading the trained model
G_path = os.path.join('C:/Base/final/', 'final_model-G.ckpt')


gen.load_state_dict(torch.load(G_path))
gen.eval()
print('------------------------GENERATOR LOADING FINISHED-----------------------')

output_dir = './'


# Main window.
top = Tk()
top.title('Reverse Image Captioning')
top.geometry('500x500')

# Widgets are parented to `top` explicitly.  The former code assigned the
# result of `.grid()` (always None) to the frame variables and then used those
# as parents, so every widget silently fell back to tkinter's default root -
# which is a stale window when the cell is re-run in the same kernel.  The
# grid positions below are the ones the widgets already had.
L1 = Label(top, text="Enter the image description:")
L1.grid(row=0)
E1 = Entry(top)
E1.grid(row=1)

# Canvas on which the generated image is shown.
canvas = Canvas(top, width=300,height=300, bd=0,bg='white')
canvas.grid(row=0, column=1)

def GenerateImage():
    """Button callback: generate an image for the caption typed into ``E1``.

    Captions shorter than 10 characters are ignored.  Otherwise the caption is
    lower-cased, encoded, passed through the generator with fresh noise, saved
    to 'output.png' and shown on the canvas scaled to 256x256.  The entry box
    is cleared in every case.
    """
    z = torch.randn(config.batch_size, config.z_dim)
    z = z.cuda()
    text_input = E1.get()
    if (len(text_input) >= 10):
        text_input = text_input.lower()
        text_input = [text_input]
        print(text_input)
        text_embedding = encoder.encode(text_input)
        print(text_embedding)
        text_embedding = torch.from_numpy(text_embedding)
        text_embedding = text_embedding.cuda()
        print(text_embedding.shape)
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            output_img = gen(text_embedding, z)
        save_name = 'output.png'

        # fake.data is still [-1, 1]
        vutils.save_image(output_img.data, save_name, normalize=True)

        # Close the file as soon as the resized copy exists so the next
        # generation can overwrite it.
        with Image.open(save_name) as saved_img:
            preview = saved_img.resize((256, 256))
        imgfile = ImageTk.PhotoImage(preview)

        canvas.image = imgfile  # keep a reference so the image is not garbage-collected
        canvas.create_image(2,2,anchor='nw',image=imgfile)

    E1.delete(0, END)


# Bottom row: one button per (caption, callback) pair, placed left to right.
for column, (caption, callback) in enumerate((('Generate Image', GenerateImage),
                                              ('Exit', top.quit))):
    submit_button = Button(top, text=caption, command=callback)
    submit_button.grid(row=2, column=column)

# Hand control to Tk until the window is closed or 'Exit' is pressed.
top.mainloop()

C:\Base
------------------------SKIP THOUGHT LOADING-----------------------
Loading model parameters...
Compiling encoders...
Loading tables...
Packing up...
uoptions
boptions
utable
btable
f_w2v
f_w2v2
------------------------SKIP THOUGHT LOADING FINISHED-----------------------
------------------------GENERATOR LOADING-----------------------
------------------------GENERATOR LOADING FINISHED-----------------------
['alpha to the right of beta']
6
[[-0.0121488   0.00848362  0.00015675 ... -0.03058045 -0.01231285
  -0.01715059]]
torch.Size([1, 4800])
['percentage to the right of thunder']
6
[[-0.00747196  0.00042758 -0.00934522 ... -0.01970895 -0.00348631
   0.01339394]]
torch.Size([1, 4800])
