In [28]:
from dataloader.data_loader import dataloader

#importing pytorch
import torch
import torch.nn.functional as F

#importing the models
from model.base_function import init_net
#from model.base_model import BaseModel
from model.cnn_encoder import CNN_ENCODER
from model.rnn_encoder import RNN_ENCODER
from model.attention_textual_resnet_encoder import AttTextualResEncoder
from model.hidden_resnet_generator import HiddenResGenerator
from model.resnet_discriminator import ResDiscriminator

#importing untilities
from model import base_function, external_function
from util import task, util
import itertools
from options.global_config import TextConfig
import pickle
import copy
import time

###
from options.train_options import TrainOptions
from model import create_model

In [2]:
class Options:
    def __init__(self, 
                 name='fake/real_classifier', 
                 model='tdanet', 
                 mask_type=[1, 2, 3], 
                 checkpoints_dir='./checkpoints', 
                 which_iter='latest',
                 gpu_ids=[],
                 text_config = 'config.bird.yml',
                 output_scale=4,
                 img_file='/data/dataset/train', 
                 mask_file='none',
                 loadSize=[266,266],
                 fineSize=[256, 256],
                 resize_or_crop='resize_and_crop',
                 no_flip=False,
                 no_rotation=False,
                 no_augment=False,                 
                 batchSize=10, 
                 nThreads=8,
                 no_shuffle=False,
                 display_winsize=256,
                 display_id=1,
                 display_port=8097,
                 display_single_pane_ncols = 0,
                 prior_alpha=0.8, 
                 prior_beta=8,
                 no_maxpooling=False,
                 update_language=False,
                 detach_embedding=False,
                 train_paths='two', 
                 dynamic_sigma=False, 
                 lambda_rec_l1=20.0, 
                 lambda_gen_l1=20.0, 
                 lambda_kl=20.0, 
                 lambda_gan=1.0,
                 lambda_match=0.1,
                 iter_count=1, 
                 niter=100,
                 niter_decay=0,
                 continue_train=False,
                 valid_file='/data/dataset/valid',
                 lr_policy='lambda', 
                 lr=1e-4, 
                 gan_mode='lsgan',
                 display_freq=100,
                 print_freq=100,
                 save_latest_freq=1000,
                 save_iters_freq=10000,
                 no_html=False,
                 
                 results_dir='./results/',
                 phase='test',
                 nsampling=50,
                 ncaptions=10,
                 save_number=10,
                 no_variance=False,
                ):
        
        self.name=name
        self.model=model
        self.mask_type=mask_type
        self.img_file=img_file # has paths of all the images
        self.mask_file=mask_file
        self.checkpoints_dir = checkpoints_dir
        self.which_iter = which_iter
        self.gpu_ids = gpu_ids
        self.text_config = text_config
        self.output_scale=output_scale
        self.batchSize=batchSize
        self.loadSize = loadSize
        self.resize_or_crop=resize_or_crop
        self.no_flip=no_flip
        self.no_rotation = no_rotation
        self.no_augment = no_augment
        self.nThreads = nThreads,
        self.no_shuffle=no_shuffle
        self.display_winsize = display_winsize
        self.display_id = display_id
        self.display_port = display_port
        self.fineSize=fineSize
        self.display_single_pane_ncols = display_single_pane_ncols
        self.no_maxpooling = no_maxpooling
        self.update_language = update_language
        self.detach_embedding = detach_embedding
        self.prior_alpha=prior_alpha
        self.prior_beta=prior_beta
        self.gan_mode=gan_mode
        self.no_variance=no_variance
        self.nsampling=nsampling
        self.train_paths=train_paths
        self.dynamic_sigma=dynamic_sigma
        self.lambda_rec_l1=lambda_rec_l1
        self.lambda_gen_l1=lambda_gen_l1
        self.lambda_kl=lambda_kl
        self.lambda_gan=lambda_gan
        self.lambda_match=lambda_match
        self.iter_count = iter_count
        self.niter = niter
        self.niter_decay = niter_decay
        self.continue_train = continue_train
        self.valid_file = valid_file
        self.lr_policy = lr_policy
        self.lr = lr
        self.display_freq = display_freq
        self.print_freq = print_freq
        self.save_latest_freq = save_latest_freq
        self.save_iters_freq = save_iters_freq
        self.no_html = no_html
        
        self.results_dir=results_dir
        self.phase=phase
        self.nsampling=nsampling
        self.ncaptions=ncaptions
        self.save_number=save_number
        self.no_variance=no_variance
        
        self.isTrain = True #MAKE IT FALSE WHEN TESTING!!!
        

opt=Options(name='tda_bird',model="tdanet",mask_type=[0,1,2,3],img_file='./datasets/CUB_200_2011/train.flist',mask_file='./datasets/CUB_200_2011/train_mask.flist',text_config='config.bird.yml') #TRAINING OPTIONS, CHANGE FOR TESTING!!!


In [3]:
DEFAULT_CONFIG = {
    'MAX_TEXT_LENGTH' : 128,

    'VOCAB' : "./datasets/captions_vocab_bird.pickle",      # The path to DAMSM vocab pickle file
    'LANGUAGE_ENCODER' : "./datasets/text_encoder_bird.pth",    # The path to DAMSM text encoder
    'IMAGE_ENCODER': "./datasets/image_encoder_bird.pth",   # The path to DAMSM image encoder
    'EMBEDDING_DIM' : 256,

    'CATE_IMAGE_TRAIN' : "./datasets/CUB_200_2011/cate_image_train.json",   # The path to category-image mapping cache file
    'IMAGE_CATE_TRAIN' : "./datasets/CUB_200_2011/image_cate.json",     # The path to image-category mapping cache file

    'CAPTION' : "./datasets/CUB_200_2011/caption.json",     # The path to image-caption cache file

}

In [4]:
# for generating masks
def scale_pyramid(img, num_scales):
    scaled_imgs = [img]

    s = img.size()

    h = s[2]
    w = s[3]

    for i in range(1, num_scales):
        ratio = 2**i
        nh = h // ratio
        nw = w // ratio
        scaled_img = scale_img(img, size=[nh, nw])
        scaled_imgs.append(scaled_img)

    scaled_imgs.reverse()
    return scaled_imgs

# scaling images
def scale_img(img, size):
    scaled_img = F.interpolate(img, size=size, mode='bilinear', align_corners=True)
    return scaled_img

In [5]:
class TDAnet:
    def __init__(self, opt):
        
        self.loss_names = ['kl_rec', 'kl_g', 'l1_rec', 'l1_g', 'gan_g', 'word_g', 'sentence_g', 'ad_l2_g',
                           'gan_rec', 'ad_l2_rec', 'word_rec', 'sentence_rec',  'dis_img', 'dis_img_rec']
        self.log_names = []
        self.visual_names = ['img_m', 'img_truth', 'img_c', 'img_out', 'img_g', 'img_rec']
        self.text_names = ['text_positive']
        self.value_names = ['u_m', 'sigma_m', 'u_post', 'sigma_post', 'u_prior', 'sigma_prior']
        self.model_names = ['E', 'G', 'D', 'D_rec']
        self.distribution = []
        self.prior_alpha = opt.prior_alpha
        self.prior_beta = opt.prior_beta
        self.max_pool = None if opt.no_maxpooling else 'max'

        # inpainting model
        self.net_E = network.define_att_textual_e(ngf=32, z_nc=256, img_f=256, layers=5, norm='none', activation='LeakyReLU',
                          init_type='orthogonal', gpu_ids=opt.gpu_ids, image_dim=256, text_dim=256, multi_peak=False, pool_attention=self.max_pool)
        self.net_G = network.define_hidden_textual_g(f_text_dim=768, ngf=32, z_nc=256, img_f=256, L=0, layers=5, output_scale=opt.output_scale,
                                      norm='instance', activation='LeakyReLU', init_type='orthogonal', gpu_ids=opt.gpu_ids)
        
        #discriminator model
        self.net_D = network.define_d(ndf=32, img_f=128, layers=5, model_type='ResDis', init_type='orthogonal', gpu_ids=opt.gpu_ids)
        self.net_D_rec = network.define_d(ndf=32, img_f=128, layers=5, model_type='ResDis', init_type='orthogonal', gpu_ids=opt.gpu_ids)

        self._init_language_model(DEFAULT_CONFIG)

        if self.isTrain:
            # define the loss functions
            self.GANloss = external_function.GANLoss(opt.gan_mode)
            self.L1loss = torch.nn.L1Loss()
            self.L2loss = torch.nn.MSELoss()

            self.image_encoder = network.CNN_ENCODER(DEFAULT_CONFIG['EMBEDDING_DIM'])
            state_dict = torch.load(
                DEFAULT_CONFIG['IMAGE_ENCODER'], map_location=lambda storage, loc: storage)
            self.image_encoder.load_state_dict(state_dict)
            self.image_encoder.eval()
            if len(self.gpu_ids) > 0 and torch.cuda.is_available():
                self.image_encoder.cuda()
            base_function._freeze(self.image_encoder)

            # define the optimizer
            self.optimizer_G = torch.optim.Adam(itertools.chain(filter(lambda p: p.requires_grad, self.net_G.parameters()),
                        filter(lambda p: p.requires_grad, self.net_E.parameters())), lr=opt.lr, betas=(0.0, 0.999))
            self.optimizer_D = torch.optim.Adam(itertools.chain(filter(lambda p: p.requires_grad, self.net_D.parameters()),
                                                filter(lambda p: p.requires_grad, self.net_D_rec.parameters())),
                                                lr=opt.lr, betas=(0.0, 0.999))
            self.optimizers.append(self.optimizer_G)
            self.optimizers.append(self.optimizer_D)

        self.setup(opt)

        def _init_language_model(self, text_config):
            x = pickle.load(open(text_config['VOCAB'], 'rb'))
            self.ixtoword = x[2]
            self.wordtoix = x[3]

            word_len = len(self.wordtoix)
            self.text_encoder = network.RNN_ENCODER(word_len, nhidden=256)

            state_dict = torch.load(text_config['LANGUAGE_ENCODER'], map_location=lambda storage, loc: storage)
            self.text_encoder.load_state_dict(state_dict)
            self.text_encoder.eval()
            if not self.opt.update_language:
                self.text_encoder.requires_grad_(False)
            if len(self.gpu_ids) > 0 and torch.cuda.is_available():
                self.text_encoder.cuda()

        def set_input(self, input, epoch=0):
            """Unpack input data from the data loader and perform necessary pre-process steps"""
            self.input = input
            self.image_paths = self.input['img_path']
            self.img = input['img']
            self.mask = input['mask']
            self.caption_idx = input['caption_idx']
            self.caption_length = input['caption_len']

            if len(self.gpu_ids) > 0:
                self.img = self.img.cuda(self.gpu_ids[0], True)
                self.mask = self.mask.cuda(self.gpu_ids[0], True)

            # get I_m and I_c for image with mask and complement regions for training
            self.img_truth = self.img * 2 - 1
            self.img_m = self.mask * self.img_truth
            self.img_c =  (1 - self.mask) * self.img_truth

            # get multiple scales image ground truth and mask for training
            self.scale_img = scale_pyramid(self.img_truth, self.opt.output_scale)
            self.scale_mask = scale_pyramid(self.mask, self.opt.output_scale)

            # About text stuff
            self.text_positive = util.idx_to_caption(
                                        self.ixtoword, self.caption_idx[0].tolist(), self.caption_length[0].item())
            self.word_embeddings, self.sentence_embedding = util.vectorize_captions_idx_batch(
                                                        self.caption_idx, self.caption_length, self.text_encoder)
            self.text_mask = util.lengths_to_mask(self.caption_length, max_length=self.word_embeddings.size(-1))
            self.match_labels = torch.LongTensor(range(len(self.img_m)))
            if len(self.gpu_ids) > 0:
                self.word_embeddings = self.word_embeddings.cuda(self.gpu_ids[0], True)
                self.sentence_embedding = self.sentence_embedding.cuda(self.gpu_ids[0], True)
                self.text_mask = self.text_mask.cuda(self.gpu_ids[0], True)
                self.match_labels = self.match_labels.cuda(self.gpu_ids[0], True)
        
        def test(self, mark=None):
            """Forward function used in test time"""
            # save the groundtruth and masked image
            self.save_results(self.img_truth, data_name='truth')
            self.save_results(self.img_m, data_name='mask')

            # encoder process
            distribution, f, f_text = self.net_E(
                self.img_m, self.sentence_embedding, self.word_embeddings, self.text_mask, self.mask)
            variation_factor = 0. if opt.no_variance else 1.
            q_distribution = torch.distributions.Normal(distribution[-1][0], distribution[-1][1] * variation_factor)
            scale_mask = scale_img(self.mask, size=[f[2].size(2), f[2].size(3)])

            # decoder process
            for i in range(opt.nsampling):
                z = q_distribution.sample()

                self.img_g, attn = self.net_G(z, f_text, f_e=f[2], mask=scale_mask.chunk(3, dim=1)[0])
                self.img_out = (1 - self.mask) * self.img_g[-1].detach() + self.mask * self.img_m
                self.score = self.net_D(self.img_out)
                self.save_results(self.img_out, i, data_name='out', mark=mark)
        
        def get_distribution(self, distribution_factors):
            """Calculate encoder distribution for img_m, img_c only in train, all about distribution layer of VAE model"""
            # get distribution
            sum_valid = (torch.mean(self.mask.view(self.mask.size(0), -1), dim=1) - 1e-5).view(-1, 1, 1, 1)
            m_sigma = 1 if not self.opt.dynamic_sigma else (1 / (1 + ((sum_valid - self.prior_alpha) * self.prior_beta).exp_()))
            p_distribution, q_distribution, kl_rec, kl_g = 0, 0, 0, 0
            self.distribution = []
            for distribution in distribution_factors:
                p_mu, p_sigma, q_mu, q_sigma = distribution
                # the assumption distribution for different mask regions
                std_distribution = torch.distributions.Normal(torch.zeros_like(p_mu), m_sigma * torch.ones_like(p_sigma))
                # m_distribution = torch.distributions.Normal(torch.zeros_like(p_mu), torch.ones_like(p_sigma))
                # the post distribution from mask regions
                p_distribution = torch.distributions.Normal(p_mu, p_sigma)
                p_distribution_fix = torch.distributions.Normal(p_mu.detach(), p_sigma.detach())
                # the prior distribution from valid region
                q_distribution = torch.distributions.Normal(q_mu, q_sigma)

                # kl divergence
                kl_rec += torch.distributions.kl_divergence(std_distribution, p_distribution)
                if self.opt.train_paths == "one":
                    kl_g += torch.distributions.kl_divergence(std_distribution, q_distribution)
                elif self.opt.train_paths == "two":
                    kl_g += torch.distributions.kl_divergence(p_distribution_fix, q_distribution)
                self.distribution.append([torch.zeros_like(p_mu), m_sigma * torch.ones_like(p_sigma), p_mu, p_sigma, q_mu, q_sigma])

            return p_distribution, q_distribution, kl_rec, kl_g

In [6]:
class TDAnet():
#     @staticmethod
#     def modify_options(parser, is_train=True):
#         """Add new options and rewrite default values for existing options"""
#         parser.add_argument('--prior_alpha', type=float, default=0.8,
#                             help='factor to contorl prior variation: 1/(1+e^((x-0.8)*8))')
#         parser.add_argument('--prior_beta', type=float, default=8,
#                             help='factor to contorl prior variation: 1/(1+e^((x-0.8)*8))')
#         parser.add_argument('--no_maxpooling', action='store_true', help='rm maxpooling in DMA for ablation')
#         parser.add_argument('--update_language', action='store_true', help='update language encoder while training')
#         parser.add_argument('--detach_embedding', action='store_true',
#                             help='do not pass grad to embedding in DAMSM-text end')

#         if is_train:
#             parser.add_argument('--train_paths', type=str, default='two', help='training strategies with one path or two paths')
#             parser.add_argument('--dynamic_sigma', action='store_true', help='change sigma base on mask area')
#             parser.add_argument('--lambda_rec_l1', type=float, default=20.0, help='weight for image reconstruction loss')
#             parser.add_argument('--lambda_gen_l1', type=float, default=20.0, help='weight for image reconstruction loss')
#             parser.add_argument('--lambda_kl', type=float, default=20.0, help='weight for kl divergence loss')
#             parser.add_argument('--lambda_gan', type=float, default=1.0, help='weight for generation loss')
#             parser.add_argument('--lambda_match', type=float, default=0.1, help='weight for image-text match loss')

#         return parser

    def __init__(self, opt):
        """Initial the pluralistic model"""
        #BaseModel.__init__(self, opt)

        self.loss_names = ['kl_rec', 'kl_g', 'l1_rec', 'l1_g', 'gan_g', 'word_g', 'sentence_g', 'ad_l2_g',
                           'gan_rec', 'ad_l2_rec', 'word_rec', 'sentence_rec',  'dis_img', 'dis_img_rec']
        self.log_names = []
        self.visual_names = ['img_m', 'img_truth', 'img_c', 'img_out', 'img_g', 'img_rec']
        self.text_names = ['text_positive']
        self.value_names = ['u_m', 'sigma_m', 'u_post', 'sigma_post', 'u_prior', 'sigma_prior']
        self.model_names = ['E', 'G', 'D', 'D_rec']
        self.distribution = []
        self.prior_alpha = opt.prior_alpha
        self.prior_beta = opt.prior_beta
        self.max_pool = None if opt.no_maxpooling else 'max'

        ''' define the inpainting model '''
        self.net_E = AttTextualResEncoder(input_nc=3, ngf=32, z_nc=256, img_f=256, L=6, layers=5, norm='none', activation='LeakyReLU', use_spect=True, use_coord=False, image_dim=256, text_dim=256, multi_peak=False, pool_attention=self.max_pool)
        self.net_E = init_net(self.net_E, init_type='orthogonal', activation='ReLU', gpu_ids=opt.gpu_ids)


        self.net_G = HiddenResGenerator(output_nc=3, f_text_dim=768, ngf=32, z_nc=256, img_f=256, L=0, layers=5, norm='instance', activation='LeakyReLU', output_scale=opt.output_scale, use_spect=True, use_coord=False, use_attn=True)
        self.net_G = init_net(self.net_G, init_type='orthogonal', activation='LeakyReLU', gpu_ids=opt.gpu_ids)

        ''' define the discriminator model '''
        self.net_D = ResDiscriminator(input_nc=3, ndf=32, img_f=128, layers=5, norm='none', activation='LeakyReLU', use_spect=True, use_coord=False, use_attn=True)
        self.net_D = init_net(self.net_D, init_type='orthogonal', activation='LeakyReLU', gpu_ids=opt.gpu_ids)
        self.net_D_rec = copy.deepcopy(self.net_D)

        text_config = TextConfig(opt.text_config)
        self._init_language_model(text_config)

        if self.isTrain:
            # define the loss functions
            self.GANloss = external_function.GANLoss(opt.gan_mode)
            self.L1loss = torch.nn.L1Loss()
            self.L2loss = torch.nn.MSELoss()

            self.image_encoder = CNN_ENCODER(text_config.EMBEDDING_DIM)
            state_dict = torch.load(
                text_config.IMAGE_ENCODER, map_location=lambda storage, loc: storage)
            self.image_encoder.load_state_dict(state_dict)
            self.image_encoder.eval()
            if len(self.gpu_ids) > 0 and torch.cuda.is_available():
                self.image_encoder.cuda()
            base_function._freeze(self.image_encoder)

            # define the optimizer
            self.optimizer_G = torch.optim.Adam(itertools.chain(filter(lambda p: p.requires_grad, self.net_G.parameters()),
                        filter(lambda p: p.requires_grad, self.net_E.parameters())), lr=opt.lr, betas=(0.0, 0.999))
            self.optimizer_D = torch.optim.Adam(itertools.chain(filter(lambda p: p.requires_grad, self.net_D.parameters()),
                                                filter(lambda p: p.requires_grad, self.net_D_rec.parameters())),
                                                lr=opt.lr, betas=(0.0, 0.999))
            self.optimizers.append(self.optimizer_G)
            self.optimizers.append(self.optimizer_D)

        self.setup(opt)

    def _init_language_model(self, text_config):
        x = pickle.load(open(text_config.VOCAB, 'rb'))
        self.ixtoword = x[2]
        self.wordtoix = x[3]

        word_len = len(self.wordtoix)
        self.text_encoder = RNN_ENCODER(word_len, nhidden=256)

        state_dict = torch.load(text_config.LANGUAGE_ENCODER, map_location=lambda storage, loc: storage)
        self.text_encoder.load_state_dict(state_dict)
        self.text_encoder.eval()
        if not self.opt.update_language:
            self.text_encoder.requires_grad_(False)
        if len(self.gpu_ids) > 0 and torch.cuda.is_available():
            self.text_encoder.cuda()

    def set_input(self, input, epoch=0):
        """Unpack input data from the data loader and perform necessary pre-process steps"""
        self.input = input
        self.image_paths = self.input['img_path']
        self.img = input['img']
        self.mask = input['mask']
        self.caption_idx = input['caption_idx']
        self.caption_length = input['caption_len']

        if len(self.gpu_ids) > 0:
            self.img = self.img.cuda(self.gpu_ids[0], True)
            self.mask = self.mask.cuda(self.gpu_ids[0], True)

        # get I_m and I_c for image with mask and complement regions for training
        self.img_truth = self.img * 2 - 1
        self.img_m = self.mask * self.img_truth
        self.img_c =  (1 - self.mask) * self.img_truth

        # get multiple scales image ground truth and mask for training
        self.scale_img = task.scale_pyramid(self.img_truth, self.opt.output_scale)
        self.scale_mask = task.scale_pyramid(self.mask, self.opt.output_scale)

        # About text stuff
        self.text_positive = util.idx_to_caption(
                                    self.ixtoword, self.caption_idx[0].tolist(), self.caption_length[0].item())
        self.word_embeddings, self.sentence_embedding = util.vectorize_captions_idx_batch(
                                                    self.caption_idx, self.caption_length, self.text_encoder)
        self.text_mask = util.lengths_to_mask(self.caption_length, max_length=self.word_embeddings.size(-1))
        self.match_labels = torch.LongTensor(range(len(self.img_m)))
        if len(self.gpu_ids) > 0:
            self.word_embeddings = self.word_embeddings.cuda(self.gpu_ids[0], True)
            self.sentence_embedding = self.sentence_embedding.cuda(self.gpu_ids[0], True)
            self.text_mask = self.text_mask.cuda(self.gpu_ids[0], True)
            self.match_labels = self.match_labels.cuda(self.gpu_ids[0], True)

    def test(self, mark=None):
        """Forward function used in test time"""
        # save the groundtruth and masked image
        self.save_results(self.img_truth, data_name='truth')
        self.save_results(self.img_m, data_name='mask')

        # encoder process
        distribution, f, f_text = self.net_E(
            self.img_m, self.sentence_embedding, self.word_embeddings, self.text_mask, self.mask)
        variation_factor = 0. if self.opt.no_variance else 1.
        q_distribution = torch.distributions.Normal(distribution[-1][0], distribution[-1][1] * variation_factor)
        scale_mask = task.scale_img(self.mask, size=[f[2].size(2), f[2].size(3)])

        # decoder process
        for i in range(self.opt.nsampling):
            z = q_distribution.sample()

            self.img_g, attn = self.net_G(z, f_text, f_e=f[2], mask=scale_mask.chunk(3, dim=1)[0])
            self.img_out = (1 - self.mask) * self.img_g[-1].detach() + self.mask * self.img_m
            self.score = self.net_D(self.img_out)
            self.save_results(self.img_out, i, data_name='out', mark=mark)

    def get_distribution(self, distribution_factors):
        """Calculate encoder distribution for img_m, img_c only in train, all about distribution layer of VAE model"""
        # get distribution
        sum_valid = (torch.mean(self.mask.view(self.mask.size(0), -1), dim=1) - 1e-5).view(-1, 1, 1, 1)
        m_sigma = 1 if not self.opt.dynamic_sigma else (1 / (1 + ((sum_valid - self.prior_alpha) * self.prior_beta).exp_()))
        p_distribution, q_distribution, kl_rec, kl_g = 0, 0, 0, 0
        self.distribution = []
        for distribution in distribution_factors:
            p_mu, p_sigma, q_mu, q_sigma = distribution
            # the assumption distribution for different mask regions
            std_distribution = torch.distributions.Normal(torch.zeros_like(p_mu), m_sigma * torch.ones_like(p_sigma))
            # m_distribution = torch.distributions.Normal(torch.zeros_like(p_mu), torch.ones_like(p_sigma))
            # the post distribution from mask regions
            p_distribution = torch.distributions.Normal(p_mu, p_sigma)
            p_distribution_fix = torch.distributions.Normal(p_mu.detach(), p_sigma.detach())
            # the prior distribution from valid region
            q_distribution = torch.distributions.Normal(q_mu, q_sigma)

            # kl divergence
            kl_rec += torch.distributions.kl_divergence(std_distribution, p_distribution)
            if self.opt.train_paths == "one":
                kl_g += torch.distributions.kl_divergence(std_distribution, q_distribution)
            elif self.opt.train_paths == "two":
                kl_g += torch.distributions.kl_divergence(p_distribution_fix, q_distribution)
            self.distribution.append([torch.zeros_like(p_mu), m_sigma * torch.ones_like(p_sigma), p_mu, p_sigma, q_mu, q_sigma])

        return p_distribution, q_distribution, kl_rec, kl_g

    def get_G_inputs(self, p_distribution, q_distribution, f):
        """Process the encoder feature and distributions for generation network, combine two dataflow when implement."""
        f_m = torch.cat([f[-1].chunk(2)[0], f[-1].chunk(2)[0]], dim=0)
        f_e = torch.cat([f[2].chunk(2)[0], f[2].chunk(2)[0]], dim=0)
        scale_mask = task.scale_img(self.mask, size=[f_e.size(2), f_e.size(3)])
        mask = torch.cat([scale_mask.chunk(3, dim=1)[0], scale_mask.chunk(3, dim=1)[0]], dim=0)
        z_p = p_distribution.rsample()
        z_q = q_distribution.rsample()
        z = torch.cat([z_p, z_q], dim=0)
        return z, f_m, f_e, mask

    def forward(self):
        """Run forward processing to get the inputs"""
        # encoder process
        distribution_factors, f, f_text = self.net_E(
            self.img_m, self.sentence_embedding, self.word_embeddings, self.text_mask, self.mask, self.img_c)

        p_distribution, q_distribution, self.kl_rec, self.kl_g = self.get_distribution(distribution_factors)

        # decoder process
        z, f_m, f_e, mask = self.get_G_inputs(p_distribution, q_distribution, f) # prepare inputs: img, mask, distribute

        results, attn = self.net_G(z, f_text, f_e, mask)
        self.img_rec = []
        self.img_g = []
        for result in results:
            img_rec, img_g = result.chunk(2)
            self.img_rec.append(img_rec)
            self.img_g.append(img_g)
        self.img_out = (1-self.mask) * self.img_g[-1].detach() + self.mask * self.img_truth

        self.region_features_rec, self.cnn_code_rec = self.image_encoder(self.img_rec[-1])
        self.region_features_g, self.cnn_code_g = self.image_encoder(self.img_g[-1])


    def backward_D_basic(self, netD, real, fake):
        """Calculate GAN loss for the discriminator"""
        # Real
        D_real = netD(real)
        D_real_loss = self.GANloss(D_real, True, True)
        # fake
        D_fake = netD(fake.detach())
        D_fake_loss = self.GANloss(D_fake, False, True)
        # loss for discriminator
        D_loss = (D_real_loss + D_fake_loss) * 0.5
        # gradient penalty for wgan-gp
        if self.opt.gan_mode == 'wgangp':
            gradient_penalty, gradients = external_function.cal_gradient_penalty(netD, real, fake.detach())
            D_loss +=gradient_penalty

        D_loss.backward()

        return D_loss

    def backward_D(self):
        """Calculate the GAN loss for the discriminators"""
        base_function._unfreeze(self.net_D, self.net_D_rec)
        ## Note: changed gen path gan loss to rec path
        # self.loss_dis_img = self.backward_D_basic(self.net_D, self.img_truth, self.img_g[-1])
        self.loss_dis_img = self.backward_D_basic(self.net_D, self.img_truth, self.img_rec[-1])
        self.loss_dis_img_rec = self.backward_D_basic(self.net_D_rec, self.img_truth, self.img_rec[-1])

    def backward_G(self):
        """Calculate training loss for the generator"""

        # encoder kl loss
        self.loss_kl_rec = self.kl_rec.mean() * self.opt.lambda_kl * self.opt.output_scale
        self.loss_kl_g = self.kl_g.mean() * self.opt.lambda_kl * self.opt.output_scale

        # Adversarial loss
        base_function._freeze(self.net_D, self.net_D_rec)

        # D loss fake
        D_fake_g = self.net_D(self.img_g[-1])
        self.loss_gan_g = self.GANloss(D_fake_g, True, False) * self.opt.lambda_gan
        D_fake_rec = self.net_D(self.img_rec[-1])
        self.loss_gan_rec = self.GANloss(D_fake_rec, True, False) * self.opt.lambda_gan

        # LSGAN loss
        D_fake = self.net_D_rec(self.img_rec[-1])
        D_real = self.net_D_rec(self.img_truth)
        D_fake_g = self.net_D_rec(self.img_g[-1])
        self.loss_ad_l2_rec = self.L2loss(D_fake, D_real) * self.opt.lambda_gan
        self.loss_ad_l2_g = self.L2loss(D_fake_g, D_real) * self.opt.lambda_gan

        # Text-image consistent loss
        if not self.opt.detach_embedding:
            sentence_embedding = self.sentence_embedding
            word_embeddings = self.word_embeddings
        else:
            sentence_embedding = self.sentence_embedding.detach()
            word_embeddings = self.word_embeddings.detach()


        loss_sentence = base_function.sent_loss(self.cnn_code_rec, sentence_embedding, self.match_labels)
        loss_word, _ = base_function.words_loss(self.region_features_rec, word_embeddings, self.match_labels, \
                                 self.caption_length, len(word_embeddings))
        self.loss_word_rec = loss_word * self.opt.lambda_match
        self.loss_sentence_rec = loss_sentence * self.opt.lambda_match

        loss_sentence = base_function.sent_loss(self.cnn_code_g, sentence_embedding, self.match_labels)
        loss_word, _ = base_function.words_loss(self.region_features_g, word_embeddings, self.match_labels, \
                                 self.caption_length, len(word_embeddings))
        self.loss_word_g = loss_word * self.opt.lambda_match
        self.loss_sentence_g = loss_sentence * self.opt.lambda_match


        # calculate l1 loss ofr multi-scale, multi-depth-level outputs
        loss_l1_rec, loss_l1_g, log_PSNR_rec, log_PSNR_out = 0, 0, 0, 0
        for i, (img_rec_i, img_fake_i, img_out_i, img_real_i, mask_i) in enumerate(zip(self.img_rec, self.img_g, self.img_out, self.scale_img, self.scale_mask)):
            loss_l1_rec += self.L1loss(img_rec_i, img_real_i)
            if self.opt.train_paths == "one":
                loss_l1_g += self.L1loss(img_fake_i, img_real_i)
            elif self.opt.train_paths == "two":
                loss_l1_g += self.L1loss(img_fake_i, img_real_i)

        self.loss_l1_rec = loss_l1_rec * self.opt.lambda_rec_l1
        self.loss_l1_g = loss_l1_g * self.opt.lambda_gen_l1

        # if one path during the training, just calculate the loss for generation path
        if self.opt.train_paths == "one":
            self.loss_l1_rec = self.loss_l1_rec * 0
            self.loss_ad_l2_rec = self.loss_ad_l2_rec * 0
            self.loss_kl_rec = self.loss_kl_rec * 0

        total_loss = 0

        for name in self.loss_names:
            if name != 'dis_img' and name != 'dis_img_rec':
                total_loss += getattr(self, "loss_" + name)

        total_loss.backward()

    def optimize_parameters(self):
        """update network weights"""
        # compute the image completion results
        self.forward()
        # optimize the discrinimator network parameters
        self.optimizer_D.zero_grad()
        self.backward_D()
        self.optimizer_D.step()
        # optimize the completion network parameters
        self.optimizer_G.zero_grad()
        self.backward_G()
        self.optimizer_G.step()

    def save_results(self, save_data, score=None, data_name='none', mark=None):
        """Save the training or testing results to disk"""
        img_paths = self.get_image_paths()

        for i in range(save_data.size(0)):
            print('process image ...... %s' % img_paths[i])
            short_path = ntpath.basename(img_paths[i])  # get image path
            name = os.path.splitext(short_path)[0]
            if type(score) == type(None):
                img_name = '%s_%s.png' % (name, data_name)
            else:
                # d_score = score[i].mean()
                # img_name = '%s_%s_%s.png' % (name, data_name, str(round(d_score.item(), 3)))
                if type(mark) == type(None):
                    img_name = '%s_%s_%s.png' % (name, data_name, str(score))
                else:
                    img_name = '%s_%s_%s_%s.png' % (name, data_name, str(score), str(mark))
            # save predicted image with discriminator score
            util.mkdir(self.opt.results_dir)
            img_path = os.path.join(self.opt.results_dir, img_name)
            img_numpy = util.tensor2im(save_data[i].data)
            util.save_image(img_numpy, img_path)

    # load models
    def load_networks(self, which_epoch, gpu_ids):
        """Load all the networks from the disk"""
        for name in self.model_names:
            if isinstance(name, str):
                filename = '%s_net_%s.pth' % (which_epoch, name)
                path = os.path.join(self.save_dir, filename)
                net = getattr(self, 'net_' + name)
                pretrained_dict = torch.load(path)
                try:
                    if len(gpu_ids) != 0:
                        net.load_state_dict(pretrained_dict)
                    else:
                        pretrained_dict_cpu = {key[7:]:value for key, value in pretrained_dict.items()}
                        net.load_state_dict(pretrained_dict_cpu)
                except:
                    model_dict = net.state_dict()
                    try:
                        pretrained_dict = {k:v for k,v in pretrained_dict.items() if k in model_dict}
                        net.load_state_dict(pretrained_dict)
                        print('Pretrained network %s has excessive layers; Only loading layers that are used' % name)
                    except:
                        print('Pretrained network %s has fewer layers; The following are not initialized:' % name)
                        not_initialized = set()
                        for k, v in pretrained_dict.items():
                            if v.size() == model_dict[k].size():
                                model_dict[k] = v

                        for k, v in model_dict.items():
                            if k not in pretrained_dict or v.size() != pretrained_dict[k].size():
                                not_initialized.add(k.split('.')[0])
                        print(sorted(not_initialized))
                        net.load_state_dict(model_dict)
                if len(self.gpu_ids) > 0 and torch.cuda.is_available():
                    net.cuda()
                if not self.isTrain:
                    net.eval()
    def setup(self, opt):
        """Load networks, create schedulers"""
        if self.isTrain:
            self.schedulers = [base_function.get_scheduler(optimizer, opt) for optimizer in self.optimizers]
        if not self.isTrain or opt.continue_train:
            self.load_networks(opt.which_iter, opt.gpu_ids)

    def eval(self):
        """Make models eval mode during test time"""
        for name in self.model_names:
            if isinstance(name, str):
                net = getattr(self, 'net_' + name)
                net.eval()

    def get_image_paths(self):
        """Return image paths that are used to load current data"""
        return self.image_paths

    def update_learning_rate(self):
        """Update learning rate"""
        for scheduler in self.schedulers:
            scheduler.step()
        lr = self.optimizers[0].param_groups[0]['lr']
        print('learning rate=%.7f' % lr)

    def get_current_errors(self):
        """Return training loss"""
        errors_ret = OrderedDict()
        for name in self.loss_names:
            if isinstance(name, str):
                errors_ret[name] = getattr(self, 'loss_' + name).item()

        if 'img_truth' in self.visual_names:
            truth = getattr(self, 'img_truth')
            outputs_names = ['img_out', 'img_g', 'img_rec']
            for name in outputs_names:
                if name in self.visual_names:
                    out = getattr(self, name)
                    psnr = util.PSNR(util.tensor2im(out[-1].data), util.tensor2im(truth[-1].data))
                    errors_ret['psnr_'+name] = psnr

        return errors_ret

    def get_current_visuals(self):
        """Return visualization images"""
        visual_ret = OrderedDict()
        for name in self.visual_names:
            if isinstance(name, str):
                value = getattr(self, name)
                if isinstance(value, list):
                    visual_ret[name] = util.tensor2im(value[-1].data)
                else:
                    visual_ret[name] = util.tensor2im(value.data)
        return visual_ret

    def get_current_text(self):
        """Return the last image's caption of current batch"""
        text_ret = OrderedDict()
        for name in self.text_names:
            if isinstance(name, str):
                text = getattr(self, name)
                if isinstance(text, list):
                    text_ret[name] = text[-1] + '\n'+ self.image_paths[0]
                else:
                    text_ret[name] = text + '\n' + self.image_paths[0]
        return text_ret

    def get_current_dis(self):
        """Return the distribution of encoder features"""
        dis_ret = OrderedDict()
        value = getattr(self, 'distribution')
        for i in range(1):
            for j, name in enumerate(self.value_names):
                if isinstance(name, str):
                    dis_ret[name+str(i)] =util.tensor2array(value[i][j].data)

        return dis_ret

    # save model
    def save_networks(self, which_epoch):
        """Save all the networks to the disk"""
        for name in self.model_names:
            if isinstance(name, str):
                save_filename = '%s_net_%s.pth' % (which_epoch, name)
                save_path = os.path.join(self.save_dir, save_filename)
                net = getattr(self, 'net_' + name)
                torch.save(net.cpu().state_dict(), save_path)
                if len(self.gpu_ids) > 0 and torch.cuda.is_available():
                    net.cuda()


In [7]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

a={"a":1,"b":2}

In [8]:
model=TDAnet(opt)

AttTextualResEncoder(
  (block0): ResBlockEncoderOptimized(
    (conv1): SpectralNorm(
      (module): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (conv2): SpectralNorm(
      (module): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (bypass): SpectralNorm(
      (module): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1))
    )
    (model): Sequential(
      (0): SpectralNorm(
        (module): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (1): LeakyReLU(negative_slope=0.1)
      (2): SpectralNorm(
        (module): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (3): AvgPool2d(kernel_size=2, stride=2, padding=0)
    )
    (shortcut): Sequential(
      (0): AvgPool2d(kernel_size=2, stride=2, padding=0)
      (1): SpectralNorm(
        (module): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1))
      )
    )
  )
  (word_attention): ImageTextAttention(
    (conv_image

  "num_layers={}".format(dropout, num_layers))


AttributeError: 'TDAnet' object has no attribute 'opt'

In [13]:
import os
import os.path

IMG_EXTENSIONS = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]


def is_image_file(filename):
    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)


def make_dataset(path_files):
    if os.path.isfile(path_files):
        paths, size = make_dataset_txt(path_files)
    else:
        paths, size = make_dataset_dir(path_files)

    return paths, size


def make_dataset_txt(files):
    """
    :param path_files: the path of txt file that store the image paths
    :return: image paths and sizes
    """
    img_paths = []

    with open(files) as f:
        paths = f.readlines()

    for path in paths:
        path = path.strip()
        img_paths.append(os.path.join(os.path.dirname(files), path))

    return img_paths, len(img_paths)


def make_dataset_dir(dir):
    """
    :param dir: directory paths that store the image
    :return: image paths and sizes
    """
    img_paths = []

    assert os.path.isdir(dir), '%s is not a valid directory' % dir

    for root, _, fnames in os.walk(dir):
        for fname in sorted(fnames):
            if is_image_file(fname):
                path = os.path.join(root, fname)
                img_paths.append(path)

    return img_paths, len(img_paths)


In [9]:
import torch
import random
import json
import pickle
import os
import numpy as np
import imageio
from PIL import Image, ImageFile
import torchvision.transforms as transforms
import torch.utils.data as data
from util import task, util
from options.global_config import TextConfig

class CreateDataset(data.Dataset):
    def __init__(self, opt, debug=False):
        self.opt = opt
        self.debug = debug
        self.img_paths, self.img_size = make_dataset(opt.img_file)
        # provides random file for training and testing
        if opt.mask_file != 'none':
            if not opt.mask_file.endswith('.json'):
                self.mask_paths, self.mask_size = make_dataset(opt.mask_file)
            else:
                with open(opt.mask_file, 'r') as f:
                    self.image_bbox = json.load(f)

        self.transform = get_transform(opt)

        ## ========Abnout text stuff===============
        text_config = TextConfig(opt.text_config)
        self.max_length = text_config.MAX_TEXT_LENGTH
        if 'coco' in text_config.CAPTION.lower():
            self.num_captions = 5
        elif 'place' in text_config.CAPTION.lower():
            self.num_captions = 1
        else:
            self.num_captions = 10

        # load caption file
        with open(text_config.CAPTION, 'r') as f:
            self.captions = json.load(f)

        x = pickle.load(open(text_config.VOCAB, 'rb'))
        self.ixtoword = x[2]
        self.wordtoix = x[3]

        self.epoch = 0 # Used for iter on captions.

    def __getitem__(self, index):
        # load image
        index = self.epoch*self.img_size+index

        img, img_path = self.load_img(index)
        # load mask
        mask = self.load_mask(img, index, img_path)
        assert sum(img.shape) == sum(mask.shape), (img.shape, mask.shape)
        caption_idx, caption_len, caption, img_name= self._load_text_idx(index)
        return {'img': img, 'img_path': img_path, 'mask': mask, \
                'caption_idx' : torch.Tensor(caption_idx).long(), 'caption_len':caption_len,\
                'caption_text': caption, 'image_path': img_name}

    def __len__(self):
        return self.img_size

    def name(self):
        return "inpainting dataset"

    def load_img(self, index):
        ImageFile.LOAD_TRUNCATED_IMAGES = True
        img_path = self.img_paths[index % self.img_size]
        img_pil = Image.open(img_path).convert('RGB')
        img = self.transform(img_pil)
        img_pil.close()
        return img, img_path

    def _load_text_idx(self, image_index):
        img_name = self.img_paths[image_index % self.img_size]
        caption_index_of_image = image_index // self.img_size  % self.num_captions
        img_name = os.path.basename(img_name)
        captions = self.captions[img_name]
        caption = captions[caption_index_of_image] if type(captions) == list else captions
        caption_idx, caption_len = util._caption_to_idx(self.wordtoix, caption, self.max_length)

        return caption_idx, caption_len, caption, img_name

    def load_mask(self, img, index, img_path):
        """Load different mask types for training and testing"""
        mask_type_index = random.randint(0, len(self.opt.mask_type) - 1)
        mask_type = self.opt.mask_type[mask_type_index]

        # center mask
        if mask_type == 0:
            return task.center_mask(img)

        # random regular mask
        if mask_type == 1:
            return task.random_regular_mask(img)

        # random irregular mask
        if mask_type == 2:
            return task.random_irregular_mask(img)

        if mask_type == 3:
            # file masks, e.g. CUB object mask
            mask_index = index
            mask_pil = Image.open(self.mask_paths[mask_index]).convert('RGB')

            mask_transform = get_transform_mask(self.opt)

            mask = (mask_transform(mask_pil) == 0).float()
            mask_pil.close()
            return mask

        if mask_type == 4:
            # coco json file object mask
            if os.path.basename(img_path) not in self.image_bbox:
                return task.random_regular_mask(img)

            img_original = np.asarray(Image.open(img_path).convert('RGB'))

            # create a mask matrix same as img_original
            mask = np.zeros_like(img_original)
            bboxes = self.image_bbox[os.path.basename(img_path)]

            # choose max area box
            choosen_box = 0,0,0,0
            max_area = 0
            for x1,x2,y1,y2 in bboxes:
                area = (x2-x1) * (y2-y1)
                if area > max_area:
                    max_area = area
                    choosen_box = x1,x2,y1,y2
            x1, x2, y1, y2 = choosen_box
            mask[x1:x2, y1:y2] = 1

            # apply same transform as img to the mask
            mask_pil = Image.fromarray(mask)

            mask_transform = get_transform_mask(self.opt)

            mask = (mask_transform(mask_pil) == 0).float()

            mask_pil.close()

            return mask

def dataloader(opt):
    datasets = CreateDataset(opt)
    print(datasets,opt.batchSize,opt.no_shuffle,opt.nThreads)
    dataset = data.DataLoader(datasets, batch_size=opt.batchSize, shuffle=not opt.no_shuffle, num_workers=int(opt.nThreads[0]), pin_memory=True)
    return dataset

def get_transform_mask(opt):
    """Basic process to transform PIL image to torch tensor"""
    transform_list = []
    osize = [opt.loadSize[0], opt.loadSize[1]]
    fsize = [opt.fineSize[0], opt.fineSize[1]]
    if opt.isTrain:
        if opt.resize_or_crop == 'resize_and_crop':
            transform_list.append(transforms.Resize(osize))
            transform_list.append(transforms.RandomCrop(fsize))
        elif opt.resize_or_crop == 'crop':
            transform_list.append(transforms.RandomCrop(fsize))
        if not opt.no_flip:
            transform_list.append(transforms.RandomHorizontalFlip())
        if not opt.no_rotation:
            transform_list.append(transforms.RandomRotation(3))
    else:
        transform_list.append(transforms.Resize(fsize))

    transform_list += [transforms.ToTensor()]

    return transforms.Compose(transform_list)

def get_transform(opt):
    """Basic process to transform PIL image to torch tensor"""
    transform_list = []
    osize = [opt.loadSize[0], opt.loadSize[1]]
    fsize = [opt.fineSize[0], opt.fineSize[1]]
    if opt.isTrain:
        if opt.resize_or_crop == 'resize_and_crop':
            transform_list.append(transforms.Resize(osize))
            transform_list.append(transforms.RandomCrop(fsize))
        elif opt.resize_or_crop == 'crop':
            transform_list.append(transforms.RandomCrop(fsize))
        if not opt.no_augment:
            transform_list.append(transforms.ColorJitter(0.0, 0.0, 0.0, 0.0))
        if not opt.no_flip:
            transform_list.append(transforms.RandomHorizontalFlip())
        if not opt.no_rotation:
            transform_list.append(transforms.RandomRotation(3))
    else:
        transform_list.append(transforms.Resize(fsize))

    transform_list += [transforms.ToTensor()]

    return transforms.Compose(transform_list)


# Dataset

In [14]:
dataset= dataloader(opt)
dataset_size = len(dataset) * opt.batchSize

<__main__.CreateDataset object at 0x7fa782947510> 10 False (8,)


# Training

In [11]:
###opt = TrainOptions().parse()

print('number of training images =',dataset_size)
# create a model
model = create_model(opt)

# create a visualizer
#visualizer = Visualizer(opt)

# training flag
keep_training = True
max_iteration = opt.niter+opt.niter_decay
epoch = 0
total_iteration = opt.iter_count

# training process
while(keep_training):
    epoch_start_time = time.time()
    epoch+=1
    print('\n Training epoch: %d' % epoch)

    for i, data in enumerate(dataset):
        dataset.epoch = epoch - 1
        iter_start_time = time.time()
        total_iteration += 1
        model.set_input(data)
        model.optimize_parameters()
        if total_iteration % opt.print_freq == 0:
            losses = model.get_current_errors()
            
        # save the latest model every <save_latest_freq> iterations to the disk
        if total_iteration % opt.save_latest_freq == 0:
            print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_iteration))
            model.save_networks('latest')

        # save the model every <save_iter_freq> iterations to the disk
        if total_iteration % opt.save_iters_freq == 0:
            print('saving the model of iterations %d' % total_iteration)
            model.save_networks(total_iteration)

        if total_iteration > max_iteration:
            keep_training = False
            break

    model.update_learning_rate()

    print('\nEnd training')


NameError: name 'dataset_size' is not defined

In [26]:
import pickle

# Open the pickle file in binary read mode
i = 0
with open(DEFAULT_CONFIG['VOCAB'], 'rb') as f:
    # Load the data from the pickle file
    if(i>100):
        exit()
    data = pickle.load(f)
    i += 1
# Print the data
print(data)


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [33]:
import subprocess

# Replace 'your_pipe_file' with the path to your pipe file
pipe_file_path = DEFAULT_CONFIG['VOCAB']

# try:
#     # Open the pipe file for reading
#     with open(pipe_file_path, 'rb', buffering=0) as pipe:
#         # Read the content from the pipe
#         content = pipe.read(10024)
#         print("Content of the pipe file:")
#         print(content)
# except FileNotFoundError:
#     print("Pipe file not found.")
# except Exception as e:
#     print("An error occurred:", e)

objects = []
with (open(pipe_file_path, "rb")) as openfile:
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break

In [34]:
objects

[[[[3066,
    4217,
    2622,
    3066,
    2774,
    3959,
    340,
    3950,
    1946,
    3066,
    3959,
    1692,
    4839],
   [1010, 3959, 5274, 4217, 907, 3066, 1902, 4220, 2622, 3959, 1394, 3824],
   [4341,
    935,
    3066,
    1902,
    4217,
    2622,
    1394,
    3824,
    1946,
    3066,
    2801,
    671,
    4839],
   [4341,
    2801,
    4217,
    907,
    3959,
    81,
    3066,
    1902,
    3484,
    964,
    1227,
    1946,
    3066,
    2074,
    3857,
    3824],
   [4217,
    907,
    3973,
    3404,
    3959,
    3132,
    2622,
    3066,
    2642,
    543,
    1946,
    1902,
    964,
    1946,
    1227],
   [4341, 4217, 907, 3824, 1913, 3530, 1394, 1946, 907, 3066, 1902, 964],
   [4341, 4217, 907, 2334, 3824, 1946, 3066, 1902, 1227, 1946, 4220],
   [4341, 4217, 935, 1902, 1946, 1394, 267, 2206, 2622, 3066, 3959, 945, 4839],
   [4341, 1902, 1946, 3332, 4217, 907, 3973, 495, 340, 3950],
   [4341, 4217, 907, 3824, 1913, 3530, 1394, 1946, 907, 3066, 1902, 4220],