In [2]:
from ConvGRU import ConvGRU, ConvGRUCell
from reformer.reformer_enc_dec import ReformerEncDec
from reformer.reformer_pytorch import Reformer, ReformerLM
from patchify import patchify, unpatchify
from axial_positional_embedding import AxialPositionalEmbedding
from transformers import ReformerModel, ReformerConfig, ReformerTokenizer
import deepspeed

import argparse
import os
import sys
import numpy as np
import math
import pickle
import cv2 as cv
import matplotlib
import matplotlib.pyplot as plt
import random
import time
import json
from cv2 import VideoWriter, VideoWriter_fourcc, imread

import torchvision.transforms as transforms
from torchvision.utils import save_image

from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torch.autograd import Variable
from torch.cuda.amp import autocast, GradScaler

import torch.nn as nn
import torch.nn.functional as F
import torch
import torchvision

import warnings
torch.cuda.set_device(0)

dataset_dir = r"C:\Users/Leo's PC/Documents/SSTP Tests/SSTP/GruGan/test_frames"

In [2]:
def img2embedding(imgs: np.array, patch_shape):
    # N, C, H, W
    out = []
    if len(imgs.shape) == 3:
        imgs = np.expand_dims(imgs, 1)
    if imgs.shape[1] == 1: # if grayscale
        for img in imgs:
            img = img[0]
            patches = patchify(img, patch_shape, step=patch_shape)
            patches = np.reshape(patches, (int((img.shape[0]/patch_shape[0]) * (img.shape[1]/patch_shape[1])), int(patch_shape[0] * patch_shape[1])))
            out.append(patches)
            
        out = np.asarray(out)
        return out
    
toVTensor = lambda x : Variable(torch.Tensor(x).cuda())

In [3]:
class ReformerDatasetFast(Dataset):

    def __init__(self, file_dir, transform=None, seq_len=1):

        self.dir = file_dir
        self.transform = transform
        self.seq_len = seq_len
        self.diction = [] # yes, yes, it is an array called diction
        
        readImage = lambda filename: self.transform(np.array(cv.imread(os.path.join(self.dir, filename)) / 255)) if self.transform else np.array(cv.imread(os.path.join(self.dir, filename)) / 255)
        
        idx = 0
        for filename in os.listdir(self.dir):
            if filename.endswith('jpg'):
                self.diction.append(readImage(filename))
                idx += 1

    def __len__(self):
        return len(self.diction) - 1


    def __getitem__(self, idx):
        start = time.time()
        x, y = self.diction[idx*self.seq_len : (idx+1)*self.seq_len], self.diction[idx*self.seq_len+1 : (idx+1)*self.seq_len+1]
        print(time.time() - start)
        x, y = torch.Tensor(np.asarray(x)), torch.Tensor(np.asarray(y))
        print(time.time() - start)
        return [x, y]

In [2]:
class ReformerDataset(Dataset):

    def __init__(self, file_dir, transform=None, seq_len=1):

        self.dir = file_dir
        self.transform = transform
        self.seq_len = seq_len
        self.diction = [] # yes, yes, it is an array called diction

        idx = 0
        for filename in os.listdir(self.dir):
            if filename.endswith('jpg'):
                self.diction.append(filename)
                idx += 1

    def __len__(self):
        return len(self.diction) - 1


    def __getitem__(self, idx):
        start = time.time()
        readImage = lambda filename: self.transform(np.array(cv.imread(os.path.join(self.dir, filename)) / 255)) if self.transform else np.array(cv.imread(os.path.join(self.dir, filename)) / 255)
        
        x, y = self.diction[idx*self.seq_len : (idx+1)*self.seq_len], self.diction[idx*self.seq_len+1 : (idx+1)*self.seq_len+1]
        x, y = torch.Tensor(np.asarray(list(map(readImage, x)))), torch.Tensor(np.asarray(list(map(readImage, y))))
        return [x, y]


def HWC2CHW(x):
    return np.array(x).transpose(2, 0, 1)


dataset = ReformerDataset(file_dir=dataset_dir, transform=HWC2CHW, seq_len=256)

loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, drop_last=True, num_workers=0)

In [18]:
for i, imgs in enumerate(loader):
    for j in imgs:
        print(j.shape)
    break

0.0
0.6571147441864014
0.0
0.7066318988800049
0.0004951953887939453
0.7661302089691162
0.0
0.7226262092590332
torch.Size([4, 256, 3, 256, 256])
torch.Size([4, 256, 3, 256, 256])


In [16]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.encoder = ReformerLM(
            dim = 256,
            depth = 6,
            heads = 8,
            max_seq_len = 256,
            bucket_size = 64,
            causal = False,
            embed = False,
            return_embeddings = True #return the output of the last attention layer, the keys.
        ).cuda()

        self.decoder = ReformerLM(
            dim = 256,
            depth = 6,
            heads = 8,
            max_seq_len = 256,
            bucket_size = 64,
            causal = False,
            embed = False,
            return_embeddings = True #return the output of the last attention layer, the keys; otherwise would get a softmax activation of vocab dict distribution
        ).cuda()
        
    def forward(self, x, y_prev):
        self.encoded_keys = self.encoder(x)
        self.output = self.decoder(y_prev, keys = self.encoded_keys)
        return self.output
    
    
class Discriminator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        
        self.encoder = ReformerLM(
            dim = 256,
            depth = 6,
            heads = 8,
            max_seq_len = 256,
            bucket_size = 64,
            causal = False,
            embed = False,
            return_embeddings = True #return the output of the last attention layer, the keys.
        ).cuda()
        
        self.decoder = ReformerLM(
            dim = 256,
            depth = 6,
            heads = 8,
            max_seq_len = 256,
            bucket_size = 64,
            causal = False,
            embed = False,
            return_embeddings = True #return the output of the last attention layer, the keys; otherwise would get a softmax activation of vocab dict distribution
        ).cuda()
        
        self.fc = nn.Linear(256, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(x, y_prev):
        self.encoded_keys = self.encoder(x)
        self.embeddings = self.decoder(y_prev, keys = self.encoded_keys)
        self.output = self.fc(self.embeddings)
        self.output = self.sigmoid(self.output)
        return self.output, self.embeddings
            

class Decoder_Generator(nn.Module):
    def __init__(self):
        super(Decoder_Generator, self).__init__()

        self.decoder = ReformerLM(
            dim = 256,
            depth = 6,
            heads = 8,
            max_seq_len = 16384, # ~10 seconds
            bucket_size = 64,
            causal = True,
            embed = False,
            return_embeddings = True #return the output of the last attention layer, the keys; otherwise would get a softmax activation of vocab dict distribution
        ).cuda()
        
    def forward(self, x):
        self.output = self.decoder(x)
        return self.output
    

class Input_Conv(nn.Module):
    def __init__(self):
        super(Input_Conv, self).__init__()
        
        # Initialize the DenseBlock, input shape is (n, 3, 256, 256), output shape is (n, 64, 16, 16)
        self.denseblock = torchvision.models.densenet121()
        self.denseblock.features.transition1.conv = nn.Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.denseblock.features.transition1.pool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0)
        self.denseblock = nn.Sequential(*list(self.denseblock.features.children())[:6])
        
    def forward(self, x):
        return self.denseblock(x)

## Embeddings:
### Generator:
#### Encoder Input:
Encoder input is the sample image, broken up into 16x16 patches (for a 256x256 image, that's 256 vectors of length 256). However, for a colored image (3x256x256,) the channels need to be mapped. To get the input, we first cat(R, G, B), which has shape (768, 256). But we need to concatenate RGB embeddings, as well as positional embeddings, to the end of each pixel array. If RGB encoding occupies 64 numbers and positional (in-image spatial) occupies 196 numbers, we'll get a vector of length 512 for each patch, making our input of shape (256, 512) for a 256x256 image.
#### Decoder Input:
Compare to the encoder input, the decoder needs to handle one more dimension -- the time dimension of the video frame sequence. 
### Discriminator:
#### Encoder Input:
(same as Generator encoder input) Encoder input is the sample image, broken up into 16x16 patches (for a 256x256 image, that's 256 vectors of length 256). However, for a colored image (3x256x256,) the channels need to be mapped. To get the input, we first cat(R, G, B), which has shape (768, 256). But we need to concatenate RGB embeddings, as well as positional embeddings, to the end of each pixel array. If RGB encoding occupies 64 numbers and positional (in-image spatial) occupies 196 numbers, we'll get a vector of length 512 for each patch, making our input of shape (256, 512) for a 256x256 image.
#### Decoder Input:

In [17]:
featuremap_embedder = nn.Embedding(num_embeddings=64, embedding_dim=128).cuda()
sequence_position_embedder = nn.Embedding(num_embeddings=256, embedding_dim=128).cuda()

CNN = Input_Conv().cuda()
Decoder = Decoder_Generator().cuda()

In [18]:
print(sequence_position_embedder(torch.Tensor([0]).long().cuda()))

tensor([[-1.2355,  1.0560,  1.5756, -0.1583, -1.6614,  0.1206, -0.7047,  0.5283,
         -2.0649, -0.5863, -0.3184,  0.3223, -1.8305,  0.5860, -1.9457,  0.9726,
         -0.2147,  1.5721,  1.0855, -0.7001,  0.7131,  1.4095,  1.2315,  0.6028,
         -1.1094, -1.7353,  2.4519, -0.6465, -2.7521,  0.4146, -0.2117, -0.5878,
          0.2334, -0.0936,  0.9556, -0.0413, -0.3043, -0.4374, -0.3279, -0.8669,
         -0.0206, -0.3320, -0.3252, -0.2264,  1.4192,  0.0129,  0.5412, -0.0563,
          1.3217, -0.5745,  0.4835, -0.4406,  0.8775,  0.9977, -0.1232,  0.3732,
          0.9167, -1.7157,  1.4234,  1.9801, -0.1875,  1.2845, -0.9090, -0.2151,
          0.7727,  0.3833,  0.0052,  0.4972, -0.2087, -0.2124,  2.0634,  1.5586,
          0.8043, -0.0194,  0.2682, -1.2101,  0.2546,  0.0451, -0.6861, -1.2096,
         -0.3958,  0.8871,  0.9399,  0.6088, -0.1564, -0.9575,  1.3501,  0.6570,
         -0.2680,  1.3121, -0.7205,  0.0966, -2.1839,  0.0475,  0.4677,  2.1815,
          0.0533, -0.8190,  

In [19]:
pos_embedder = AxialPositionalEmbedding(256, (256, 64))
fmap_embedder = AxialPositionalEmbedding(256, (256, 64))

In [23]:
inp = toVTensor(np.random.rand(2, 3, 256, 256))
out = CNN(inp)
out = out.view(1, 128, 256)
out = out + pos_embedder(out) + fmap_embedder(out)

print(out.shape)

out = Decoder(out)
print(out.shape)

torch.Size([1, 128, 256])
torch.Size([1, 128, 256])


In [None]:
inp = toVTensor(np.random.rand(1, 3, 256, 256))
out = CNN(inp)
final = []
for n, image in enumerate(out):
    result = []
    for f, featuremap in enumerate(image):
        featuremap = torch.cat((featuremap.reshape(256), featuremap_embedder(torch.Tensor([f]).long().squeeze(0).cuda()), sequence_position_embedder(torch.Tensor([n]).long().squeeze(0).cuda())), dim=0)
        result.append(featuremap.detach().cpu().numpy()) ##TODO this cannot be detached here, need to figure out how to do without
    final.append(result)

final = torch.Tensor(np.asarray(final))
print(final.shape)
print(final[0][0].shape)

out = Decoder(final)

torch.Size([1, 64, 512])
torch.Size([512])


In [11]:
out = Decoder(toVTensor(np.random.rand(1, 128, 512)))

In [12]:
print(out.shape)

torch.Size([1, 128, 512])


In [7]:
dsconfig={
    "train_batch_size":4,
    "optimizer": {
        "type": "Adam",
        "params": {
          "lr": 0.001,
          "betas": [0.8, 0.999],
          "eps": 1e-8,
          "weight_decay": 3e-7
    }
  },
    
    "fp16": {
        "enabled": True,
        "loss_scale": 0,
        "initial_scale_power": 32,
        "loss_scale_window": 1000,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    
    "zero_optimization": {
        "stage": 2,
        "allgather_partitions": True,
        "allgather_bucket_size": 5e8,
        "overlap_comm": False,
        "reduce_scatter": True,
        "reduce_bucket_size": 5e8,
        "contiguous_gradients" : False,
        "cpu_offload": False,
        "cpu_offload_params" : False,
        "cpu_offload_use_pin_memory" : False,
        "stage3_max_live_parameters" : 1e9,
        "stage3_max_reuse_distance" : 1e9,
        "stage3_prefetch_bucket_size" : 5e8,
        "stage3_param_persistence_threshold" : 1e6,
        "sub_group_size" : 1e12
        },
    
    "logging":{
        "steps_per_print":100,
        "wall_clock_breakdown":True
    }
}

## With the lucid Reformer, crashes the kernal

In [None]:
class Decoder(nn.Module):
    def __init__(self, dim, depth=6, heads=8, max_seq_len=16384, bucket_size=64):
        super(Decoder, self).__init__()
        
        self.dim = dim
        self.depth = depth
        self.heads = heads
        self.max_seq_len = max_seq_len
        self.bucket_size = bucket_size

        self.decoder = ReformerLM(
            dim = self.dim,
            depth = self.depth,
            heads = self.heads,
            max_seq_len = self.max_seq_len, # ~10 seconds
            bucket_size = self.bucket_size,
            causal = True,
            embed = False,
            return_embeddings = True #return the output of the last attention layer, the keys; otherwise would get a softmax activation of vocab dict distribution
        ).cuda()
        
        self.pos_embedder = AxialPositionalEmbedding(256, (256, 64))
        self.fmap_embedder = AxialPositionalEmbedding(256, (256, 64))
    
    #@autocast()
    def forward(self, x):
        
        self.out = x + self.pos_embedder(x)
        
        #Positional Embedding
        for b in range(len(self.out)): #batch
            for i in range(int(len(self.out[b])/64)): #vector embeddings in a batch
                self.out[b][i*64:(i+1)*64] = self.fmap_embedder(self.out[b][i*64:(i+1)*64].unsqueeze(0)).squeeze(0)
                
        self.out = self.decoder(x)

        return self.out
    

class Input_Conv(nn.Module):
    def __init__(self):
        super(Input_Conv, self).__init__()
        
        # Initialize the DenseBlock, input shape is (n, 3, 256, 256), output shape is (n, 64, 16, 16)
        self.denseblock = torchvision.models.densenet121()
        self.denseblock.features.transition1.conv = nn.Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.denseblock.features.transition1.pool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0)
        self.denseblock = nn.Sequential(*list(self.denseblock.features.children())[:6])
    
    @autocast()
    def forward(self, x):
        return self.denseblock(x)
    

class Output_ConvTranspose(nn.Module):
    def __init__(self):
        super(Output_ConvTranspose, self).__init__()
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.upsample = nn.Upsample(scale_factor=2)
        
        self.conv1 = nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=[5,5], stride=1, padding=1)  
        self.conv2 = nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=[5,5], stride=1, padding=1)  
        self.conv3 = nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=[5,5], stride=1, padding=1)  
        self.conv4 = nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=[5,5], stride=1, padding=1)
        self.conv5 = nn.ConvTranspose2d(in_channels=64, out_channels=3, kernel_size=[1,1], stride=1, padding=0)
    
    @autocast()
    def forward(self, x):
        # input size (1, 64, 16, 16)
        
        self.out = self.conv1(x)
        self.out = self.relu(self.out)  
        self.out = self.upsample(self.out)
        
        self.out = self.conv2(self.out)
        self.out = self.relu(self.out)  
        self.out = self.upsample(self.out)
        
        self.out = self.conv3(self.out)
        self.out = self.relu(self.out)  
        self.out = self.upsample(self.out)
        
        self.out = self.conv4(self.out)
        self.out = self.relu(self.out)  
        self.out = self.upsample(self.out)
        
        self.out = self.conv5(self.out)
        self.out = self.sigmoid(self.out)
        
        return self.out
        

class Generator(nn.Module):
    def __init__(self, dim, depth=6, heads=8, max_seq_len=16384, bucket_size=64):
        super(Generator, self).__init__()
        self.dim = dim
        self.depth = depth
        self.heads = heads
        self.max_seq_len = max_seq_len
        self.bucket_size = bucket_size
        
        self.inputconv = Input_Conv()
        self.reformer = Decoder(dim=self.dim, depth=self.depth, heads=self.heads, max_seq_len=self.max_seq_len, bucket_size=self.bucket_size)
        self.outputconvtranspose = Output_ConvTranspose()
    
    @autocast()
    def forward(self, x):
        #input shape is (b, n, c, h, w)
        self.out = []
        for b in x:
            for n in b:
                self.out.append(self.inputconv(n.unsqueeze(0)).cpu().detach().numpy())
        self.out = torch.Tensor(self.out)
        
        self.unflattened_shape = self.out.shape
        self.out = self.out.view(x.shape[0], self.max_seq_len, self.dim) #TODO padding for variable sequence length input
        
        self.out = self.reformer(self.out)
        self.out = self.out.view(self.unflattened_shape)
        
        self.out = []
        for b in self.out:
            for n in b:
                self.out.append(self.outputconvtranspose(n.unsqueeze(0)))
        self.out = torch.Tensor(self.out)
        
        return self.out       

In [2]:
## Using Huggingface's Reformer implementation

In [9]:
# Initializing a Reformer configuration
configuration = ReformerConfig(attention_head_size=64, attn_layers=['local', 'lsh', 'local', 'lsh', 'local', 'lsh'], axial_norm_std=1.0, axial_pos_embds=True, axial_pos_shape=[64, 64], 
                               axial_pos_embds_dim=[64, 192], chunk_size_lm_head=0, eos_token_id=2, feed_forward_size=256, hash_seed=None, hidden_act='relu', hidden_dropout_prob=0.05, 
                               hidden_size=256, initializer_range=0.02, is_decoder=False, layer_norm_eps=1e-12, local_num_chunks_before=1, local_num_chunks_after=0, 
                               local_attention_probs_dropout_prob=0.05, local_attn_chunk_length=64, lsh_attn_chunk_length=64, lsh_attention_probs_dropout_prob=0.0, lsh_num_chunks_before=1, 
                               lsh_num_chunks_after=0, max_position_embeddings=4096, num_attention_heads=12, num_buckets=None, num_hashes=1, pad_token_id=0, vocab_size=320, tie_word_embeddings=False, 
                               use_cache=True)

# Initializing a Reformer model
model = ReformerModel.from_pretrained('google/reformer-crime-and-punishment')
tokenizer = ReformerTokenizer.from_pretrained('google/reformer-crime-and-punishment')

ValueError: Connection error, and we cannot find the requested files in the cached path. Please try again or make sure your Internet connection is on.

In [5]:
class Decoder(nn.Module):
    def __init__(self, dim, depth=6, heads=8, max_seq_len=16384, bucket_size=64):
        super(Decoder, self).__init__()
        
        self.dim = dim
        self.depth = depth
        self.heads = heads
        self.max_seq_len = max_seq_len
        self.bucket_size = bucket_size

        # Initializing a Reformer configuration
        self.configuration = ReformerConfig(attention_head_size=64, attn_layers=['local', 'lsh', 'local', 'lsh', 'local', 'lsh'], axial_norm_std=1.0, axial_pos_embds=True, axial_pos_shape=[256, 64], 
                                       axial_pos_embds_dim=[64, 192], chunk_size_lm_head=0, eos_token_id=2, feed_forward_size=256, hash_seed=None, hidden_act='relu', hidden_dropout_prob=0.05, 
                                       hidden_size=256, initializer_range=0.02, is_decoder=True, layer_norm_eps=1e-12, local_num_chunks_before=1, local_num_chunks_after=0, 
                                       local_attention_probs_dropout_prob=0.05, local_attn_chunk_length=64, lsh_attn_chunk_length=64, lsh_attention_probs_dropout_prob=0.0, lsh_num_chunks_before=1, 
                                       lsh_num_chunks_after=0, max_position_embeddings=16384, num_attention_heads=self.heads, num_buckets=None, num_hashes=1, pad_token_id=0, vocab_size=320, 
                                       tie_word_embeddings=False, use_cache=False, target_mapping=None)

        # Initializing a Reformer model
        self.decoder = ReformerModel(self.configuration)
        
        # self.pos_embedder = AxialPositionalEmbedding(256, (256, 64))
        self.fmap_embedder = AxialPositionalEmbedding(256, (256, 64))
    
    @autocast()
    def forward(self, x):
        
        # self.out = x + self.pos_embedder(x)
        self.out = x
        
        #Positional Embedding
        for b in range(len(self.out)): #batch
            for i in range(int(len(self.out[b])/64)): #vector embeddings in a batch
                self.out[b][i*64:(i+1)*64] = self.fmap_embedder(self.out[b][i*64:(i+1)*64].unsqueeze(0)).squeeze(0)
        
        print(self.out.shape)
        self.out = self.decoder(inputs_embeds=self.out)

        return self.out.last_hidden_state
    

class Input_Conv(nn.Module):
    def __init__(self):
        super(Input_Conv, self).__init__()
        
        # Initialize the DenseBlock, input shape is (n, 3, 256, 256), output shape is (n, 64, 16, 16)
        self.denseblock = torchvision.models.densenet121()
        self.denseblock.features.transition1.conv = nn.Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.denseblock.features.transition1.pool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0)
        self.denseblock = nn.Sequential(*list(self.denseblock.features.children())[:6])
    
    @autocast()
    def forward(self, x):
        return self.denseblock(x)
    

class Output_ConvTranspose(nn.Module):
    def __init__(self):
        super(Output_ConvTranspose, self).__init__()
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.upsample = nn.Upsample(scale_factor=2)
        
        self.conv1 = nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=[3,3], stride=1, padding=1)  
        self.conv2 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=[3,3], stride=1, padding=1)  
        self.conv3 = nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=[3,3], stride=1, padding=1)  
        self.conv4 = nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=[3,3], stride=1, padding=1)
        self.conv5 = nn.ConvTranspose2d(in_channels=64, out_channels=3, kernel_size=[1,1], stride=1, padding=0)
    
    @autocast()
    def forward(self, x):
        # input size (1, 64, 16, 16)
        
        self.out = self.conv1(x)
        self.out = self.relu(self.out)  
        self.out = self.upsample(self.out)
        
        self.out = self.conv2(self.out)
        self.out = self.relu(self.out)  
        self.out = self.upsample(self.out)
        
        self.out = self.conv3(self.out)
        self.out = self.relu(self.out)  
        self.out = self.upsample(self.out)
        
        self.out = self.conv4(self.out)
        self.out = self.relu(self.out)  
        self.out = self.upsample(self.out)
        
        self.out = self.conv5(self.out)
        self.out = self.sigmoid(self.out)
        
        return self.out
        

class Generator(nn.Module):
    def __init__(self, dim, depth=6, heads=8, max_seq_len=16384, bucket_size=64):
        super(Generator, self).__init__()
        self.dim = dim
        self.depth = depth
        self.heads = heads
        self.max_seq_len = max_seq_len
        self.bucket_size = bucket_size
        
        self.inputconv = Input_Conv()
        self.reformer = Decoder(dim=self.dim, depth=self.depth, heads=self.heads, max_seq_len=self.max_seq_len, bucket_size=self.bucket_size)
        self.outputconvtranspose = Output_ConvTranspose()
    
    @autocast()
    def forward(self, x):
        #input shape is (b, n, c, h, w)
        self.out = []
        for b in x:
            for n in b:
                self.out.append(self.inputconv(n.unsqueeze(0)).squeeze(0).cpu().detach().numpy())
        self.out = torch.Tensor(self.out).cuda()
        
        self.unflattened_shape = self.out.shape
        self.out = self.out.view(x.shape[0], self.max_seq_len, self.dim) #TODO padding for variable sequence length input
        
        self.out = self.reformer(self.out)
        print(self.out.shape)
        self.out = self.out.view(1, 256, 128, 16, 16) #TODO this need to be changed for adaptive sizing
        
        self.outarray = []
        for b in self.out:
            for n in b:
                self.outarray.append(self.outputconvtranspose(n.unsqueeze(0)).squeeze(0))
        #self.out = torch.Tensor(self.outarray)
        
        return self.outarray     

In [6]:
G = Generator(dim=256).cuda()

In [5]:
for i, imgs in enumerate(loader):
    inp = Variable(imgs[0]).cuda()
    with autocast():
        out = G(inp)
    print(out[1].shape)

torch.Size([1, 16384, 256])


config.num_buckets is not set. Setting config.num_buckets to [16, 32]...
config.num_buckets is not set. Setting config.num_buckets to [16, 32]...
config.num_buckets is not set. Setting config.num_buckets to [16, 32]...


torch.Size([1, 16384, 512])
torch.Size([3, 256, 256])
torch.Size([1, 16384, 256])
torch.Size([1, 16384, 512])


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 8.00 GiB total capacity; 5.83 GiB already allocated; 6.97 MiB free; 5.83 GiB reserved in total by PyTorch)

In [8]:
#dsconfig = json.loads(dsconfig)
model = torchvision.models.densenet121()
model_engine, optimizer, _, _ = deepspeed.initialize(args=dsconfig, model=model, model_parameters=model.parameters)

[2021-03-15 20:07:26,167] [INFO] [logging.py:60:log_dist] [Rank -1] DeepSpeed info: version=0.3.12, git-hash=unknown, git-branch=unknown
[2021-03-15 20:07:26,169] [INFO] [distributed.py:37:init_distributed] Not using the DeepSpeed or torch.distributed launchers, attempting to detect MPI environment...


CalledProcessError: Command '['hostname -I']' returned non-zero exit status 1.