In [1]:
import torch

# Select the compute device once, in priority order: CUDA, then the MPS backend, then CPU.
# The MPS check is only evaluated when CUDA is unavailable.
has_cuda = torch.cuda.is_available()
has_mps = (not has_cuda) and torch.backends.mps.is_available()

device = torch.device("cuda" if has_cuda else ("mps" if has_mps else "cpu"))

# Report which backend was picked.
if has_cuda:
    print("There are %d GPU(s) available." % torch.cuda.device_count())
    print("We will use the GPU:", torch.cuda.get_device_name(0))
elif has_mps:
    print("Using mps backend")
else:
    print("No GPU available, using the CPU instead.")

There are 1 GPU(s) available.
We will use the GPU: NVIDIA A100-SXM4-80GB


In [2]:
from transformers import AutoTokenizer, AutoProcessor
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import requests
from PIL import Image
import numpy as np
from io import BytesIO
from diffusers import (
    KandinskyV22Pipeline,
    KandinskyV22PriorEmb2EmbPipeline,
    KandinskyV22PriorPipeline,
)
from diffusers.utils import load_image
from torchvision.transforms import ToPILImage

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from transformers import CLIPTextModelWithProjection, CLIPVisionModelWithProjection, BertModel
import torch.nn as nn
from transformers import AdamW
import os


class T2IModel(nn.Module):
    """Predict an image embedding from a source image plus a text instruction.

    The model concatenates the projected image embedding of the source image
    with a hidden state of the text encoder and maps the result through a
    single linear layer (`fc`). The resulting embedding is fed to the
    Kandinsky 2.2 decoder pipeline in `visualization` to render an image.
    """

    def __init__(self):
        super().__init__()
        self.text_model = CLIPTextModelWithProjection.from_pretrained(
            "kandinsky-community/kandinsky-2-2-prior", subfolder="text_encoder"
        )
        self.vision_model = CLIPVisionModelWithProjection.from_pretrained(
            "kandinsky-community/kandinsky-2-2-prior", subfolder="image_encoder"
        )
        # Input width = text hidden size + vision projection size (see `forward`, which
        # concatenates the image embedding with a text hidden state).
        self.fc = nn.Linear(self.text_model.config.hidden_size + self.vision_model.config.projection_dim, 1280)

        # Decoder pipeline used only for rendering in `visualization`.
        # NOTE(review): presumably not an nn.Module, so `model.to(...)` / `state_dict()` do not
        # cover it — it is moved to the device explicitly in `visualization`. Confirm.
        self.pipe = KandinskyV22Pipeline.from_pretrained(
            "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
        )

    def initialize_optimizer(self):
        """Return an AdamW optimizer (lr=1e-4) over the parameters of `fc` only."""
        params = list(self.fc.parameters())
        return AdamW(params, lr=1e-4)

    def forward(self, input_imgs, input_txt, attention_mask=None):
        """Compute the predicted embedding.

        Args:
            input_imgs: Pixel values passed to the vision encoder.
            input_txt: Token ids passed to the text encoder.
            attention_mask: Optional attention mask for `input_txt`.

        Returns:
            Output of `fc` applied to `[image_embeds ; text hidden state at position 0]`.
        """
        text_outputs = self.text_model(input_txt, attention_mask=attention_mask)
        # Hidden state of the first token is used as the text representation.
        # NOTE(review): CLIP text encoders are usually causal, in which case position 0 would
        # not see the instruction tokens — confirm, and consider `text_outputs.text_embeds`.
        # Left unchanged here because existing checkpoints were trained with this input.
        text_embeds = text_outputs.last_hidden_state[:, 0, :]

        vision_outputs = self.vision_model(input_imgs)
        vision_embeds = vision_outputs.image_embeds

        combined_embeds = torch.cat((vision_embeds, text_embeds), dim=1)
        return self.fc(combined_embeds)

    def output_embedding(self, target_images):
        """Return the vision encoder's projected image embedding for `target_images`."""
        return self.vision_model(target_images).image_embeds

    def custom_loss(self, output_embeddings, target_embeddings):
        """Return the mean-squared error between predicted and target embeddings."""
        return nn.MSELoss()(output_embeddings, target_embeddings)

    def save_model(self, output_dir="../model_save/", filename="model_checkpoint.pt"):
        """Save this module's `state_dict` to `output_dir/filename`.

        The directory is created if needed. The path is joined with
        `os.path.join`, so `output_dir` works with or without a trailing
        separator.
        """
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        file_path = os.path.join(output_dir, filename)
        print("Saving model to %s" % file_path)

        # Serialise *this* instance rather than whatever a global `model` happens to be.
        torch.save(self.state_dict(), file_path)

    def get_cos(self, input1, input2):
        """Return the cosine similarity along dim 1, summed and divided by the leading-dimension length."""
        cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
        similarity = cos(input1, input2)
        return torch.sum(similarity) / len(similarity)

    def metrics(self, input1, input2):
        """Return the evaluation metrics as a list; currently only the average cosine similarity."""
        return [self.get_cos(input1, input2)]

    def visualization(self, input_img, instruction, instruction_attention_mask, filename, negative_instruction=None, negative_instruction_attention_mask=None):
        """Render the predicted embedding with the decoder pipeline and save it.

        Args:
            input_img: Pixel values of the source image.
            instruction: Token ids of the edit instruction.
            instruction_attention_mask: Attention mask for `instruction`.
            filename: Path the first generated image is written to.
            negative_instruction: Optional token ids for a negative prompt.
            negative_instruction_attention_mask: Attention mask for the negative
                prompt. Both negative arguments must be given for the negative
                prompt to be used; otherwise a zero embedding is used.
        """
        # Rendering is inference only, so skip autograd bookkeeping.
        with torch.no_grad():
            output_embeddings = self.forward(input_img, instruction, attention_mask=instruction_attention_mask)

            if negative_instruction is not None and negative_instruction_attention_mask is not None:
                neg_image_embed = self.forward(input_img, negative_instruction, attention_mask=negative_instruction_attention_mask)
            else:
                # No negative prompt supplied: fall back to an all-zero embedding.
                neg_image_embed = torch.zeros_like(output_embeddings)

            # Put the decoder on the same device as the embeddings instead of relying on a
            # module-level `device` variable.
            self.pipe.to(output_embeddings.device)
            image = self.pipe(
                image_embeds=output_embeddings,
                negative_image_embeds=neg_image_embed,
                height=768,
                width=768,
                num_inference_steps=100,
            ).images

        # Only the first generated image is kept.
        image[0].save(filename)

# Build the model (its constructor loads the pretrained text/vision encoders and the decoder
# pipeline) and move its registered modules to the device selected earlier. Because the
# `.to(...)` call is the cell's last expression, the notebook also displays the module summary.
model = T2IModel()
model.to(device=device)

Loading pipeline components...: 100%|█████████████| 3/3 [00:01<00:00,  2.40it/s]


T2IModel(
  (text_model): CLIPTextModelWithProjection(
    (text_model): CLIPTextTransformer(
      (embeddings): CLIPTextEmbeddings(
        (token_embedding): Embedding(49408, 1280)
        (position_embedding): Embedding(77, 1280)
      )
      (encoder): CLIPEncoder(
        (layers): ModuleList(
          (0-31): 32 x CLIPEncoderLayer(
            (self_attn): CLIPAttention(
              (k_proj): Linear(in_features=1280, out_features=1280, bias=True)
              (v_proj): Linear(in_features=1280, out_features=1280, bias=True)
              (q_proj): Linear(in_features=1280, out_features=1280, bias=True)
              (out_proj): Linear(in_features=1280, out_features=1280, bias=True)
            )
            (layer_norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
            (mlp): CLIPMLP(
              (activation_fn): GELUActivation()
              (fc1): Linear(in_features=1280, out_features=5120, bias=True)
              (fc2): Linear(in_features=5120, out_f

In [4]:
import torch

def load_model_from_checkpoint(model, checkpoint_path, device='cuda'):
    """
    Restore saved weights into ``model`` and place it on ``device``.

    Parameters:
    - model (torch.nn.Module): Instance whose architecture matches the checkpoint.
    - checkpoint_path (str): File holding a state dict written with ``torch.save``.
    - device (str): Target device; also used as ``map_location`` while reading the file.

    Returns:
    - model (torch.nn.Module): The same object that was passed in, now carrying the loaded weights.
    """
    # NOTE(review): torch.load unpickles the file — only point this at trusted checkpoints.
    weights = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(weights)

    # nn.Module.to returns the module itself, so the caller gets `model` back.
    return model.to(device)

# Usage: restore the fine-tuned weights onto the `device` chosen at the top of the notebook
# (rather than a hard-coded 'cuda') so this cell also runs on MPS- or CPU-only machines.
# load_model_from_checkpoint returns the object it was given, so `loaded_model` is `model`.
loaded_model = load_model_from_checkpoint(model, 'magicbrush_kadinsky_imagewithinstruction_10epochs_full_v1.pth', device=device)


In [7]:
def compute_max_instruction_length(dataloader, tokenizer):
    """
    Find the longest instruction, measured in tokens, across all batches.

    Each encoded instruction is decoded back to a string and re-tokenized;
    the number of resulting tokens is its length.

    Args:
    - dataloader (DataLoader): Iterable of batches; instructions are read from index 1 of each batch.
    - tokenizer: Object providing ``decode`` and ``tokenize``.

    Returns:
    - int: Largest token count seen, or 0 when there are no instructions.
    """
    token_counts = (
        len(tokenizer.tokenize(tokenizer.decode(encoded)))
        for batch in dataloader
        for encoded in batch[1]  # instructions live in slot 1 of every batch
    )
    return max(token_counts, default=0)

def custom_encode(instruction, tokenizer):
    """Tokenize one instruction and return only its ``input_ids``.

    The tokenizer is asked to add its special tokens and to return PyTorch
    tensors; every other field of the encoding is discarded.
    """
    encoding_options = {
        "add_special_tokens": True,  # let the tokenizer add its start/end markers
        "return_tensors": "pt",  # PyTorch tensors
    }
    return tokenizer.encode_plus(instruction, **encoding_options)["input_ids"]
    

In [16]:
import os
import json

# --- Configuration -----------------------------------------------------------
file_path = '/scratch/nkusumba/test/edit_sessions.json'
images_path = '/scratch/nkusumba/test/images/'
outputs_path = '/scratch/nkusumba/test/outputs/'
max_samples = 100  # stop once this many samples have been rendered

with open(file_path, 'r') as file:
    json_data = json.load(file)

# Map each session key to the instruction of its first entry.
dic = {key: value[0]['instruction'] for key, value in json_data.items()}

os.makedirs(outputs_path, exist_ok=True)

tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")


def _encode_with_mask(text):
    """Tokenize `text` and build its attention mask; both tensors are placed on `device`."""
    token_ids = custom_encode(text, tokenizer).to(device)
    mask = torch.zeros(token_ids.shape, dtype=torch.long).to(device)
    # NOTE(review): if the tokenizer's pad token shares an id with its end-of-text token, this
    # also masks out the final position — confirm this matches how the checkpoint was trained.
    mask[token_ids != tokenizer.pad_token_id] = 1
    return token_ids, mask


# The negative prompt is the same empty string for every sample, so encode it once.
instruction_1 = ""
encoded_instruction_1, instruction_1_attention_mask = _encode_with_mask(instruction_1)

# --- Evaluation --------------------------------------------------------------
print('Started Evaluation')
count = 0  # number of samples rendered so far
for dirpath, dirnames, filenames in os.walk(images_path):
    if count >= max_samples:
        break

    # Locate the source image and (optionally) the ground-truth edit in this directory.
    input_path = ''
    output_path = ''
    for name in filenames:
        filepath = os.path.join(dirpath, name)
        if filepath.endswith('input.png'):
            input_path = filepath
        elif filepath.endswith('output1.png'):
            output_path = filepath
    if input_path == '':
        continue

    # Look the instruction up by the directory's own name. Indexing a separate list of
    # directory names by `count` goes out of sync as soon as any directory is skipped.
    session_key = os.path.basename(os.path.normpath(dirpath))
    instruction = dic.get(session_key)
    if instruction is None:
        print(f'Skipping {dirpath}: no instruction found for {session_key!r}')
        continue

    print(f'Processing {count+1}th image')
    sample_dir = os.path.join(outputs_path, str(count + 1))
    os.makedirs(sample_dir, exist_ok=True)

    # Copy the ground truth next to the generated output when it exists.
    if output_path:
        with Image.open(output_path) as out_img:
            out_img.save(os.path.join(sample_dir, 'groundtruth.png'))
    else:
        print(f'No output1.png in {dirpath}; groundtruth.png not written')

    # Process the image
    with Image.open(input_path) as img:
        inputs = processor(images=img, return_tensors="pt")
        img.save(os.path.join(sample_dir, 'input_image.png'))
    input_image = inputs["pixel_values"].to(device)

    # Process the instruction
    encoded_instruction, instruction_attention_mask = _encode_with_mask(instruction)
    with open(os.path.join(sample_dir, 'instruction.txt'), 'w') as f:
        f.write(instruction)

    # Visualize the output (inference only, so no gradients are needed)
    with torch.no_grad():
        loaded_model.visualization(
            input_img=input_image,
            instruction=encoded_instruction,
            instruction_attention_mask=instruction_attention_mask,
            filename=os.path.join(sample_dir, 'output.png'),
            negative_instruction=encoded_instruction_1,
            negative_instruction_attention_mask=instruction_1_attention_mask,
        )
    print(f'Finished processing {count+1}th image')
    count += 1

# Printed whether the sample limit was hit or the directory tree was exhausted first.
print('Process done!!!')


Started Evaluation
Processing 1th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.39it/s]


Finished processing 1th image
Processing 2th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.36it/s]


Finished processing 2th image
Processing 3th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.38it/s]


Finished processing 3th image
Processing 4th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.32it/s]


Finished processing 4th image
Processing 5th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.78it/s]


Finished processing 5th image
Processing 6th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.28it/s]


Finished processing 6th image
Processing 7th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.28it/s]


Finished processing 7th image
Processing 8th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.67it/s]


Finished processing 8th image
Processing 9th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.25it/s]


Finished processing 9th image
Processing 10th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.29it/s]


Finished processing 10th image
Processing 11th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.37it/s]


Finished processing 11th image
Processing 12th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.39it/s]


Finished processing 12th image
Processing 13th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.27it/s]


Finished processing 13th image
Processing 14th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.32it/s]


Finished processing 14th image
Processing 15th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.44it/s]


Finished processing 15th image
Processing 16th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.90it/s]


Finished processing 16th image
Processing 17th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.96it/s]


Finished processing 17th image
Processing 18th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.68it/s]


Finished processing 18th image
Processing 19th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.39it/s]


Finished processing 19th image
Processing 20th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.41it/s]


Finished processing 20th image
Processing 21th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.34it/s]


Finished processing 21th image
Processing 22th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.21it/s]


Finished processing 22th image
Processing 23th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 18.85it/s]


Finished processing 23th image
Processing 24th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.95it/s]


Finished processing 24th image
Processing 25th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.94it/s]


Finished processing 25th image
Processing 26th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.43it/s]


Finished processing 26th image
Processing 27th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.48it/s]


Finished processing 27th image
Processing 28th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.41it/s]


Finished processing 28th image
Processing 29th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.51it/s]


Finished processing 29th image
Processing 30th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.33it/s]


Finished processing 30th image
Processing 31th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.39it/s]


Finished processing 31th image
Processing 32th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.45it/s]


Finished processing 32th image
Processing 33th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.41it/s]


Finished processing 33th image
Processing 34th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.44it/s]


Finished processing 34th image
Processing 35th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.46it/s]


Finished processing 35th image
Processing 36th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.36it/s]


Finished processing 36th image
Processing 37th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.46it/s]


Finished processing 37th image
Processing 38th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.44it/s]


Finished processing 38th image
Processing 39th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.41it/s]


Finished processing 39th image
Processing 40th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.41it/s]


Finished processing 40th image
Processing 41th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.31it/s]


Finished processing 41th image
Processing 42th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.29it/s]


Finished processing 42th image
Processing 43th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.80it/s]


Finished processing 43th image
Processing 44th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.16it/s]


Finished processing 44th image
Processing 45th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.95it/s]


Finished processing 45th image
Processing 46th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.25it/s]


Finished processing 46th image
Processing 47th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.13it/s]


Finished processing 47th image
Processing 48th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.12it/s]


Finished processing 48th image
Processing 49th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.16it/s]


Finished processing 49th image
Processing 50th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.09it/s]


Finished processing 50th image
Processing 51th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.11it/s]


Finished processing 51th image
Processing 52th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.13it/s]


Finished processing 52th image
Processing 53th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.03it/s]


Finished processing 53th image
Processing 54th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.11it/s]


Finished processing 54th image
Processing 55th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.20it/s]


Finished processing 55th image
Processing 56th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.97it/s]


Finished processing 56th image
Processing 57th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.12it/s]


Finished processing 57th image
Processing 58th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.86it/s]


Finished processing 58th image
Processing 59th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.08it/s]


Finished processing 59th image
Processing 60th image


100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 20.03it/s]


Finished processing 60th image
Processing 61th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.90it/s]


Finished processing 61th image
Processing 62th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.84it/s]


Finished processing 62th image
Processing 63th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.96it/s]


Finished processing 63th image
Processing 64th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.98it/s]


Finished processing 64th image
Processing 65th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.45it/s]


Finished processing 65th image
Processing 66th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.00it/s]


Finished processing 66th image
Processing 67th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.71it/s]


Finished processing 67th image
Processing 68th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.41it/s]


Finished processing 68th image
Processing 69th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.73it/s]


Finished processing 69th image
Processing 70th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.66it/s]


Finished processing 70th image
Processing 71th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.74it/s]


Finished processing 71th image
Processing 72th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.73it/s]


Finished processing 72th image
Processing 73th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.67it/s]


Finished processing 73th image
Processing 74th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.76it/s]


Finished processing 74th image
Processing 75th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.37it/s]


Finished processing 75th image
Processing 76th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.37it/s]


Finished processing 76th image
Processing 77th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.42it/s]


Finished processing 77th image
Processing 78th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.54it/s]


Finished processing 78th image
Processing 79th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.40it/s]


Finished processing 79th image
Processing 80th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.60it/s]


Finished processing 80th image
Processing 81th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.48it/s]


Finished processing 81th image
Processing 82th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.46it/s]


Finished processing 82th image
Processing 83th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.74it/s]


Finished processing 83th image
Processing 84th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.52it/s]


Finished processing 84th image
Processing 85th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.55it/s]


Finished processing 85th image
Processing 86th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.41it/s]


Finished processing 86th image
Processing 87th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.59it/s]


Finished processing 87th image
Processing 88th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.62it/s]


Finished processing 88th image
Processing 89th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.53it/s]


Finished processing 89th image
Processing 90th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.56it/s]


Finished processing 90th image
Processing 91th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.75it/s]


Finished processing 91th image
Processing 92th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.49it/s]


Finished processing 92th image
Processing 93th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.46it/s]


Finished processing 93th image
Processing 94th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.60it/s]


Finished processing 94th image
Processing 95th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 18.09it/s]


Finished processing 95th image
Processing 96th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.41it/s]


Finished processing 96th image
Processing 97th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.31it/s]


Finished processing 97th image
Processing 98th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.48it/s]


Finished processing 98th image
Processing 99th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 19.39it/s]


Finished processing 99th image
Processing 100th image


100%|█████████████████████████████████████████| 100/100 [00:05<00:00, 18.73it/s]


Finished processing 100th image
Process done!!!


In [17]:
# Bundle every per-sample output folder into a single archive (zip stores the entries
# without the leading "/", e.g. "scratch/nkusumba/test/outputs/54/output.png").
!zip -r /scratch/nkusumba/test/kandinsky_outputs.zip /scratch/nkusumba/test/outputs

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  adding: scratch/nkusumba/test/outputs/ (stored 0%)
  adding: scratch/nkusumba/test/outputs/54/ (stored 0%)
  adding: scratch/nkusumba/test/outputs/54/input_image.png (deflated 0%)
  adding: scratch/nkusumba/test/outputs/54/output.png (deflated 0%)
  adding: scratch/nkusumba/test/outputs/54/groundtruth.png (deflated 0%)
  adding: scratch/nkusumba/test/outputs/54/instruction.txt (stored 0%)
  adding: scratch/nkusumba/test/outputs/22/ (stored 0%)
  adding: scratch/nkusumba/test/outputs/22/groundtruth.png (deflated 0%)
  adding: scratch/nkusumba/test/outputs/22/instruction.txt (stored 0%)
  adding: scratch/nkusumba/test/outputs/22/output.png (deflated 0%)
  adding: scratch/nkusumba/test/outputs/22/input_image.