# Converting .npy data format files

Converts the `.npy` dataset files into per-class image folders so they can be used with a standard processing pipeline.

## Imports

In [43]:
# General
import os
from os import path
import shutil
from tqdm.notebook import tqdm

# Data processing
import numpy as np

# Image
import PIL
from PIL import Image

## Constants and paths


In [63]:
# File-name suffix appended to every exported image.
FILE_FORMAT = ".png"

# Folder holding the input .npy files, and the root of the exported tree.
numpy_data_path = "./numpy-data"
out_data_path = "./data"
out_train_path, out_test_path = (
    path.join(out_data_path, split) for split in ("train", "test")
)

# Integer label -> name of the class folder.
class_map = dict((
    (0, 'covid'),
    (1, 'normal'),
))

# Dataset key -> location of the matching .npy file.
x_y_paths = {
    key: path.join(numpy_data_path, filename)
    for key, filename in (
        ('x_train', "Btrain_images.npy"),
        ('y_train', "Btrain_labels.npy"),
        ('x_test', "test_images.npy"),
        ('y_test', "test_labels.npy"),
        ('x_valid', "val_images.npy"),
        ('y_valid', "val_labels.npy"),
    )
}

## Get datasets

In [64]:
# Load each file listed in x_y_paths and keep it under the same key.
x_y_npy = dict(zip(x_y_paths.keys(), map(np.load, x_y_paths.values())))

## Setup output folder

In [73]:
def setup_output_folder():
    """Recreate the output tree with one folder per class under test and train.

    Whatever is currently under ``out_data_path`` is removed first (removal
    errors are ignored), then the root folder and every
    ``<split>/<class name>`` folder are created.
    """
    # Start from a clean output root.
    shutil.rmtree(out_data_path, ignore_errors=True)
    os.makedirs(out_data_path, exist_ok=True)

    # One sub-folder per class name, under the test split and the train split.
    for class_name in class_map.values():
        for split_root in (out_test_path, out_train_path):
            os.makedirs(path.join(split_root, class_name))

## Convert

In [75]:
def get_image_from_npy(ndarray, i):
    """Return entry ``i`` of ``ndarray`` as a PIL image with 8-bit pixels.

    The entry is multiplied by 255 and converted to ``uint8``, so the data is
    presumably stored as floats in ``[0, 1]`` -- TODO confirm against the
    .npy files.

    Args:
        ndarray: Indexable collection of image arrays.
        i: Index of the image to convert.

    Returns:
        The image object built by ``Image.fromarray``.
    """
    scaled = ndarray[i] * 255
    # Saturate before the cast: converting a float outside 0..255 to uint8
    # wraps around (or is platform dependent) instead of clamping, which
    # would turn slightly over-range pixels into near-black ones.
    pixels = np.clip(scaled, 0, 255).astype(np.uint8)
    return Image.fromarray(pixels)

def save_image_from_npy(folder, npy, idx, prefix):
    """Write image ``idx`` of ``npy`` into ``folder``.

    The file is named ``<prefix>-<number><FILE_FORMAT>``, where the number is
    ``idx + 1`` zero-padded to at least four digits.

    Args:
        folder: Directory the image file is written to.
        npy: Collection of image arrays handed to ``get_image_from_npy``.
        idx: Zero-based index of the image inside ``npy``.
        prefix: Leading part of the file name.
    """
    # File names are 1-based even though the index is 0-based.
    file_name = f"{prefix}-{idx + 1:04d}{FILE_FORMAT}"
    image = get_image_from_npy(npy, idx)
    image.save(path.join(folder, file_name))

In [78]:
def convert_to_folder(x, y, prefix, category="train"):
    """Save every image of ``x`` into the class folder selected by its label.

    Args:
        x: Image arrays, indexed in step with ``y``.
        y: Labels; each one must be a key of ``class_map``.
        prefix: File-name prefix passed on to ``save_image_from_npy``.
        category: Sub-folder of ``out_data_path`` to write into.

    Raises:
        KeyError: If a label is not a key of ``class_map``.
    """
    category_root = path.join(out_data_path, category)
    # tqdm sits inside enumerate so it can read len(y) and display a total;
    # wrapped around enumerate it only sees an iterator without a length.
    for i, _y in enumerate(tqdm(y)):
        label = class_map[_y]
        save_image_from_npy(path.join(category_root, label), x, i, prefix)

In [79]:
# Rebuild the output tree before exporting anything.
setup_output_folder()

# Training images.
convert_to_folder(
    x=x_y_npy['x_train'],
    y=x_y_npy['y_train'],
    prefix="train",
    category="train",
)

# Validation images are written into the train folders as well; the "valid"
# prefix keeps their file names distinct from the training ones.
convert_to_folder(
    x=x_y_npy['x_valid'],
    y=x_y_npy['y_valid'],
    prefix="valid",
    category="train",
)

# Test images.
convert_to_folder(
    x=x_y_npy['x_test'],
    y=x_y_npy['y_test'],
    prefix="test",
    category="test",
)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]