# In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import utils, layers
import h5py
import torch
from torch import nn
from torch.nn import functional as F


import warnings
warnings.filterwarnings("ignore")

# In [None]:
class BicubicDownSample(nn.Module):
    """Shrink image batches by an integer factor with a separable bicubic filter.

    A 1-D bicubic kernel with ``4 * factor`` taps is built once, normalised so
    its taps sum to one, and applied first along the height and then along the
    width as a strided depthwise convolution.
    """

    def bicubic_kernel(self, x, a=-0.50):
        """
        Evaluate the bicubic kernel at a single offset.

        This equation is exactly copied from the website below:
        https://clouard.users.greyc.fr/Pantheon/experiments/rescaling/index-en.html#bicubic

        Args:
            x: Scalar (0-dim) tensor; the branches use plain Python
                comparisons, so multi-element tensors are not supported.
            a: Free coefficient of the kernel polynomial.

        Returns:
            A 0-dim tensor when ``|x| < 2`` and the Python float ``0.0``
            otherwise.
        """
        abs_x = torch.abs(x)
        if abs_x <= 1.:
            return (a + 2.) * torch.pow(abs_x, 3.) - (a + 3.) * torch.pow(abs_x, 2.) + 1
        elif 1. < abs_x < 2.:
            return a * torch.pow(abs_x, 3) - 5. * a * torch.pow(abs_x, 2.) + 8. * a * abs_x - 4. * a
        else:
            return 0.0

    def __init__(self, factor=4, cuda=True, padding='reflect'):
        """Precompute the vertical and horizontal bicubic kernels.

        Args:
            factor: Integer downsampling factor (also the convolution stride).
            cuda: When true, the kernels (and byte output) use CUDA tensor
                types in ``forward``.
            padding: Padding mode forwarded to ``F.pad``.
        """
        super().__init__()
        self.factor = factor
        size = factor * 4
        half = torch.floor(torch.tensor(size / 2))
        # Convert every tap to a Python float first so the list is homogeneous
        # (the kernel returns either a 0-dim tensor or the float 0.0).
        taps = [float(self.bicubic_kernel((i - half + 0.5) / factor))
                for i in range(size)]
        k = torch.tensor(taps, dtype=torch.float32)
        k = k / torch.sum(k)
        # Kept as three stacked copies so the attribute shapes stay
        # (3, 1, size, 1) and (3, 1, 1, size); other channel counts are
        # derived from the first copy in ``forward``.
        k1 = torch.reshape(k, shape=(1, 1, size, 1))
        self.k1 = torch.cat([k1, k1, k1], dim=0)
        k2 = torch.reshape(k, shape=(1, 1, 1, size))
        self.k2 = torch.cat([k2, k2, k2], dim=0)
        # NOTE: this string attribute shadows ``nn.Module.cuda()``; the name is
        # kept unchanged for backward compatibility.
        self.cuda = '.cuda' if cuda else ''
        self.padding = padding
        # The kernels are plain tensors, so no parameters are registered here
        # and this loop currently has nothing to freeze.
        for param in self.parameters():
            param.requires_grad = False

    def _depthwise_filters(self, kernel, channels):
        """Return ``kernel`` with one copy per channel, as a float tensor on the configured device."""
        if kernel.shape[0] != channels:
            kernel = kernel[:1].repeat(channels, 1, 1, 1)
        return kernel.type('torch{}.FloatTensor'.format(self.cuda))

    def forward(self, x, nhwc=False, clip_round=False, byte_output=False):
        """Downsample ``x`` by ``self.factor`` along height and width.

        Args:
            x: 4-D float tensor, NCHW by default or NHWC when ``nhwc`` is true.
                Any channel count is accepted.
            nhwc: Treat the input as NHWC and return NHWC.
            clip_round: Round and clamp to ``[0, 255]`` after each 1-D pass.
            byte_output: Return a byte tensor (CUDA byte tensor when the
                module was created with ``cuda=True``).

        Returns:
            The downsampled tensor in the same layout as the input.
        """
        filter_size = self.factor * 4
        stride = self.factor

        # Total padding so that the output size is input_size / stride; the
        # extra element of an odd total goes to the bottom/right side.
        pad_total = max(filter_size - stride, 0)
        pad_before = pad_total // 2
        pad_after = pad_total - pad_before

        if nhwc:
            x = x.permute(0, 3, 1, 2)   # NHWC to NCHW

        channels = x.shape[1]
        filters1 = self._depthwise_filters(self.k1, channels)
        filters2 = self._depthwise_filters(self.k2, channels)

        # downscaling performed by 1-d convolution: vertical pass
        x = F.pad(x, (0, 0, pad_before, pad_after), self.padding)
        x = F.conv2d(input=x, weight=filters1, stride=(stride, 1), groups=channels)
        if clip_round:
            x = torch.clamp(torch.round(x), 0.0, 255.)

        # horizontal pass
        x = F.pad(x, (pad_before, pad_after, 0, 0), self.padding)
        x = F.conv2d(input=x, weight=filters2, stride=(1, stride), groups=channels)
        if clip_round:
            x = torch.clamp(torch.round(x), 0.0, 255.)

        if nhwc:
            x = x.permute(0, 2, 3, 1)   # NCHW back to NHWC
        if byte_output:
            # The placeholder was missing before, so the cuda flag was ignored.
            return x.type('torch{}.ByteTensor'.format(self.cuda))
        else:
            return x

# Rows removed from both images and labels. Per the original author's note
# these three images contain only NaN. The indices refer to positions in the
# unmodified arrays, so they are removed in a single call.
NAN_IMAGE_INDICES = [11492, 12659, 15422]
TARGET_SIZE = 32
OUTPUT_PATH = 'Galaxy10_DECals_32.h5'


def _downsample_images(images, target_size):
    """Bicubically downsample a stack of square NHWC images to ``target_size``.

    The downsampling factor is derived from the input resolution, so the same
    helper works whatever the source size is, as long as it is an integer
    multiple of ``target_size``.

    Args:
        images: Array of shape (num_images, size, size, channels).
        target_size: Desired output height and width in pixels.

    Returns:
        float32 array of shape (num_images, target_size, target_size, channels).

    Raises:
        ValueError: If the images are not square or their size is not an
            integer multiple of ``target_size``.
    """
    count, height, width, channels = images.shape
    if height != width or height % target_size != 0:
        raise ValueError(
            f"cannot downsample {height}x{width} images to "
            f"{target_size}x{target_size} with an integer factor"
        )
    downsampler = BicubicDownSample(factor=height // target_size, cuda=False)  # Set cuda=True if using GPU

    result = np.zeros((count, target_size, target_size, channels), dtype=np.float32)
    with torch.no_grad():  # No need to calculate gradients
        # One image at a time keeps peak memory low for large datasets.
        for i, image_np in enumerate(images):
            # (H, W, C) -> (1, C, H, W)
            image_tensor = torch.tensor(image_np).permute(2, 0, 1).unsqueeze(0).float()
            downsampled = downsampler(image_tensor)
            # (1, C, h, w) -> (h, w, C)
            result[i] = downsampled.squeeze(0).permute(1, 2, 0).numpy()
    return result


# Alternative input: 'Galaxy10_DECals.h5'
with h5py.File('Galaxy10_DECals_downsampled.h5', 'r') as f:
    imgs256 = np.delete(np.array(f['images']), NAN_IMAGE_INDICES, axis=0)  # read images
    labels = np.delete(np.array(f['ans']), NAN_IMAGE_INDICES, axis=0)  # read labels

label_text = [
    'Disturbed Galaxies',
    'Merging Galaxies',
    'Round Smooth Galaxies',
    'In-between Round Smooth Galaxies',
    'Cigar Shaped Smooth Galaxies',
    'Barred Spiral Galaxies',
    'Unbarred Tight Spiral Galaxies',
    'Unbarred Loose Spiral Galaxies',
    'Edge-on Galaxies without Bulge',
    'Edge-on Galaxies with Bulge',
]

print(np.shape(labels))
print(np.shape(imgs256))

# Other resolutions can be produced the same way, for example:
# imgs128 = _downsample_images(imgs256, 128)
# imgs64 = _downsample_images(imgs256, 64)

# DOWNSAMPLING TO 32
imgs32 = _downsample_images(imgs256, TARGET_SIZE)

print(np.shape(imgs32))

# Save the cleaned, downsampled data to a new HDF5 file
with h5py.File(OUTPUT_PATH, 'w') as f:
    f.create_dataset('images', data=imgs32)
    f.create_dataset('ans', data=labels)
    # Also store the class names alongside the data
    dt = h5py.string_dtype(encoding='utf-8')
    f.create_dataset('label_text', data=label_text, dtype=dt)

print(f"Data has been saved to {OUTPUT_PATH}")