In [20]:
import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import h5py

# Definir os caminhos dos arquivos
file_x = r"C:\Users\stefa\OneDrive\Documentos\Natali 2\Pristine\SimSim_SOURCE_X_Illustris2_pristine.npy"
file_y = r"C:\Users\stefa\OneDrive\Documentos\Natali 2\Pristine\SimSim_SOURCE_y_Illustris2_pristine.npy"

# Carregar os arquivos usando np.load
data_x = np.load(file_x)
data_y = np.load(file_y)

# Diretório para as imagens
image_dir = "pristineSS"
os.makedirs(image_dir, exist_ok=True)

# Criar um dicionário para os metadados
metadata = {}

# Usar tqdm para a barra de progresso
for i in tqdm(range(data_x.shape[0]), desc="Salvando imagens e metadados"):
    image = data_x[i]
    label = data_y

    # Normaliza os dados da imagem para o intervalo [0, 1]
    if np.max(image) != np.min(image):
        image = (image - np.min(image)) / (np.max(image) - np.min(image))
    else:
        # Handle the case where max and min are equal
        image = np.zeros_like(image)

    # Verifique se os dados são em escala de cinza e converta para RGB
    if image.shape[0] == 1:
        image = plt.cm.viridis(image)

    # Reorganize a forma da imagem para (altura, largura, canais)
    image = np.transpose(image, (1, 2, 0))

    # Salva a imagem como um arquivo PNG
    filename = f"imagem_{i}.png"
    filepath = os.path.join(image_dir, filename)
    plt.imsave(filepath, image)

    # Adiciona a imagem e o rótulo ao dicionário de metadados
    metadata[f"imagem_{i}"] = {"rótulo": label, "arquivo": filename}

# Save the data to an HDF5 file
with h5py.File("pristineSS.hdf5", "w") as f:
    f.create_dataset("data_x", data=data_x)
    f.create_dataset("data_y", data=data_y)

Salvando imagens e metadados: 100%|█| 15426/15426 [00:21<00:00, 7


In [23]:
import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import h5py

# Definir os caminhos dos arquivos
file_x = r"C:\Users\stefa\OneDrive\Documentos\Natali 2\Pristine\SimReal_SOURCE_X_Illustris0.npy"
file_y = r"C:\Users\stefa\OneDrive\Documentos\Natali 2\Pristine\SimReal_SOURCE_y_Illustris0.npy"

# Carregar os arquivos usando np.load
data_x = np.load(file_x)
data_y = np.load(file_y)

# Diretório para as imagens
image_dir = "pristineSR"
os.makedirs(image_dir, exist_ok=True)

# Criar um dicionário para os metadados
metadata = {}

# Usar tqdm para a barra de progresso
for i in tqdm(range(data_x.shape[0]), desc="Salvando imagens e metadados"):
    image = data_x[i]
    label = data_y[i]

    # Normaliza os dados da imagem para o intervalo [0, 1]
    image = (image - np.min(image)) / (np.max(image) - np.min(image))

    # Verifique se os dados são em escala de cinza e converta para RGB
    if image.shape[0] == 1:
        image = plt.cm.viridis(image)

    # Reorganize a forma da imagem para (altura, largura, canais)
    image = np.transpose(image, (1, 2, 0))

    # Salva a imagem como um arquivo PNG
    filename = f"imagem_{i}.png"
    filepath = os.path.join(image_dir, filename)
    plt.imsave(filepath, image)

    # Adiciona a imagem e o rótulo ao dicionário de metadados
    metadata[f"imagem_{i}"] = {"rótulo": label, "arquivo": filename}

# Salva os metadados como um arquivo HDF5
with h5py.File("pristineSR.hdf5", "w") as f:
    f.create_dataset("data_x", data=data_x)
    f.create_dataset("data_y", data=data_y)

Salvando imagens e metadados: 100%|█| 6000/6000 [00:20<00:00, 287


In [24]:
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.visualization import simple_norm
import json
from tqdm import tqdm
import h5py

# Caminho do arquivo FITS
file_fits = r"C:\Users\stefa\OneDrive\Documentos\Natali 2\Pristine\hlsp_deepmerge_hst_acs-wfc3_illustris-z2_f814w-f160w_v1_sim-pristine.fits"

# Abrir o arquivo FITS
hdulist = fits.open(file_fits)

# Acessar o cabeçalho do primeiro HDU (hdu[0])
header = hdulist[0].header

# Definir a seed para garantir a mesma seleção de imagens
# np.random.seed(206265)  # descomente para obter sempre a mesma seleção

# Selecionar 16 imagens aleatórias
example_ids = np.random.choice(hdulist[1].data.shape[0], 16)

# Criar um dicionário para armazenar as imagens
images_data = {}

# Loop pelas imagens selecionadas aleatoriamente e plotar com rótulos
for i in tqdm(range(len(example_ids)), desc="Salvando imagens"):  # Adicionando a barra de progresso
    # Pegar as imagens F814W (índice=0) e F160W (índice=1)
    image_f814w = hdulist[0].data[example_ids[i], 0, :, :]
    image_f160w = hdulist[0].data[example_ids[i], 1, :, :]

    # Normalizar as imagens
    norm_f814w = simple_norm(image_f814w, 'log', max_percent=99.75)
    norm_f160w = simple_norm(image_f160w, 'log', max_percent=99.75)

    # Adicionar as imagens e o rótulo ao dicionário
    images_data[f"imagem_{i}"] = {
        "f814w": image_f814w.tolist(),
        "f160w": image_f160w.tolist(),
        "merger": bool(hdulist[1].data[example_ids[i]][0])
    }

# Salvar as imagens em um arquivo HDF5
with h5py.File("pristineFITS.hdf5", "w") as f:
    for i in range(len(example_ids)):
        f.create_dataset(f"imagem_{i}/f814w", data=images_data[f"imagem_{i}"]["f814w"])
        f.create_dataset(f"imagem_{i}/f160w", data=images_data[f"imagem_{i}"]["f160w"])
        f.create_dataset(f"imagem_{i}/merger", data=images_data[f"imagem_{i}"]["merger"])

hdulist.close()

Salvando imagens: 100%|█████████| 16/16 [00:00<00:00, 252.99it/s]


In [25]:
import numpy as np
import pandas as pd
import glob
import os

# Importing Illustris catalog
catalog = pd.DataFrame(np.genfromtxt(r"C:\Users\stefa\OneDrive\Documentos\Natali 2\Pristine\illustris_morphs_rf.txt", dtype=None, encoding='utf-8'))
catalog.columns = ['snap', 'subid', 'logmstar', 'cam', 'merger', 'rfprob', 'asym_I', 'cc_I', 'g_I', 'm20_I', 'mprime_I', 'asym_H', 'cc_H', 'g_H', 'm20_H', 'mprime_H', 't_lastmaj', 't_lastmin', 't_nextmaj', 't_nextmin']

print(catalog)

       snap  subid  logmstar  cam  merger rfprob  asym_I    cc_I     g_I  \
0       103      0     12.20    0   False  0.103  0.2076  2.9771  0.5534   
1       103      0     12.20    3   False   0.15  0.2082  3.6671  0.5881   
2       103      0     12.20    1   False  0.132  0.2069  3.6939  0.6117   
3       103      0     12.20    2   False  0.231  0.2195  3.2588  0.5904   
4       103  19702     12.12    3   False   None     NaN     NaN     NaN   
...     ...    ...       ...  ...     ...    ...     ...     ...     ...   
71946    54  18603      9.83    2   False   None -0.0648  2.6204  0.4758   
71947    54   8205      9.82    1   False   None  0.0134  2.4736  0.4804   
71948    54   8205      9.82    3   False   None  0.0519  2.4502  0.4625   
71949    54   8205      9.82    2   False   None  0.1353  2.1423  0.4737   
71950    54   8205      9.82    0   False   None  0.1309  2.4773  0.4857   

        m20_I  mprime_I  asym_H    cc_H     g_H   m20_H  mprime_H  t_lastmaj  \
0     -

In [26]:
# Extract only objects from snapshot 68 (z=2)
merged_68 = catalog[catalog['snap'] == 68]
print(catalog)

       snap  subid  logmstar  cam  merger rfprob  asym_I    cc_I     g_I  \
0       103      0     12.20    0   False  0.103  0.2076  2.9771  0.5534   
1       103      0     12.20    3   False   0.15  0.2082  3.6671  0.5881   
2       103      0     12.20    1   False  0.132  0.2069  3.6939  0.6117   
3       103      0     12.20    2   False  0.231  0.2195  3.2588  0.5904   
4       103  19702     12.12    3   False   None     NaN     NaN     NaN   
...     ...    ...       ...  ...     ...    ...     ...     ...     ...   
71946    54  18603      9.83    2   False   None -0.0648  2.6204  0.4758   
71947    54   8205      9.82    1   False   None  0.0134  2.4736  0.4804   
71948    54   8205      9.82    3   False   None  0.0519  2.4502  0.4625   
71949    54   8205      9.82    2   False   None  0.1353  2.1423  0.4737   
71950    54   8205      9.82    0   False   None  0.1309  2.4773  0.4857   

        m20_I  mprime_I  asym_H    cc_H     g_H   m20_H  mprime_H  t_lastmaj  \
0     -

In [27]:
# Create an array with objects used in RF and those excluded
rf_objects = np.array([[row['subid'], row['merger']] for _, row in merged_68.iterrows()], dtype=None)

In [28]:
# Get unique object IDs and their corresponding merger labels
subid, indices = np.unique(rf_objects[:, 0], return_index=True)
RF_labels = rf_objects[indices]

shid_array = RF_labels[:, 0]
merger_labels = RF_labels[:, 1]

In [30]:
import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import h5py

# Caminho do arquivo HDF5 de origem
source_path = r"C:\Users\stefa\OneDrive\Documentos\Natali 2\pristineFITS.hdf5"

# Diretório para salvar os dados copiados
path = r"C:\Users\stefa\OneDrive\Documentos\Natali 2\Pristine\resized1"
subdirs = ['res_subdir_{:03d}'.format(x) for x in range(23)]

# Criar os subdiretórios se não existirem
for subdir in subdirs:
    os.makedirs(os.path.join(path, subdir), exist_ok=True)

# Abrir o arquivo HDF5 de origem
with h5py.File(source_path, "r") as source_file:
    # Iterar por cada grupo de imagem no arquivo HDF5
    for group_name in tqdm(source_file.keys(), desc="Copiando dados"):
        # Extrair o índice do subdiretório da parte numérica do nome do grupo
        group_index = int(group_name.split("_")[1])  # Assume que o nome do grupo é "imagem_###"

        # Criar um novo arquivo HDF5 para cada grupo de imagem
        group_path = os.path.join(path, subdirs[group_index], f"{group_name}.hdf5")
        with h5py.File(group_path, "w") as target_file:
            # Copiar os datasets para o novo arquivo
            for dataset_name in source_file[group_name].keys():
                target_file.create_dataset(dataset_name, data=source_file[group_name][dataset_name])

Copiando dados: 100%|███████████| 16/16 [00:00<00:00, 272.98it/s]


In [32]:
import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import h5py

# Diretório base
base_dir = r"C:\Users\stefa\OneDrive\Documentos\Natali 2\Pristine\resized1"

# Inicializar as listas de imagens
list_of_mergers = []
list_of_nonmergers = []

# Print the initialization of the lists
print("# Initialize lists to store image data")
print("list_of_mergers = []")
print("list_of_nonmergers = []")

# Iterar por cada subdiretório
for subdir in tqdm(os.listdir(base_dir), desc="Carregando imagens"):
    subdir_path = os.path.join(base_dir, subdir)
    if os.path.isdir(subdir_path):
        # Iterar por cada arquivo HDF5 no subdiretório
        for filename in os.listdir(subdir_path):
            if filename.endswith(".hdf5"):
                file_path = os.path.join(subdir_path, filename)
                # Carregar o arquivo HDF5
                with h5py.File(file_path, "r") as f:
                    # Obter a imagem e o rótulo da fusão
                    image_f814w = f["f814w"][...]
                    image_f160w = f["f160w"][...]
                    merger = bool(f["merger"][...])

                    # Criar um array com as duas imagens
                    image_combined = np.stack((image_f814w, image_f160w), axis=0)

                    # Adicionar à lista apropriada com base no rótulo de fusão
                    if merger:
                        list_of_mergers.append(image_combined)
                    else:
                        list_of_nonmergers.append(image_combined)

# Initialize lists to store image data
list_of_mergers = []
list_of_nonmergers = []


Carregando imagens: 100%|███████| 23/23 [00:00<00:00, 893.38it/s]


In [33]:
# Iterate through objects and their merger labels
for shid, merger in zip(shid_array, merger_labels):
    for camnum in ['00', '01', '02', '03']:
        filter_files = np.sort(np.asarray(glob.glob(os.path.join(path, 'res_sub*', '*sh' + str(shid) + 'cam' + camnum + '*SB00.npy'))))

        # Check if there are exactly 4 files for each object
        if len(filter_files) != 4:
            continue

        # Load image data for all 4 filters
        tmp = np.zeros((4, 75, 75))
        for i, file in enumerate(filter_files):
            tmp[i, :, :] = np.load(file)

        # Append image data to corresponding list based on merger label
        if merger:
            list_of_mergers.append(tmp)
        else:
            list_of_nonmergers.append(tmp)

In [34]:
# Stack image data and create labels
mergers = np.stack(list_of_mergers)
notmergers = np.stack(list_of_nonmergers)
mergers_y = np.ones(mergers.shape[0])
notmergers_y = np.zeros(notmergers.shape[0])

In [35]:
# Combine data and labels
XX = np.vstack((mergers, notmergers))
yy = np.concatenate((mergers_y, notmergers_y))

In [36]:
# Save data and labels
np.save('SB00.npy', XX)
np.save('SB00_y.npy', yy)

In [37]:
import numpy as np

# Load the saved files to get their sizes
XX_loaded = np.load('SB00.npy')
yy_loaded = np.load('SB00_y.npy')

# Print information about the saved arrays
print(f"Data (XX) saved to 'SB00.npy' with shape: {XX_loaded.shape}")
print(f"Labels (yy) saved to 'SB00_y.npy' with shape: {yy_loaded.shape}")

Data (XX) saved to 'SB00.npy' with shape: (16, 2, 75, 75)
Labels (yy) saved to 'SB00_y.npy' with shape: (16,)


In [41]:
# Load the saved files to get their sizes
XX_loaded = np.load('SB00.npy')
yy_loaded = np.load('SB00_y.npy')

In [48]:
xx_data_list = []  # Initialize an empty list to store the selected data
for i in range(len(XX)):  # Loop through all 16 samples (assuming XX is your array)
    xx_data_list.append([XX[i, 0], XX[i, 1]])  # Append a list containing data from channels 0 and 1 for each sample
xx_data = np.stack(xx_data_list)  # Combine the lists into a NumPy array
yy_data = yy  # Assign yy_data to the value of yy

In [49]:
print(XX_loaded.shape) 

(16, 2, 75, 75)


In [51]:
# Generate augmented data by applying transformations
for i in range(len(xx_data)):  # Corrected loop range
    # Up-down flip
    tmp = np.zeros((2, 75, 75))
    tmp[0, :, :] = np.flipud(xx_data[i, 0])
    tmp[1, :, :] = np.flipud(xx_data[i, 1])
    list_ud.append(tmp)

    # Left-right flip
    tmp = np.zeros((2, 75, 75))
    tmp[0, :, :] = np.fliplr(xx_data[i, 0])
    tmp[1, :, :] = np.fliplr(xx_data[i, 1])
    list_lr.append(tmp)

    # 90 degrees rotation
    tmp = np.zeros((2, 75, 75))
    tmp[0, :, :] = np.rot90(xx_data[i, 0])
    tmp[1, :, :] = np.rot90(xx_data[i, 1])
    list_rot.append(tmp)

    # 180 degrees rotation
    tmp = np.zeros((2, 75, 75))
    tmp[0, :, :] = np.rot90(np.rot90(xx_data[i, 0]))
    tmp[1, :, :] = np.rot90(np.rot90(xx_data[i, 1]))
    list_rot180.append(tmp)


In [52]:
# Convert the lists to numpy arrays
mergers_ud = np.stack(list_ud)
mergers_lr = np.stack(list_lr)
mergers_rot = np.stack(list_rot)
mergers_rot180 = np.stack(list_rot180)

In [53]:
# Create labels for the augmented data
y_ud = np.ones(mergers_ud.shape[0])
y_lr = np.ones(mergers_lr.shape[0])
y_rot = np.ones(mergers_rot.shape[0])
y_rot180 = np.ones(mergers_rot180.shape[0])

In [54]:
# Combine the original and augmented data
X_augmented = np.vstack((xx_data[:1624], mergers_ud, mergers_lr, mergers_rot, mergers_rot180, xx_data[1624:]))
y_augmented = np.concatenate((yy_data[:1624], y_ud, y_lr, y_rot, y_rot180, yy_data[1624:]))

In [55]:
# Save the augmented data
np.save('SB00_augmented.npy', X_augmented)
np.save('SB00_augmented_y.npy', y_augmented)

In [56]:
# Print the lengths of the augmented datasets
print(len(X_augmented))
print(len(y_augmented))

144
144
