In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras.models import load_model


2024-08-01 13:57:26.583193: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def sample_2D(probs, nbin=None):
    """
    Sample one galaxy according to a joint distribution P(x, y) (2D).
    A galaxy is a pair of properties (x, y).

    The bin is drawn directly with the supplied weights, so bins with a very
    small probability keep their share of the mass and a flat distribution
    cannot leave the sampler with nothing to draw from.

    :param probs: probabilities (or non-negative weights) of the nbin * nbin
        bins, either flattened row by row or given as an (nbin x nbin) grid
    :type probs: numpy.array; shape = (nbin * nbin,) or (nbin x nbin)
    :param nbin: number of bins in each dimension; when None it is inferred
        as the square root of the number of entries in probs
    :type nbin: int or None

    :return: (y, x) bin indices (row, column) of the sampled bin in the
        nbin x nbin grid, each as a one-element array
    :rtype: numpy.array, numpy.array

    :raises ValueError: if probs does not hold exactly nbin * nbin entries,
        or if its entries do not sum to a positive, finite number
    """
    # Work on a flat float64 copy so both flattened and 2D inputs are accepted.
    weights = np.asarray(probs, dtype=np.float64).ravel()

    if nbin is None:
        nbin = int(round(np.sqrt(weights.size)))
    if nbin <= 0 or nbin * nbin != weights.size:
        raise ValueError(
            "probs must contain nbin * nbin entries; got {} entries for nbin={}".format(
                weights.size, nbin
            )
        )

    total = weights.sum()
    if not np.isfinite(total) or total <= 0:
        raise ValueError("probs must sum to a positive, finite value")

    # Renormalise in float64: np.random.choice requires p to sum to 1, and
    # lower-precision inputs are only approximately normalised.
    choice = np.random.choice(weights.size, p=weights / total)

    # Back: identify the bin in the grid nbin x nbin (row-major layout).
    y_sampled, x_sampled = divmod(int(choice), nbin)

    # One-element index arrays, the same form np.where would return.
    return np.array([y_sampled], dtype=np.intp), np.array([x_sampled], dtype=np.intp)


In [3]:
# Bin-mean tables already read from disk, keyed by CSV path, so repeated
# calls do not re-read the file.
_BIN_MEANS_CACHE = {}


def _load_bin_means(means_path):
    """Return the bin-means table stored at means_path, reading the CSV only once."""
    if means_path not in _BIN_MEANS_CACHE:
        _BIN_MEANS_CACHE[means_path] = pd.read_csv(means_path)
    return _BIN_MEANS_CACHE[means_path]


def discretize_inverse(prop_values, prop_name, means_path='../../data/bin_means.csv'):
    """
    Map bin indices (classes) back to continuous property values.

    Each index is replaced by the entry at that position in the prop_name
    column of the bin-means table.

    :param prop_values: bin indices (classes) to be converted into continuous value
    :type prop_values: numpy.array
    :param prop_name: 'smass', 'color', 'sSFR' or 'radius'
    :type prop_name: str
    :param means_path: CSV file holding one column of bin means per property
    :type means_path: str

    :return: values of the properties for each data set element
    :rtype: numpy.array

    :raises KeyError: if the table has no column named prop_name
    :raises IndexError: if an index falls outside the table or is not an integer
    """
    means = _load_bin_means(means_path)[prop_name].to_numpy()

    indices = np.asarray(prop_values)
    if indices.size == 0:
        # Nothing to convert; an empty sequence yields an empty array.
        return np.array([])

    # Integer-array indexing looks up every bin at once and returns a new
    # array, so the cached table is never exposed to callers.
    return means[indices]

In [4]:
# Read X_test.csv into a DataFrame.
# NOTE(review): this path starts at '../data', while the bin tables and the
# model are read from '../../data' and '../../models' — confirm which
# working directory the notebook is meant to run from.
X_test = pd.read_csv('../data/splited/X_test.csv')

In [5]:
# Convert the DataFrame into a plain NumPy array.
x_df = X_test.to_numpy()

In [6]:
# Load the saved Keras model from its HDF5 file.
model = load_model('../../models/color_smass_model.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Run the model on every row of x_df. The recorded run produced an array of
# shape (52150, 2500): one 2500-entry vector per input row.
pred_list = model.predict(x_df)

[1m1630/1630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step


2024-08-01 13:57:35.979925: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 521500000 exceeds 10% of free system memory.


In [8]:
# Display the shape of the prediction array as (rows, entries per row).
pred_list.shape

(52150, 2500)

In [9]:
# Two separate, initially empty lists: one for the sampled stellar-mass
# values and one for the sampled colour values.
smass, color = [], []

In [10]:
# Number of bins along each axis of the 2D property map.
nbin = 50

# Properties shown on the map: X2 on the y-axis, X1 on the x-axis.
y_name = 'color'
x_name = 'smass'

In [11]:
# Seed NumPy's global generator so the sampled catalogue is the same on
# every "Restart & Run All".
SEED = 42
np.random.seed(SEED)

# Draw one (y, x) bin per galaxy from its predicted distribution. Only the
# bin indices are collected here; sample_2D returns one-element arrays.
y_bins = []
x_bins = []
for pred in pred_list:
    y_values, x_values = sample_2D(pred)
    y_bins.append(y_values[0])
    x_bins.append(x_values[0])

# Convert all indices in a single call per property instead of two calls per
# galaxy. Rebinding the lists (rather than appending to them) keeps this cell
# idempotent: running it again does not duplicate earlier samples.
smass = list(discretize_inverse(np.array(x_bins, dtype=int), x_name))
color = list(discretize_inverse(np.array(y_bins, dtype=int), y_name))

In [12]:
# Turn the two Python lists of sampled values into NumPy arrays.
smass_nparr = np.array(smass)
color_nparr = np.array(color)

In [13]:
# A 1D array of n values becomes an (n, 1) column so the two properties can
# be joined side by side; arrays that are not 1D are left untouched.
if smass_nparr.ndim == 1:
    smass_nparr = smass_nparr[:, np.newaxis]
if color_nparr.ndim == 1:
    color_nparr = color_nparr[:, np.newaxis]


In [14]:
# Join the two arrays along axis 1: stellar mass first, colour second.
concat = np.concatenate((smass_nparr,color_nparr), axis=1)

In [15]:
# Display the combined array; NumPy abbreviates long arrays with '...'.
concat

array([[ 8.7925,  0.5664],
       [ 8.8775,  0.5968],
       [ 9.3875,  0.4448],
       ...,
       [ 9.0475,  0.384 ],
       [10.1525,  0.5968],
       [10.0675,  0.7184]])

In [16]:
# Wrap the combined array in a DataFrame with one named column per property.
df = pd.DataFrame(concat, columns=['stellar_mass', 'color_g_i'])

# Save the DataFrame to a CSV file, without the row index.
df.to_csv('../data/output_paper.csv', index=False)