# Milky Way Mapper's Galaxy

## Section 7: Paper Tasks

In [45]:
from tensorflow import keras
# Network setup agreed upon by the other students working on this training sample;
# it reportedly yields ~67% accuracy.
neurons_per_layer = 20  # units in each hidden Dense layer of build_age_model
n_layers = 5  # number of hidden Dense layers stacked in build_age_model
iterations = 200  # passed as `epochs` to the model.fit calls below

# Keep the functions below clean and clearly commented,
# since they are meant for a later GitHub upload.

def build_age_model():
    """Build and compile the dense age-regression network.

    The network takes a 6-element feature vector, passes it through
    ``n_layers`` ReLU-activated Dense layers of ``neurons_per_layer`` units
    each (both read from the notebook-level settings at call time), and ends
    in a single linear output unit.

    Returns:
        A compiled ``keras.Model`` named ``'age_network'`` using mean squared
        error as the loss, the Adam optimizer with its default settings, and
        ``'mse'`` as the tracked metric.
    """
    feature_input = keras.Input(shape=(6,))

    # Create the hidden layers first, then chain them onto the input tensor.
    hidden_stack = [
        keras.layers.Dense(neurons_per_layer, activation='relu')
        for _ in range(n_layers)
    ]
    tensor = feature_input
    for dense_layer in hidden_stack:
        tensor = dense_layer(tensor)

    # Single linear unit: the network regresses one scalar target.
    prediction = keras.layers.Dense(1)(tensor)

    network = keras.Model(
        inputs=feature_input,
        outputs=prediction,
        name='age_network',
    )
    network.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.MeanSquaredError(),
        metrics=['mse'],
    )
    return network

def prepare_training_mats(agedata, data_id_col, age_id_col, catalog=None):
    """Cross-match a stellar catalog with an age table and build scaled train/test arrays.

    Parameters
    ----------
    agedata : table-like
        Age catalog. Must support ``agedata[age_id_col]`` and ``agedata['Age']``,
        each returning an array that can be indexed with an integer index array.
    data_id_col : str
        Name of the identifier column in the stellar catalog.
    age_id_col : str
        Name of the identifier column in ``agedata``.
    catalog : table-like, optional
        Stellar catalog providing ``data_id_col`` and the feature columns
        ``teff``, ``logg``, ``m_h_atm``, ``alpha_m_atm``, ``c_h`` and ``n_h``.
        Defaults to the notebook-level ``data_masked``.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test, scaling_x, scaling_y,
        fullx_scaled, fully_scaled)``. Feature arrays have 6 columns, target
        arrays have 1 column. ``scaling_x`` / ``scaling_y`` are the per-column
        medians the data were divided by. The test set is the last
        ``len // 10`` rows; the training set is everything before it.
    """
    if catalog is None:
        # Fall back to the notebook-level catalog for existing callers.
        catalog = data_masked

    feature_cols = ('teff', 'logg', 'm_h_atm', 'alpha_m_atm', 'c_h', 'n_h')

    # ID cross-matching: indices of the common identifiers in each table.
    _, ind_a, ind_b = np.intersect1d(
        catalog[data_id_col], agedata[age_id_col], return_indices=True
    )

    # Feature matrix (rows = matched stars) and single-column target matrix.
    fullx = np.dstack([catalog[col][ind_a] for col in feature_cols])[0]
    fully = np.dstack([agedata['Age'][ind_b]])[0]

    # Drop rows containing NaN/inf in any feature or in the target
    # (no quality cuts are applied here).
    mask = np.all(np.isfinite(fullx), axis=1) & np.all(np.isfinite(fully), axis=1)
    fullx, fully = fullx[mask], fully[mask]

    # Scale every column by its median.
    # NOTE(review): a column whose median is 0 would produce inf/NaN here.
    scaling_x = np.median(fullx, axis=0)
    scaling_y = np.median(fully, axis=0)
    fullx_scaled = fullx / scaling_x
    fully_scaled = fully / scaling_y

    # Train-test split: hold out the last ~10% of rows. A single split index
    # keeps the two sets disjoint and exhaustive. The previous negative-index
    # slices dropped two samples and, for fewer than 10 rows, made the test
    # set identical to the training set.
    n_rows = len(fully_scaled)
    split_at = n_rows - n_rows // 10
    x_train, y_train = fullx_scaled[:split_at], fully_scaled[:split_at]
    x_test, y_test = fullx_scaled[split_at:], fully_scaled[split_at:]

    return (x_train, y_train, x_test, y_test, scaling_x, scaling_y, fullx_scaled, fully_scaled)

###### APOKASC-2 TRAINING ######
# Reuse the APOKASC-2 table loaded earlier in the notebook as the age catalog.
agedata2 = apokasc2
(
    x_train2, y_train2,
    x_test2, y_test2,
    sx2, sy2,
    _, _,
) = prepare_training_mats(
    agedata2,
    data_id_col='sdss4_apogee_id',
    age_id_col='2MASS',
)

# Fresh network; validation_split=0.05 makes fit() hold out 5% of the
# training arrays for validation.
model_apo2 = build_age_model()
model_apo2.fit(
    x_train2,
    y_train2,
    epochs=iterations,
    validation_split=0.05,
    batch_size=300,
    verbose=1,
)

###### APOKASC-3 TRAINING ######
# Same procedure for APOKASC-3, cross-matched on Gaia DR3 source ids instead.
agedata3 = apokasc3
(
    x_train3, y_train3,
    x_test3, y_test3,
    sx3, sy3,
    _, _,
) = prepare_training_mats(
    agedata3,
    data_id_col='gaia_dr3_source_id',
    age_id_col='GaiaDR3',
)

model_apo3 = build_age_model()
model_apo3.fit(
    x_train3,
    y_train3,
    epochs=iterations,
    validation_split=0.05,
    batch_size=300,
    verbose=1,
)

Epoch 1/200
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.7891 - mse: 0.7891 - val_loss: 0.5796 - val_mse: 0.5796
Epoch 2/200
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5516 - mse: 0.5516 - val_loss: 0.3930 - val_mse: 0.3930
Epoch 3/200
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4485 - mse: 0.4485 - val_loss: 0.3377 - val_mse: 0.3377
Epoch 4/200
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4088 - mse: 0.4088 - val_loss: 0.3118 - val_mse: 0.3118
Epoch 5/200
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.3771 - mse: 0.3771 - val_loss: 0.2841 - val_mse: 0.2841
Epoch 6/200
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.3503 - mse: 0.3503 - val_loss: 0.2618 - val_mse: 0.2618
Epoch 7/200
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 

<keras.src.callbacks.history.History at 0x7cced83f4290>

In [46]:
###### AVG. DIFF. COMPUTATION ######
import pandas as pd  # not imported earlier in the notebook; needed for the CSV table

# Agreed-upon cuts: keep stars inside the teff / logg / m_h_atm windows
# that are not flagged as bad.
good = np.where(
    (data['teff'] > 3700)
    & (data['teff'] < 5300)
    & (data['logg'] > 0.9)
    & (data['logg'] < 3.3)
    & (data['m_h_atm'] > -2.0)
    & (data['m_h_atm'] < 0.6)
    & (data['flag_bad'] == False)
)

# Feature matrix of ONLY the "good" stars, with columns in the same order
# as the training features.
good_x = np.dstack([
    data[column][good]
    for column in ('teff', 'logg', 'm_h_atm', 'alpha_m_atm', 'c_h', 'n_h')
])[0]

# Each model gets the features divided by its own training medians.
good_x_apok2 = good_x / sx2
good_x_apok3 = good_x / sx3

# Predicted ages, still in scaled units.
pred_apok2_scaled = model_apo2.predict(good_x_apok2, verbose=0)
pred_apok3_scaled = model_apo3.predict(good_x_apok3, verbose=0)

# Undo the target scaling to get ages in Gyr, flattened to 1-D.
ages_apok2_good = (pred_apok2_scaled * sy2).flatten()
ages_apok3_good = (pred_apok3_scaled * sy3).flatten()

# Per-star difference (APOKASC-3 minus APOKASC-2) and its mean over the
# finite entries only.
delta_age_3_minus_2 = ages_apok3_good - ages_apok2_good
finite = np.isfinite(delta_age_3_minus_2)
mean_diff = delta_age_3_minus_2[finite].mean()

# TIC identifiers of the selected stars.
tic_good = np.array(data['tic_v8_id'][good]).flatten()

###### TABLE CREATION (.csv) ######
age_table = pd.DataFrame(
    {
        'TIC': tic_good,
        'Age_APOKASC2_Gyr': ages_apok2_good,
        'Age_APOKASC3_Gyr': ages_apok3_good,
        'Delta_age_3_minus_2_Gyr': delta_age_3_minus_2,
    }
)
# Missing values are written out as the literal string 'nan'.
age_table.to_csv(
    'APOKASC2_vs_APOKASC3_predicted_ages_TIC_good.csv',
    index=False,
    na_rep='nan',
)

print("Saved APOKASC2_vs_APOKASC3_predicted_ages_TIC_good.csv")
print("Done.")


Epoch 1/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 1.1761 - mse: 1.1761 - val_loss: 0.7028 - val_mse: 0.7028
Epoch 2/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.8096 - mse: 0.8096 - val_loss: 0.5691 - val_mse: 0.5691
Epoch 3/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.6888 - mse: 0.6888 - val_loss: 0.5229 - val_mse: 0.5229
Epoch 4/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.6375 - mse: 0.6375 - val_loss: 0.4997 - val_mse: 0.4997
Epoch 5/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.6149 - mse: 0.6149 - val_loss: 0.4928 - val_mse: 0.4928
Epoch 6/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.6099 - mse: 0.6099 - val_loss: 0.4840 - val_mse: 0.4840
Epoch 7/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0