In [2]:
import os
import numpy as np
from astropy.table import Table
from sklearn.metrics import r2_score
from sklearn.neighbors import KNeighborsRegressor

# Root directory holding the embedding tables. Set the EMBEDDING_ROOT
# environment variable to run the notebook on another machine; when it is
# unset (or empty) the original cluster location is used.
EMBEDDING_ROOT = (
    os.environ.get("EMBEDDING_ROOT")
    or "/mnt/home/lparker/astropile_provabgs_desi_decals"
)

# Number of neighbours used by the k-NN regressor below.
N_NEIGHBORS = 16

# Set up the paths to the train / test tables
train_path = os.path.join(EMBEDDING_ROOT, "train_provabgs.hdf5")
test_path = os.path.join(EMBEDDING_ROOT, "test_provabgs.hdf5")

# Read the tables; they provide both the embedding columns and the
# PROVABGS property columns used as regression targets.
train_provabgs = Table.read(train_path)
test_provabgs = Table.read(test_path)

# Properties to regress (column names in the tables)
cols = ['LOG_MSTAR', 'sSFR', 'Z_MW', 'Z_HP', 'TAGE_MW']

# Features: one entry per modality ("im" = image embedding,
# "sp" = spectrum embedding).
X_train = {"im": train_provabgs["image_embedding"], "sp": train_provabgs["spectrum_embedding"]}
X_test = {"im": test_provabgs["image_embedding"], "sp": test_provabgs["spectrum_embedding"]}

# Targets: the property columns are stacked and transposed so that the last
# axis follows the order of `cols` (y[:, i] is cols[i] for 1-D columns).
y_train = np.stack([train_provabgs[col] for col in cols]).T
y_test = np.stack([test_provabgs[col] for col in cols]).T

# Set up the regressor: distance-weighted k-nearest-neighbours
neigh = KNeighborsRegressor(weights="distance", n_neighbors=N_NEIGHBORS)


#### From Images

In [3]:
# Fit the k-NN regressor on the image embeddings, then predict the test set
# (fit returns the estimator itself, so the two calls can be chained).
preds = neigh.fit(X_train["im"], y_train).predict(X_test["im"])

# Build the table of r^2 scores: property name -> list of scores
knn_r2 = {}
for i, prop in enumerate(cols):
    score = r2_score(y_test[:, i], preds[:, i])
    knn_r2.setdefault(prop, []).append(score)

knn_r2

{'LOG_MSTAR': [0.737583396970755],
 'sSFR': [0.43525226693242236],
 'Z_MW': [0.43210501837075377],
 'Z_HP': [0.8010800055020211],
 'TAGE_MW': [0.2403473690849901]}

#### From Spectra

In [4]:
# Re-fit the shared k-NN regressor on the spectrum embeddings and predict
# the test set. (KNeighborsRegressor is already imported in the first cell
# and is not needed here, so the duplicate import was dropped.)
neigh.fit(X_train["sp"], y_train)
preds = neigh.predict(X_test["sp"])

# Make a table of r^2 scores: property name -> list of scores.
# Note: this rebinds `knn_r2`, replacing the scores from the image cell.
knn_r2 = {key: [] for key in cols}

for i, prop in enumerate(cols):
    # Column i of the targets / predictions corresponds to cols[i]
    knn_r2[prop].append(r2_score(y_test[:, i], preds[:, i]))

knn_r2

{'LOG_MSTAR': [0.8793156111334741],
 'sSFR': [0.643199859468666],
 'Z_MW': [0.5846860959451257],
 'Z_HP': [0.985718123551883],
 'TAGE_MW': [0.4405237886817924]}

In [None]:
# (intentionally empty: a stray `j` keystroke here would raise NameError on Run All)