In [57]:
import numpy as np
import pandas as pd
import scipy.stats as sps
from scipy.interpolate import CubicSpline, LinearNDInterpolator, interp1d, NearestNDInterpolator
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor

# Load the training table and build the feature/target arrays used by every model below.
flux_columns = ['z', 'Y', 'i', 'r']
df = pd.read_csv("training_data.csv")
# Keep only rows whose flux is strictly positive in *every* band: the targets are
# -2.5*log10(flux), so a zero (or negative/missing) flux in any single band would
# produce inf/NaN targets. The explicit copy avoids SettingWithCopyWarning on the
# column assignments that follow.
df = df[(df[flux_columns] > 0).all(axis=1)].copy()
# log_g is read as text; drop its first character before converting to float.
# NOTE(review): presumably a one-character prefix in the CSV — confirm the file format.
df['log_g'] = df['log_g'].str[1:].astype('float')
# NOTE(review): M_H is rescaled by -0.1; presumably the CSV stores it in tenths with
# the sign flipped — confirm against the data description.
df['M_H'] = df['M_H']*(-0.1)
x = df[['M_H', 'T_eff', 'log_g']].to_numpy()
# Convert the four band fluxes to a magnitude-like scale: -2.5 * log10(flux).
y = -2.5*np.log10(df[flux_columns].to_numpy())
# Hold out 25% of the rows for evaluation; the seed is fixed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

In [11]:
# Baseline 1: nearest-neighbour lookup in the (rescaled) parameter space.
mag_spline_nearest = NearestNDInterpolator(x_train, y_train, rescale=True)
nearest_test_pred = mag_spline_nearest(x_test)
# Mean squared error on the held-out split (last expression -> cell output).
mean_squared_error(y_test, nearest_test_pred)

0.09936206391604482

In [13]:
# Baseline 2: piecewise-linear interpolation over the (rescaled) training points.
# NOTE(review): the fill value is a single scalar, the mean over every element of
# y_train (all four bands pooled), so any query that receives the fill value gets
# the same number in each band. Consider a per-band or nearest-neighbour fallback.
overall_mean_target = y_train.mean()
mag_spline_linear = LinearNDInterpolator(
    points=x_train,
    values=y_train,
    fill_value=overall_mean_target,
    rescale=True,
)
linear_test_pred = mag_spline_linear(x_test)
# Mean squared error on the held-out split (last expression -> cell output).
mean_squared_error(y_test, linear_test_pred)


0.11493798388794618

In [15]:
# Baseline 3: random forest fitted on the unscaled features; seed fixed for repeatability.
mag_RF = RandomForestRegressor(random_state=43)
mag_RF.fit(x_train, y_train)
rf_test_pred = mag_RF.predict(x_test)
# Mean squared error on the held-out split (last expression -> cell output).
mean_squared_error(y_test, rf_test_pred)

0.06154014153644487

In [17]:
# Standardise features and targets. Both scalers are fitted on the training split
# only and then applied to train and test alike, so no test statistics leak in.
x_scaler = StandardScaler()
x_scaler.fit(x_train)
y_scaler = StandardScaler()
y_scaler.fit(y_train)

scaled_x_train = x_scaler.transform(x_train)
scaled_y_train = y_scaler.transform(y_train)
scaled_x_test = x_scaler.transform(x_test)
scaled_y_test = y_scaler.transform(y_test)


# TODO: make a 2D plot of the squared error at each test point?
# TODO: try other models
# TODO: plot predicted vs. true values for all 4 passbands

In [81]:
# Linear baseline: ridge regression on the standardised features and targets.
clf = Ridge(alpha=1.0)
clf.fit(scaled_x_train, scaled_y_train)
ridge_scaled_pred = clf.predict(scaled_x_test)
# First line: MSE in standardised target space.
print(mean_squared_error(scaled_y_test, ridge_scaled_pred))
# Second line: MSE after mapping the predictions back to the unscaled targets.
print(mean_squared_error(y_test, y_scaler.inverse_transform(ridge_scaled_pred)))

0.4082484166402428
2.7483560434876946


In [73]:
# Neural-network regressor trained on the standardised features and targets.
regr = MLPRegressor(random_state=32)
regr.fit(scaled_x_train, scaled_y_train)
# Predict once and reuse the result instead of discarding it and predicting again.
mlp_scaled_pred = regr.predict(scaled_x_test)
# Coefficient of determination of the prediction (in standardised space). It is
# printed explicitly because a notebook cell only displays its last expression.
print(regr.score(scaled_x_test, scaled_y_test))
# MSE after mapping the predictions back to the unscaled targets (cell output).
mean_squared_error(y_test, y_scaler.inverse_transform(mlp_scaled_pred))

# TODO: plot the magnitudes predicted by the different models as a function of M_J, do the same for flux to absolute magnitude, and experiment with hyperparameters

0.08174931865959634