
Commit

removed asymmetric error case
HeloiseS committed Dec 8, 2020
1 parent a5110cc commit 7433ad8
Showing 3 changed files with 6 additions and 277 deletions.
66 changes: 2 additions & 64 deletions hoki/age/tests/test_utils.py
@@ -58,31 +58,6 @@
'logT_err': np.array([0.1, 0.2, 0.1, 0.1]),
})



stars_NOTSYM = pd.DataFrame.from_dict({'name': np.array(['118-1', '118-2', '118-3', '118-4']),
'logL': np.array([5.0, 5.1, 4.9, 5.9]),
'logL_m': np.array([0.1, 0.2, 0.1, 0.1, ]),
'logL_p': np.array([0.1, 0.23, 0.1, 0.1]),
'logT': np.array([4.48, 4.45, 4.46, 4.47]),
'logT_m': np.array([0.1, 0.2, 0.1, 0.1]),
'logT_p': np.array([0.1, 0.23, 0.1, 0.1]),
})
stars_NOTSYM_T_err_missing = pd.DataFrame.from_dict({'name': np.array(['118-1', '118-2', '118-3', '118-4']),
'logL': np.array([5.0, 5.1, 4.9, 5.9]),
'logL_m': np.array([0.1, 0.2, 0.1, 0.1, ]),
'logL_p': np.array([0.1, 0.23, 0.1, 0.1]),
'logT': np.array([4.48, 4.45, 4.46, 4.47]),
})



# asymmetric errors test
m = np.array([0.1, .17, 0.13])
c = np.array([6.33, 6.25, 6.35])
p = np.array([0.12, 0.18, 0.15])
cdf = np.array([0.16, 0.5, 0.84])
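
The fixtures deleted above encode asymmetric error bars with paired *_m (lower) and *_p (upper) columns, and the m, c, p, cdf arrays feed the lognormal-fit test further down; the surviving fixtures carry a single *_err column per quantity. A minimal sketch of the two observation formats, with column names taken from this diff and values borrowed from the fixtures:

import pandas as pd

# Symmetric errors: one *_err column per quantity (the format still supported after this commit).
stars_sym_example = pd.DataFrame({'name': ['118-1', '118-2'],
                                  'logL': [5.0, 5.1], 'logL_err': [0.1, 0.2],
                                  'logT': [4.48, 4.45], 'logT_err': [0.1, 0.2]})

# Asymmetric errors: separate lower (*_m) and upper (*_p) columns
# (the format exercised by the fixtures removed in this commit).
stars_notsym_example = pd.DataFrame({'name': ['118-1', '118-2'],
                                     'logL': [5.0, 5.1],
                                     'logL_m': [0.1, 0.2], 'logL_p': [0.1, 0.23],
                                     'logT': [4.48, 4.45],
                                     'logT_m': [0.1, 0.2], 'logT_p': [0.1, 0.23]})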

#########
# MISC #
#########
@@ -96,22 +71,13 @@ def test_basic(self):
assert norm[2] == 1, 'Normalisation done wrong'
assert sum(norm) == 1, "Normalisation done wrong"


class TestFitLognormParams(object):
def test_it_runs(self):
bs, ss, serrs = au.fit_lognorm_params(c, m, p)
assert np.sum(np.isclose(bs, np.array([5.33, 5.25, 5.35]), atol=1e-2)) == 3, f"{bs}"


class TestErrorFlag(object):
def test_no_err(self):
assert au._error_flag(stars_none) is None, "Error Flag should be None"

- def test_SYM(self):
- assert au._error_flag(stars_SYM) == 'SYM', "Error Flag should be SYM"
+ def test_ERR(self):
+ assert au._error_flag(stars_SYM) == 'ERR', "Error Flag should be ERR"

def test_NOTSYM(self):
assert au._error_flag(stars_NOTSYM) == 'NOTSYM', "Error Flag should be NOTSYM"

#######################
# FINDING COORDINATES #
@@ -193,34 +159,6 @@ def test_symmetric_errors(self):
pdf_df = au.calculate_individual_pdfs(stars_SYM, myhrd)
assert not np.isnan(sum(pdf_df['118-4'])), "something went wrong with symmetric errors"

def test_asymmetric_errors(self):
pdf_df = au.calculate_individual_pdfs(stars_NOTSYM, myhrd)
assert not np.isnan(sum(pdf_df['118-4'])), "something went wrong with asymmetric errors"

#### Asymmetric errors

class TestAddErrorFlagColumnHRD(object):
def test_basic(self):
df = stars_NOTSYM.copy()
au.add_error_flag_column_hrd(df)
assert df.xerr_flag[1] == 'NOTSYM', "Incorrect flag"
del df

def test_T_err_missing(self):
df = stars_NOTSYM_T_err_missing.copy()
au.add_error_flag_column_hrd(df)
assert df.xerr_flag[1] == 'None', "Incorrect flag"

# Because of randomness I can't check the accuracy of the value, but at least I can make sure
# it runs to give us a value.
def test_SYM_T_err_missing(self):
pdfs = au.calculate_individual_pdfs(stars_SYM_T_err_missing, myhrd)
assert isinstance(pdfs['118-4'][5], float), "Not producing values with SYM_T_err_missing"

def test_NOTSYM_T_err_missing(self):
pdfs = au.calculate_individual_pdfs(stars_NOTSYM_T_err_missing, myhrd)
assert isinstance(pdfs['118-4'][5], float),"Not producing values with NOTSYM_T_err_missing"

#####################################
# PUTTING PDFS TOGETHER IN SOME WAY #
#####################################
213 changes: 2 additions & 211 deletions hoki/age/utils.py
@@ -87,23 +87,9 @@ def _error_flag(obs_df):
concat_columns = ''.join(map(str, obs_df.columns.to_list()[1:]))

if '_err' in concat_columns:
- error_flag = 'SYM'
+ error_flag = 'ERR'
print(f'{Dialogue.info()} ERROR_FLAG=SYM / Strictly symmetric errors detected')

elif '_m' in concat_columns and '_p' in concat_columns:
error_flag = 'NOTSYM'
print(f'{Dialogue.info()} ERROR_FLAG=NOTSYM / Asymmetric errors detected')

elif '_m' in concat_columns and '_p' not in concat_columns:
debug_message = f"A column '*_m' was found but not column '*_p'.\n\
{Dialogue.debugger()} If your errors are symmetric you can use the _err tag. See manual and tutorials for more info"
raise (HokiFormatError(debug_message))

elif '_m' not in concat_columns and '_p' in concat_columns:
debug_message = f"A column '*_p' was found but not column '*_m'.\n\
{Dialogue.debugger()} If your errors are symmetric you can use the _err tag. See manual and tutorials for more info"
raise (HokiFormatError(debug_message))

else:
print(f'{Dialogue.info()} ERROR_FLAG=None / No errors detected')
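
A minimal sketch (not hoki's actual implementation) of the detection logic this hunk leaves behind: column names are concatenated into one string, any '_err' substring yields the 'ERR' flag, and everything else now falls through to "no errors":

import pandas as pd

def error_flag_sketch(obs_df):
    # join every column name after 'name' into a single string, as above
    concat_columns = ''.join(map(str, obs_df.columns.to_list()[1:]))
    if '_err' in concat_columns:
        return 'ERR'   # symmetric errors supplied via *_err columns
    return None        # no recognised error columns

print(error_flag_sketch(pd.DataFrame({'name': ['118-1'], 'logL': [5.0], 'logL_err': [0.1]})))  # 'ERR'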

@@ -295,25 +281,9 @@ def calculate_individual_pdfs(obs_df, model, nsamples=500):
if flag is None:
pdfs = calculate_individual_pdfs_None(obs_df, model)

# TODO: WILL NEED TO TAKE CARE OF CMDs too

- elif flag == 'SYM':
+ elif flag == 'ERR':
pdfs = calculate_individual_pdfs_SYM_HRD(obs_df, model)

elif flag == 'NOTSYM':
# will break with CMDs
try:
obs_df.logL_m = np.abs(obs_df.logL_m)
except AttributeError:
pass

try:
obs_df.logT_m = np.abs(obs_df.logT_m)
except AttributeError:
pass

pdfs = calculate_individual_pdfs_NOTSYM_HRD(obs_df, model, nsamples=nsamples) #self.nsamples

return pdfs
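
The surviving 'ERR' branch presumably treats each *_err value as a Gaussian width; the removed NOTSYM code further down samples its symmetric entries that way, with np.random.normal over logT and logL. A self-contained sketch of that sampling step for one star (the sample count of 500 mirrors the old nsamples default):

import numpy as np

nsamples = 500
logT, logT_err = 4.48, 0.1     # one star's temperature and symmetric error
logL, logL_err = 5.0, 0.1      # and its luminosity

sampled_T = np.random.normal(logT, logT_err, nsamples)
sampled_L = np.random.normal(logL, logL_err, nsamples)
# each (sampled_T[i], sampled_L[i]) pair is one candidate HRD location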


@@ -374,185 +344,6 @@ def calculate_distributions(obs_df, model):

return distributions_df

#### ASYMMETRIC ERROR OR MIX

def add_error_flag_column_hrd(obs_df):
""" Flagging which errors are symmetric and which are not FOR HRD """
error_message = f"{Dialogue.debugger()} I will assume this quantity has no errors. If I am wrong, make sure " \
f"your observation dataframe contains all the columns necessary"
try:
xerr_flag = ['NOTSYM' if res!=0.0 else 'SYM' for res in (obs_df.logT_p-obs_df.logT_m).values]
except AttributeError as e:
print(f"{e}")
print(error_message)
xerr_flag = "None"
try:
yerr_flag = ['NOTSYM' if res!=0.0 else 'SYM' for res in (obs_df.logL_p-obs_df.logL_m).values]
except AttributeError as e:
print(f"{e}")
print(error_message)
yerr_flag = "None"

obs_df['xerr_flag'] = xerr_flag
obs_df['yerr_flag'] = yerr_flag
return f"{Dialogue.info()} Flags Added Successfully"


def add_error_flag_column_cmd(obs_df):
"""Flagging which errors are symmetric and which are not FOR CMD"""
xerr_flag = ['NOTSYM' if res!=0.0 else 'SYM' for res in (obs_df.col_p-obs_df.col_m).values]
yerr_flag = ['NOTSYM' if res!=0.0 else 'SYM' for res in (obs_df.mag_p-obs_df.mag_m).values]

obs_df['xerr_flag'] = xerr_flag
obs_df['yerr_flag'] = yerr_flag
return f"{Dialogue.info()} Flags Added Successfully"


# TODO: make a CMD version of NOTSYM
def calculate_individual_pdfs_NOTSYM_HRD(obs_df, model, nsamples=500, p0=1):

# If source names not given we make our own
try:
source_names = obs_df.name
except AttributeError:
warnings.warn("No source names given so I'll make my own", HokiUserWarning)
source_names = ["s" + str(i) for i in range(obs_df.shape[0])]
obs_df['name']=source_names
# If duplicates in source names
if obs_df.name.unique().shape[0] - obs_df.name.shape[0] != 0.0:
raise HokiFormatError(f"Duplicate names detected\n{Dialogue.debugger()} "
f"Please make sure the names of your sources are unique.")

add_error_flag_column_hrd(obs_df)
obs_df.index = obs_df.name

pdfs = pd.DataFrame(np.zeros((obs_df.name.shape[0], 51)).T,
columns = obs_df.name.values)

# dataframes that will contain the individual sampled LOCATIONS
# size: number of samples * number of stars
df_Ls = pd.DataFrame(np.zeros((nsamples, obs_df.name.shape[0])).T, index=obs_df.name.values)
df_Ts = df_Ls.copy()




# lists for symmetric and asymmetric errors

xobs_sym_ls, xobs_notsym_ls, xobs_none_ls = obs_df[obs_df.xerr_flag=='SYM'].name.tolist(), \
obs_df[obs_df.xerr_flag=='NOTSYM'].name.tolist(), \
obs_df[obs_df.xerr_flag=='None'].name.tolist()

yobs_sym_ls, yobs_notsym_ls, yobs_none_ls = obs_df[obs_df.yerr_flag=='SYM'].name.tolist(), \
obs_df[obs_df.yerr_flag=='NOTSYM'].name.tolist(), \
obs_df[obs_df.yerr_flag=='None'].name.tolist()

# NO ERRORS
for colx in xobs_none_ls:
# For each star (column in main) we sample n times
df_Ts.loc[colx] = [obs_df.loc[colx].logT]*nsamples

for coly in yobs_none_ls:
df_Ls.loc[coly] = [obs_df.loc[coly].logL]*nsamples


# SYMMETRIC ERRORS
# need to do T and L separately in case they don't have the same number of symmetric errors
print(f"{Dialogue.running()} Sampling symmetric errors (Gaussian)")
for colx in xobs_sym_ls:
# For each star (column in main) we sample n times
df_Ts.loc[colx] = np.random.normal(obs_df.loc[colx].logT, obs_df.loc[colx].logT_p, nsamples) # doesn't matter if we use _p or _m since the errors are symmetric

for coly in yobs_sym_ls:
df_Ls.loc[coly] = np.random.normal(obs_df.loc[coly].logL, obs_df.loc[coly].logL_p, nsamples)

print(f"{Dialogue.complete()} Sampling symmetric errors (Gaussian) -- {nsamples} SAMPLES per star")

# ASYMMETRIC ERRORS
print(f"{Dialogue.running()} Sampling asymmetric errors (Lognormal)")
# extract values to feed to fit_lognorm_params

frac_err_L, frac_err_T = None, None
# LUMINOSITY
try:
cL, mL, pL, = obs_df.loc[yobs_notsym_ls].logL.values, \
obs_df.loc[yobs_notsym_ls].logL_m.values, \
obs_df.loc[yobs_notsym_ls].logL_p.values

# Fit lognorm parameters
B_L, S_L, Serr_L = fit_lognorm_params(cL, mL, pL)
frac_err_L = Serr_L/S_L
#print(f"{Dialogue.running()} Sampling asymmetric errors (Lognormal) -- {nsamples} SAMPLES per star")
# Sample L from the lognormal and add the offset B_L
i=0
for coly in yobs_notsym_ls:
df_Ls.loc[coly] = stats.lognorm.rvs(s=S_L[i], size=nsamples)+B_L[i]
i+=1

except AttributeError as e:
print(f"{Dialogue.info()} No errors on L")
pass

#print(f"{Dialogue.info()} Fitting Lognormal Parameters")

# TEMPERATURE
try:
cT, mT, pT, = obs_df.loc[xobs_notsym_ls].logT.values, \
obs_df.loc[xobs_notsym_ls].logT_m.values,\
obs_df.loc[xobs_notsym_ls].logT_p.values

B_T, S_T, Serr_T = fit_lognorm_params(cT, mT, pT)
frac_err_T = Serr_T/S_T
#print(f"{Dialogue.running()} Sampling asymmetric errors (Lognormal) -- {nsamples} SAMPLES per star")
# Sample T from the lognormal and add the offset B_T
i=0
for colx in xobs_notsym_ls:
df_Ts.loc[colx] = stats.lognorm.rvs(s=S_T[i], size=nsamples)+B_T[i]
i+=1

except AttributeError:
print(f"{Dialogue.info()} No errors on T")
pass

### WARNING IF FRAC ERROR TOO LARGE
if frac_err_L is not None:
warning_flag=frac_err_L>1e-3
bad_stars = [obs_df.name[i] for i in range(len(warning_flag)) if warning_flag[i]]
warnings.warn(HokiUserWarning(f'The Luminosity error lognormal fits for the following stars have a fractional error >1e-3 '
f'{bad_stars} \nIT IS RECOMMENDED YOU CHECK THEM OUT'))
if frac_err_T is not None:
warning_flag=frac_err_T>1e-3
bad_stars = [obs_df.name[i] for i in range(len(warning_flag)) if warning_flag[i]]
warnings.warn(HokiUserWarning(f'The Temperature error lognormal fits for the following stars have a fractional error >1e-3 '
f'{bad_stars} \nIT IS RECOMMENDED YOU CHECK THEM OUT'))


print(f"{Dialogue.complete()} Sampling asymmetric errors (Lognormal)")

# We're going to need to create a temporary 'obs_df' that fits into the pre-existing functions
# This is the 'template' dataframe we're going to modify in every loop
obs_df_temp = obs_df.copy()[['name', 'logT', 'logL']]

for i in range(nsamples):
obs_df_temp.logL = df_Ls[i]
obs_df_temp.logT = df_Ts[i]

# For each sampled HRD/CMD location we calculate the distribution
distribs_i = calculate_distributions(obs_df_temp, model)

# ... and add it to the Final dataframe
pdfs+=distribs_i

print(f"{Dialogue.info()} Distributions Calculated Successfully")

# and now that we've got our distributions all added up we normalise them!
for col in pdfs.columns:
pdfs[col]=normalise_1d(pdfs[col].values, crop_the_future=False)

print(f"{Dialogue.info()} Distributions Normalised to PDFs Successfully")

return pdfs
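
For reference, the asymmetric path removed above approximates each skewed error bar with a shifted lognormal: fit_lognorm_params maps (central value, lower error, upper error) to an offset B and shape S, and locations are then drawn as lognorm.rvs(s=S) + B. A self-contained sketch of that sampling step, with illustrative (not fitted) parameters:

import numpy as np
from scipy import stats

nsamples = 500
B, S = 5.33, 0.15        # illustrative offset and shape for one star's logL
samples = stats.lognorm.rvs(s=S, size=nsamples) + B
print(np.percentile(samples, [16, 50, 84]))   # 16th/50th/84th percentiles trace an asymmetric error bar around B + 1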

##### SYMMETRIC ERROR ONLY

# TODO: make a CMD version of SYM
4 changes: 2 additions & 2 deletions hoki/age/wizard.py
@@ -19,7 +19,7 @@ class AgeWizard(HokiObject):
AgeWizard object
"""

- def __init__(self, obs_df, model, nsamples=500):
+ def __init__(self, obs_df, model):
"""
Initialisation of the AgeWizard object
@@ -79,7 +79,7 @@ def __init__(self, obs_df, model, nsamples=500):
# This line is obsolete but might need revival if we ever want to add the not normalised distributions again
# self._distributions = calculate_distributions_normalised(self.obs_df, self.model)

- self.pdfs = au.calculate_individual_pdfs(self.obs_df, self.model, nsamples=nsamples).fillna(0)
+ self.pdfs = au.calculate_individual_pdfs(self.obs_df, self.model).fillna(0)
self.sources = self.pdfs.columns.to_list()
self.sample_pdf = None
self._most_likely_age = None
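
A hypothetical usage sketch after this change (only names visible in this diff are used; obs_df and model are placeholders for a user's observation DataFrame and hoki model, and the import path is inferred from the file location):

from hoki.age.wizard import AgeWizard   # import path inferred from hoki/age/wizard.py

wizard = AgeWizard(obs_df, model)        # the nsamples keyword no longer exists
pdfs = wizard.pdfs                       # per-source age PDFs, NaNs filled with 0
sources = wizard.sources                 # one entry per source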
