
Commit

removed asymmetric error case
HeloiseS committed Dec 8, 2020
1 parent a5110cc commit 7433ad8
Showing 3 changed files with 6 additions and 277 deletions.
66 changes: 2 additions & 64 deletions hoki/age/tests/test_utils.py
@@ -58,31 +58,6 @@
'logT_err': np.array([0.1, 0.2, 0.1, 0.1]),
})



stars_NOTSYM = pd.DataFrame.from_dict({'name': np.array(['118-1', '118-2', '118-3', '118-4']),
'logL': np.array([5.0, 5.1, 4.9, 5.9]),
'logL_m': np.array([0.1, 0.2, 0.1, 0.1, ]),
'logL_p': np.array([0.1, 0.23, 0.1, 0.1]),
'logT': np.array([4.48, 4.45, 4.46, 4.47]),
'logT_m': np.array([0.1, 0.2, 0.1, 0.1]),
'logT_p': np.array([0.1, 0.23, 0.1, 0.1]),
})
stars_NOTSYM_T_err_missing = pd.DataFrame.from_dict({'name': np.array(['118-1', '118-2', '118-3', '118-4']),
'logL': np.array([5.0, 5.1, 4.9, 5.9]),
'logL_m': np.array([0.1, 0.2, 0.1, 0.1, ]),
'logL_p': np.array([0.1, 0.23, 0.1, 0.1]),
'logT': np.array([4.48, 4.45, 4.46, 4.47]),
})



# asymmetric errors test
m = np.array([0.1, .17, 0.13])
c = np.array([6.33, 6.25, 6.35])
p = np.array([0.12, 0.18, 0.15])
cdf = np.array([0.16, 0.5, 0.84])
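
The fixtures deleted above encode asymmetric error bars with paired *_m (lower) and *_p (upper) columns, and the m, c, p, cdf arrays feed the lognormal-fit test further down; the surviving fixtures carry a single *_err column per quantity. A minimal sketch of the two observation formats, with column names taken from this diff and values borrowed from the fixtures:

import pandas as pd

# Symmetric errors: one *_err column per quantity (the format still supported after this commit).
stars_sym_example = pd.DataFrame({'name': ['118-1', '118-2'],
                                  'logL': [5.0, 5.1], 'logL_err': [0.1, 0.2],
                                  'logT': [4.48, 4.45], 'logT_err': [0.1, 0.2]})

# Asymmetric errors: separate lower (*_m) and upper (*_p) columns
# (the format exercised by the fixtures removed in this commit).
stars_notsym_example = pd.DataFrame({'name': ['118-1', '118-2'],
                                     'logL': [5.0, 5.1],
                                     'logL_m': [0.1, 0.2], 'logL_p': [0.1, 0.23],
                                     'logT': [4.48, 4.45],
                                     'logT_m': [0.1, 0.2], 'logT_p': [0.1, 0.23]})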

#########
# MISC #
#########
@@ -96,22 +71,13 @@ def test_basic(self):
assert norm[2] == 1, 'Normalisation done wrong'
assert sum(norm) == 1, "Normalisation done wrong"


class TestFitLognormParams(object):
def test_it_runs(self):
bs, ss, serrs = au.fit_lognorm_params(c, m, p)
assert np.sum(np.isclose(bs, np.array([5.33, 5.25, 5.35]), atol=1e-2)) == 3, f"{bs}"


class TestErrorFlag(object):
def test_no_err(self):
assert au._error_flag(stars_none) is None, "Error Flag should be None"

- def test_SYM(self):
- assert au._error_flag(stars_SYM) == 'SYM', "Error Flag should be SYM"
+ def test_ERR(self):
+ assert au._error_flag(stars_SYM) == 'ERR', "Error Flag should be ERR"

def test_NOTSYM(self):
assert au._error_flag(stars_NOTSYM) == 'NOTSYM', "Error Flag should be NOTSYM"

#######################
# FINDING COORDINATES #
@@ -193,34 +159,6 @@ def test_symmetric_errors(self):
pdf_df = au.calculate_individual_pdfs(stars_SYM, myhrd)
assert not np.isnan(sum(pdf_df['118-4'])), "something went wrong with symmetric errors"

def test_asymmetric_errors(self):
pdf_df = au.calculate_individual_pdfs(stars_NOTSYM, myhrd)
assert not np.isnan(sum(pdf_df['118-4'])), "something went wrong with asymmetric errors"

#### Asymmetric errors

class TestAddErrorFlagColumnHRD(object):
def test_basic(self):
df = stars_NOTSYM.copy()
au.add_error_flag_column_hrd(df)
assert df.xerr_flag[1] == 'NOTSYM', "Incorrect flag"
del df

def test_T_err_missing(self):
df = stars_NOTSYM_T_err_missing.copy()
au.add_error_flag_column_hrd(df)
assert df.xerr_flag[1] == 'None', "Incorrect flag"

# Because of randomness I can't check the accuracy of the value, but at least I can make sure
# it runs to give us a value.
def test_SYM_T_err_missing(self):
pdfs = au.calculate_individual_pdfs(stars_SYM_T_err_missing, myhrd)
assert isinstance(pdfs['118-4'][5], float), "Not producing values with SYM_T_err_missing"

def test_NOTSYM_T_err_missing(self):
pdfs = au.calculate_individual_pdfs(stars_NOTSYM_T_err_missing, myhrd)
assert isinstance(pdfs['118-4'][5], float),"Not producing values with NOTSYM_T_err_missing"

#####################################
# PUTTING PDFS TOGETHER IN SOME WAY #
#####################################
213 changes: 2 additions & 211 deletions hoki/age/utils.py
@@ -87,23 +87,9 @@ def _error_flag(obs_df):
concat_columns = ''.join(map(str, obs_df.columns.to_list()[1:]))

if '_err' in concat_columns:
- error_flag = 'SYM'
+ error_flag = 'ERR'
print(f'{Dialogue.info()} ERROR_FLAG=SYM / Strictly symmetric errors detected')

elif '_m' in concat_columns and '_p' in concat_columns:
error_flag = 'NOTSYM'
print(f'{Dialogue.info()} ERROR_FLAG=NOTSYM / Asymmetric errors detected')

elif '_m' in concat_columns and '_p' not in concat_columns:
debug_message = f"A column '*_m' was found but not column '*_p'.\n\
{Dialogue.debugger()} If your errors are symmetric you can use the _err tag. See manual and tutorials for more info"
raise (HokiFormatError(debug_message))

elif '_m' not in concat_columns and '_p' in concat_columns:
debug_message = f"A column '*_p' was found but not column '*_m'.\n\
{Dialogue.debugger()} If your errors are symmetric you can use the _err tag. See manual and tutorials for more info"
raise (HokiFormatError(debug_message))

else:
print(f'{Dialogue.info()} ERROR_FLAG=None / No errors detected')
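
A minimal sketch (not hoki's actual implementation) of the detection logic this hunk leaves behind: column names are concatenated into one string, any '_err' substring yields the 'ERR' flag, and everything else now falls through to "no errors":

import pandas as pd

def error_flag_sketch(obs_df):
    # join every column name after 'name' into a single string, as above
    concat_columns = ''.join(map(str, obs_df.columns.to_list()[1:]))
    if '_err' in concat_columns:
        return 'ERR'   # symmetric errors supplied via *_err columns
    return None        # no recognised error columns

print(error_flag_sketch(pd.DataFrame({'name': ['118-1'], 'logL': [5.0], 'logL_err': [0.1]})))  # 'ERR'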

@@ -295,25 +281,9 @@ def calculate_individual_pdfs(obs_df, model, nsamples=500):
if flag is None:
pdfs = calculate_individual_pdfs_None(obs_df, model)

# TODO: WILL NEED TO TAKE CARE OF CMDs too

- elif flag == 'SYM':
+ elif flag == 'ERR':
pdfs = calculate_individual_pdfs_SYM_HRD(obs_df, model)

elif flag == 'NOTSYM':
# will break with CMDs
try:
obs_df.logL_m = np.abs(obs_df.logL_m)
except AttributeError:
pass

try:
obs_df.logT_m = np.abs(obs_df.logT_m)
except AttributeError:
pass

pdfs = calculate_individual_pdfs_NOTSYM_HRD(obs_df, model, nsamples=nsamples) #self.nsamples

return pdfs
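
The surviving 'ERR' branch presumably treats each *_err value as a Gaussian width; the removed NOTSYM code further down samples its symmetric entries that way, with np.random.normal over logT and logL. A self-contained sketch of that sampling step for one star (the sample count of 500 mirrors the old nsamples default):

import numpy as np

nsamples = 500
logT, logT_err = 4.48, 0.1     # one star's temperature and symmetric error
logL, logL_err = 5.0, 0.1      # and its luminosity

sampled_T = np.random.normal(logT, logT_err, nsamples)
sampled_L = np.random.normal(logL, logL_err, nsamples)
# each (sampled_T[i], sampled_L[i]) pair is one candidate HRD location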


@@ -374,185 +344,6 @@ def calculate_distributions(obs_df, model):

return distributions_df

#### ASYMMETRIC ERROR OR MIX

def add_error_flag_column_hrd(obs_df):
""" Flagging which errors are symmetric and which are not FOR HRD """
error_message = f"{Dialogue.debugger()} I will assume this quantity has no errors. If I am wrong, make sure " \
f"your observation dataframe contains all the columns necessary"
try:
xerr_flag = ['NOTSYM' if res!=0.0 else 'SYM' for res in (obs_df.logT_p-obs_df.logT_m).values]
except AttributeError as e:
print(f"{e}")
print(error_message)
xerr_flag = "None"
try:
yerr_flag = ['NOTSYM' if res!=0.0 else 'SYM' for res in (obs_df.logL_p-obs_df.logL_m).values]
except AttributeError as e:
print(f"{e}")
print(error_message)
yerr_flag = "None"

obs_df['xerr_flag'] = xerr_flag
obs_df['yerr_flag'] = yerr_flag
return f"{Dialogue.info()} Flags Added Successfully"


def add_error_flag_column_cmd(obs_df):
"""Flagging which errors are symmetric and which are not FOR CMD"""
xerr_flag = ['NOTSYM' if res!=0.0 else 'SYM' for res in (obs_df.col_p-obs_df.col_m).values]
yerr_flag = ['NOTSYM' if res!=0.0 else 'SYM' for res in (obs_df.mag_p-obs_df.mag_m).values]

obs_df['xerr_flag'] = xerr_flag
obs_df['yerr_flag'] = yerr_flag
return f"{Dialogue.info()} Flags Added Successfully"


# TODO: make a CMD version of NOTSYM
def calculate_individual_pdfs_NOTSYM_HRD(obs_df, model, nsamples=500, p0=1):

# If source names not given we make our own
try:
source_names = obs_df.name
except AttributeError:
warnings.warn("No source names given so I'll make my own", HokiUserWarning)
source_names = ["s" + str(i) for i in range(obs_df.shape[0])]
obs_df['name']=source_names
# If duplicates in source names
if obs_df.name.unique().shape[0] - obs_df.name.shape[0] != 0.0:
raise HokiFormatError(f"Duplicate names detected\n{Dialogue.debugger()} "
f"Please make sure the names of your sources are unique.")

add_error_flag_column_hrd(obs_df)
obs_df.index = obs_df.name

pdfs = pd.DataFrame(np.zeros((obs_df.name.shape[0], 51)).T,
columns = obs_df.name.values)

# dataframes that will contain the individual sampled LOCATIONS
# size: number of samples * number of stars
df_Ls = pd.DataFrame(np.zeros((nsamples, obs_df.name.shape[0])).T, index=obs_df.name.values)
df_Ts = df_Ls.copy()




# lists for symmetric and asymmetric errors

xobs_sym_ls, xobs_notsym_ls, xobs_none_ls = obs_df[obs_df.xerr_flag=='SYM'].name.tolist(), \
obs_df[obs_df.xerr_flag=='NOTSYM'].name.tolist(), \
obs_df[obs_df.xerr_flag=='None'].name.tolist()

yobs_sym_ls, yobs_notsym_ls, yobs_none_ls = obs_df[obs_df.yerr_flag=='SYM'].name.tolist(), \
obs_df[obs_df.yerr_flag=='NOTSYM'].name.tolist(), \
obs_df[obs_df.yerr_flag=='None'].name.tolist()

# NO ERRORS
for colx in xobs_none_ls:
# For each star (column in main) we sample n times
df_Ts.loc[colx] = [obs_df.loc[colx].logT]*nsamples

for coly in yobs_none_ls:
df_Ls.loc[coly] = [obs_df.loc[coly].logL]*nsamples


# SYMMETRIC ERRORS
# need to do T and L separately in case they don't have the same number of symmetric errors
print(f"{Dialogue.running()} Sampling symmetric errors (Gaussian)")
for colx in xobs_sym_ls:
# For each star (column in main) we sample n times
df_Ts.loc[colx] = np.random.normal(obs_df.loc[colx].logT, obs_df.loc[colx].logT_p, nsamples) # doesn't matter if we use _p or _m since the errors are symmetric

for coly in yobs_sym_ls:
df_Ls.loc[coly] = np.random.normal(obs_df.loc[coly].logL, obs_df.loc[coly].logL_p, nsamples)

print(f"{Dialogue.complete()} Sampling symmetric errors (Gaussian) -- {nsamples} SAMPLES per star")

# ASYMMETRIC ERRORS
print(f"{Dialogue.running()} Sampling asymmetric errors (Lognormal)")
# extract values to feed to fit_lognorm_params

frac_err_L, frac_err_T = None, None
# LUMINOSITY
try:
cL, mL, pL, = obs_df.loc[yobs_notsym_ls].logL.values, \
obs_df.loc[yobs_notsym_ls].logL_m.values, \
obs_df.loc[yobs_notsym_ls].logL_p.values

# Fit lognorm parameters
B_L, S_L, Serr_L = fit_lognorm_params(cL, mL, pL)
frac_err_L = Serr_L/S_L
#print(f"{Dialogue.running()} Sampling asymmetric errors (Lognormal) -- {nsamples} SAMPLES per star")
# Sample L from the lognormal and add the offset B_L
i=0
for coly in yobs_notsym_ls:
df_Ls.loc[coly] = stats.lognorm.rvs(s=S_L[i], size=nsamples)+B_L[i]
i+=1

except AttributeError as e:
print(f"{Dialogue.info()} No errors on L")
pass

#print(f"{Dialogue.info()} Fitting Lognormal Parameters")

# TEMPERATURE
try:
cT, mT, pT, = obs_df.loc[xobs_notsym_ls].logT.values, \
obs_df.loc[xobs_notsym_ls].logT_m.values,\
obs_df.loc[xobs_notsym_ls].logT_p.values

B_T, S_T, Serr_T = fit_lognorm_params(cT, mT, pT)
frac_err_T = Serr_T/S_T
#print(f"{Dialogue.running()} Sampling asymmetric errors (Lognormal) -- {nsamples} SAMPLES per star")
# Sample T from the lognormal and add the offset B_T
i=0
for colx in xobs_notsym_ls:
df_Ts.loc[colx] = stats.lognorm.rvs(s=S_T[i], size=nsamples)+B_T[i]
i+=1

except AttributeError:
print(f"{Dialogue.info()} No errors on T")
pass

### WARNING IF FRAC ERROR TOO LARGE
if frac_err_L is not None:
warning_flag=frac_err_L>1e-3
bad_stars = [obs_df.name[i] for i in range(len(warning_flag)) if warning_flag[i]]
warnings.warn(HokiUserWarning(f'The Luminosity error lognormal fits for the following stars have a fractional error >1e-3 '
f'{bad_stars} \nIT IS RECOMMENDED YOU CHECK THEM OUT'))
if frac_err_T is not None:
warning_flag=frac_err_T>1e-3
bad_stars = [obs_df.name[i] for i in range(len(warning_flag)) if warning_flag[i]]
warnings.warn(HokiUserWarning(f'The Temperature error lognormal fits for the following stars have a fractional error >1e-3 '
f'{bad_stars} \nIT IS RECOMMENDED YOU CHECK THEM OUT'))


print(f"{Dialogue.complete()} Sampling asymmetric errors (Lognormal)")

# We're going to need to create a temporary 'obs_df' that fits into the pre-existing functions
# This is the 'template' dataframe we're going to modify in every loop
obs_df_temp = obs_df.copy()[['name', 'logT', 'logL']]

for i in range(nsamples):
obs_df_temp.logL = df_Ls[i]
obs_df_temp.logT = df_Ts[i]

# For each sampled HRD/CMD location we calculate the distribution
distribs_i = calculate_distributions(obs_df_temp, model)

# ... and add it to the Final dataframe
pdfs+=distribs_i

print(f"{Dialogue.info()} Distributions Calculated Successfully")

# and now that we've got our distributions all added up we normalise them!
for col in pdfs.columns:
pdfs[col]=normalise_1d(pdfs[col].values, crop_the_future=False)

print(f"{Dialogue.info()} Distributions Normalised to PDFs Successfully")

return pdfs
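
For reference, the asymmetric path removed above approximates each skewed error bar with a shifted lognormal: fit_lognorm_params maps (central value, lower error, upper error) to an offset B and shape S, and locations are then drawn as lognorm.rvs(s=S) + B. A self-contained sketch of that sampling step, with illustrative (not fitted) parameters:

import numpy as np
from scipy import stats

nsamples = 500
B, S = 5.33, 0.15        # illustrative offset and shape for one star's logL
samples = stats.lognorm.rvs(s=S, size=nsamples) + B
print(np.percentile(samples, [16, 50, 84]))   # 16th/50th/84th percentiles trace an asymmetric error bar around B + 1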

##### SYMMETRIC ERROR ONLY

# TODO: make a CMD version of SYM
4 changes: 2 additions & 2 deletions hoki/age/wizard.py
@@ -19,7 +19,7 @@ class AgeWizard(HokiObject):
AgeWizard object
"""

- def __init__(self, obs_df, model, nsamples=500):
+ def __init__(self, obs_df, model):
"""
Initialisation of the AgeWizard object
@@ -79,7 +79,7 @@ def __init__(self, obs_df, model, nsamples=500):
# This line is obsolete but might need revival if we ever want to add the not normalised distributions again
# self._distributions = calculate_distributions_normalised(self.obs_df, self.model)

- self.pdfs = au.calculate_individual_pdfs(self.obs_df, self.model, nsamples=nsamples).fillna(0)
+ self.pdfs = au.calculate_individual_pdfs(self.obs_df, self.model).fillna(0)
self.sources = self.pdfs.columns.to_list()
self.sample_pdf = None
self._most_likely_age = None
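
A hypothetical usage sketch after this change (only names visible in this diff are used; obs_df and model are placeholders for a user's observation DataFrame and hoki model, and the import path is inferred from the file location):

from hoki.age.wizard import AgeWizard   # import path inferred from hoki/age/wizard.py

wizard = AgeWizard(obs_df, model)        # the nsamples keyword no longer exists
pdfs = wizard.pdfs                       # per-source age PDFs, NaNs filled with 0
sources = wizard.sources                 # one entry per source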
