# III. Filter data using Cordoni's algorithm

> Francisco Carrasco Varela - Pontificia Universidad Católica de Chile (PUC) - ffcarrasco@uc.cl ⭐

<center>
<mark>The following Jupyter Notebook is used to extract and work with Gaia DR3 data<br>
    (and other data releases) </mark>
</center>

In [2]:
# Import all the libraries we will need

%matplotlib inline
from dataclasses import dataclass, field
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib
import numpy as np
import os
from astropy.io import ascii
from astropy.table import vstack, Table

import sys
sys.path.insert(0, '../Scripts/')
import Parameters as PR

In [31]:
def check_if_file_exists(filename_path: str) -> None:
    """
    Checks if a file with filtered data that should have been created in the previous step
    of this Notebook is created. If it is not created it will exit the program.
    """
    isExist = os.path.exists(filename_path)
    if not isExist:
        print("You must fully run the previous step in this Notebook and create a file with ", end='')
        print("filtered data before running this cell.")
        sys.exit("Create filtered file in previous Notebook step and retry.")
    return

dir_path = f"../Objects/{obj.name.upper()}/"
save_filename = f"2_{obj.name.upper()}_f_data.dat"

filename_filtered_path = f".{dir_path}{save_filename}"

# Check if the filtered file created in the previous step of thi Notebook exists
check_if_file_exists(filename_filtered_path)

gaia_data = Table.read(filename_filtered_path, format='ascii.ecsv') # get data from previous Notebook step
print('Data read sucessfully')

You must fully run the previous step in this Notebook and create a file with filtered data before running this cell.


SystemExit: Create filtered file in previous Notebook step and retry.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [24]:
# Get data from Vasiliev (2019) file

###################################################
object_name = "NGC104"  # <--- EDITABLE
vasiliev_file = "../ObservedData/Vasiliev_2019_Gaia_parameters.dat"
###################################################


data_list = PR.get_GC_params(vasiliev_file)
obj, success = PR.get_selected_GC(object_name, data_list)

if success:
    n_times = 60
    print("Object detected succesfully!")
    print("-"*n_times)
    print(f"Object name: {obj.name}")
    print(f"Mean Proper Motion RA (mas/yr): {obj.pm_RA} +- {obj.err_pm_RA}")
    print(f"Mean Proper Motion DEC (mas/yr): {obj.pm_DEC} +- {obj.err_pm_DEC}")
    print("-"*n_times)

Object detected succesfully!
------------------------------------------------------------
Object name: NGC104
Mean Proper Motion RA (mas/yr): 5.237 +- 0.039
Mean Proper Motion DEC (mas/yr): -2.524 +- 0.039
------------------------------------------------------------


In [30]:
def getBinSize(values: list[float], 
               numberOfDivisions: int
              ) -> (float, float, float):
    """
    Obtains the maximum and minimum value of a list and returns the difference value
    between each number that results dividing the max - min divided into N parts,
    i.e., bin size. 
    Returns the minimum, maximum and size of each bin after dividing this interval
    into N parts.
    """
    assert (numberOfDivisions != 0), "You cannot divide by zero"
    assert (numberOfDivisions != 1),  "Dividing by 1 division is nonsense"
    
    maxValue = np.amax(values)
    minValue = np.amin(values)

    return maxValue, minValue, (maxValue - minValue)/ (1.0*numberOfDivisions)

@dataclass
class parameterList:
    G_BP: list[float] = field(default_factory=list)
    G_RP: list[float] = field(default_factory=list)
    as_gof_al: list[float] = field(default_factory=list)
    parallax: list[float] = field(default_factory=list)
        
    

@dataclass(kw_only=True)
class Bin:
    ID: int = 0
    params: parameterList = field(default_factory=list)
    minVal_G_RP: float 
    maxVal_G_RP: float


@dataclass(kw_only=True)
class TotalBins:
    bins: list[Bin] = field(default_factory=list)
    
test = [-10., 19., -4., -5., -7., 5., 6., 3., 9., 15., 18.,0.]    
nDiv = 10
maxVal, minVal, binVal = getBinSize(test, nDiv)

print("values are", minVal, maxVal, binVal)

totBins = TotalBins()

for j in range(0, nDiv):
    minMag_G_RP = minVal+(binVal * j)
    maxMag_G_RP = minVal + (binVal *(j+1))
    print(f'Bin number -> {j}: [{minVal+(binVal * j)}, {minVal + (binVal *(j+1))}]')
    newBin = Bin(minVal_G_RP=minMag_G_RP, maxVal_G_RP=maxMag_G_RP)
    for data in test:
        if minMag_G_RP <= data < maxMag_G_RP:
            
    totBins.bins.append(newBin)
    
print(totBins)
print(totBins.bins[0])

values are -10.0 19.0 2.9
Bin number -> 0: [-10.0, -7.1]
Bin number -> 1: [-7.1, -4.2]
Bin number -> 2: [-4.2, -1.3000000000000007]
Bin number -> 3: [-1.3000000000000007, 1.5999999999999996]
Bin number -> 4: [1.5999999999999996, 4.5]
Bin number -> 5: [4.5, 7.399999999999999]
Bin number -> 6: [7.399999999999999, 10.3]
Bin number -> 7: [10.3, 13.2]
Bin number -> 8: [13.2, 16.099999999999998]
Bin number -> 9: [16.099999999999998, 19.0]
TotalBins(bins=[Bin(ID=0, params=[], minVal_G_RP=-10.0, maxVal_G_RP=-7.1), Bin(ID=0, params=[], minVal_G_RP=-7.1, maxVal_G_RP=-4.2), Bin(ID=0, params=[], minVal_G_RP=-4.2, maxVal_G_RP=-1.3000000000000007), Bin(ID=0, params=[], minVal_G_RP=-1.3000000000000007, maxVal_G_RP=1.5999999999999996), Bin(ID=0, params=[], minVal_G_RP=1.5999999999999996, maxVal_G_RP=4.5), Bin(ID=0, params=[], minVal_G_RP=4.5, maxVal_G_RP=7.399999999999999), Bin(ID=0, params=[], minVal_G_RP=7.399999999999999, maxVal_G_RP=10.3), Bin(ID=0, params=[], minVal_G_RP=10.3, maxVal_G_RP=13.2), 

In [26]:
499.0 - 449.09999999999997

49.900000000000034