# Option 1
(If we want to use python, that is)

In [2]:
# Importing python packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy as sp

In [3]:
# Read the four "Option 1" tables with pandas.
# index_col=False tells pandas not to use the first column as the row index.
csv_paths = (
    "Option 1/HarrisPartI.csv",
    "Option 1/HarrisPartIII.csv",
    "Option 1/Krause21.csv",
    "Option 1/vandenBerg_table2.csv",
)
inputHarrisPartI, inputHarrisPartIII, inputKrause21, inputVandenBerg = [
    pd.read_csv(csv_path, index_col=False) for csv_path in csv_paths
]

# Each table is printed under a banner wrapped in the ANSI escape codes
# \033[31m ... \033[0m, which render the banner text in red.
banners_and_frames = (
    ("\033[31mHarrisPartI\033[0m", inputHarrisPartI),
    ("\033[31mHarrisPartIII\033[0m", inputHarrisPartIII),
    ("\033[31mKrause21\033[0m", inputKrause21),
    ("\033[31mVandenBerg\033[0m", inputVandenBerg),
)
for section_number, (section_banner, section_frame) in enumerate(banners_and_frames):
    # Two blank lines separate consecutive tables (nothing before the first one)
    if section_number > 0:
        print()
        print()
    print(section_banner)
    print(section_frame)

[31mHarrisPartI[0m
            ID    Name           RA          DEC       L      B  R_Sun  R_gc  \
0      NGC 104  47 Tuc  00:24:05.67  -72:04:52.6  305.89 -44.89    4.5   7.4   
1      NGC 288     NaN  00:52:45.24  -26:34:57.4  152.30 -89.38    8.9  12.0   
2      NGC 362     NaN  01:03:14.26  -70:50:55.6  301.53 -46.25    8.6   9.4   
3    Whiting 1     NaN     02:02:57    -03:15:10  161.22 -60.76   30.1  34.5   
4     NGC 1261     NaN  03:12:16.21  -55:12:58.4  270.54 -52.12   16.3  18.1   
..         ...     ...          ...          ...     ...    ...    ...   ...   
152   NGC 7089     M 2  21:33:27.02  -00:49:23.7   53.37 -35.77   11.5  10.4   
153   NGC 7099    M 30  21:40:22.12  -23:10:47.5   27.18 -46.84    8.1   7.1   
154     Pal 12     NaN  21:46:38.84  -21:15:09.4   30.51 -47.68   19.0  15.8   
155     Pal 13     NaN  23:06:44.44  +12:46:19.2   87.10 -42.70   26.0  26.9   
156   NGC 7492     NaN  23:08:26.63  -15:36:41.4   53.39 -63.48   26.3  25.3   

        X     Y   

https://physics.mcmaster.ca/~harris/mwgc.dat

###  Part I:  Identifications and Positional Data

| Key | Meaning |
| --- | ---------------------|
| ID | Cluster identification number |
| Name | Other commonly used cluster name |
| RA , DEC | Right ascension and declination (epoch J2000) |
| L , B | Galactic longitude and latitude (degrees) |
| R_Sun | Distance from Sun (kiloparsecs) |
| R_gc | Distance from Galactic center (kpc), assuming R_0=8.0 kpc |
| X, Y, Z | Galactic distance components X,Y,Z in kiloparsecs, in a Sun-centered coordinate system; X points toward Galactic center, Y in direction of Galactic rotation, Z toward North Galactic Pole |

###  Part II:  Metallicity and Photometry
(NOTE: This data wasn't provided on Moodle, though we can still use it if we'd like to. It can be found in the link above)

| Key | Meaning |
| --- | ---------------------|
| ID | Cluster identification |
| [Fe/H] | Metallicity [Fe/H] |
| wt | Weight of mean metallicity; essentially the number of independent [Fe/H] measurements averaged together.  See bibliography for full description |
| E(B-V) | Foreground reddening |
| V_HB | V magnitude level of the horizontal branch (or RR Lyraes) |
| (m-M)V | Apparent visual distance modulus |
| V_t | Integrated V magnitude of the cluster |
| M_V,t | Absolute visual magnitude (cluster luminosity),  M_V,t = V_t - (m-M)V |
| U-B   B-V   V-R   V-I | Integrated color indices (uncorrected for reddening) |
| spt | Spectral type of the integrated cluster light |
| ellip | Projected ellipticity of isophotes, e = 1-(b/a) |

### Part III:  Velocities and Structural Parameters

| Key | Meaning |
| --- | ---------------------|
| ID | Cluster identification |
| v_r | Heliocentric radial velocity (km/s) |
| +/- | Observational (internal) uncertainty in radial velocity |
| v_LSR | Radial velocity relative to Solar neighborhood LSR |
| sig_v | Central velocity dispersion sig_v (km/s) |
| +/- | Observational (internal) uncertainty in velocity dispersion |
| c | King-model central concentration, c = log(r_t/r_c); a 'c' denotes a core-collapsed cluster |
| r_c | Core radius in arcmin |
| r_h | Half-light radius in arcmin |
| mu_V | Central surface brightness, V magnitudes per square arcsecond |
| rho_0 | Central luminosity density, log_10(Solar luminosities per cubic parsec) |
| lg(tc) | Core relaxation time t(r_c), in log_10(years) |
| lg(th) | Median relaxation time t(r_h), in log_10(years) |

### vandenBerg_table2

https://arxiv.org/pdf/1308.2257

(page 48)
| Key | Meaning |
| --- | --------------------- |
| NGC | Cluster identification |
| Name | |
| FeH | adopted [Fe/H] values |
| Age | |
| Age_err | |
| Method | whether the adopted age is based primarily on the vertical, or the horizontal, method |
| Figs | |
| Range | |
| HBtype | HB type (from Mackey & van den Bergh 2005) |
| R_G | Galactocentric distance (in kpc) |
| M_V | absolute integrated visual magnitude |
| v_e0 | the central escape velocity (in km/s) |
| log_sigma_0 | the common logarithm of the surface density of stars at the cluster center (in $M_\odot/\mathrm{pc}^2$) |

In [None]:
# Report the size of every input table first, so that rows dropped by the
# merges further down can be spotted by comparing against these counts.
for count_label, counted_frame in (
    ("Rows inputHarrisPartI:", inputHarrisPartI),
    ("Rows inputHarrisPartIII:", inputHarrisPartIII),
    ("Rows inputKrause21:", inputKrause21),
    ("Rows inputVandenBerg:", inputVandenBerg),
):
    print(count_label, len(counted_frame))

# First join: the two Harris tables, matched on their shared 'ID' column
# (pandas merges are inner joins unless `how` is given).
merged = inputHarrisPartI.merge(inputHarrisPartIII, on='ID')
print("Merge 1, Rows:", merged.shape[0])


# --------------------------------------------------------------
# Converting the inputKrause21 'Object' entries to spaced catalogue IDs (e.g. "NGC288" -> "NGC 288", or "Palomar12" -> "Palomar 12")
import re

def convert_object_to_catid(object_str: str) -> str:
    """Put exactly one space in front of every run of digits in an object name.

    Turns compact names such as "NGC288" into the spaced form "NGC 288".
    Whitespace that already precedes a digit run is collapsed into that single
    space, so the conversion is idempotent: applying it to an already
    converted name (for example when the cell is re-run after the column has
    been overwritten) returns the name unchanged instead of "NGC  288".

    NOTE(review): only the spacing is changed. Full names stay as written
    ("Palomar12" -> "Palomar 12"), so they will not match IDs that use an
    abbreviation such as "Pal 12" unless an extra mapping is applied.

    Args:
        object_str: Object name, e.g. "NGC288" or "Terzan7".

    Returns:
        The name with a single space before each digit run.

    Raises:
        TypeError: If ``object_str`` is not a string (raised by ``re.sub``).
    """
    # \s* swallows any whitespace already present before the digits, so the
    # replacement always yields exactly one space.
    return re.sub(r"\s*([0-9]+)", r" \1", object_str)

# # Debugging function
# print(inputKrause21['Object'][1])
# print(inputKrause21['Object'].apply(convert_object_to_catid)[1])
# print(inputKrause21['Object'][60])
# print(inputKrause21['Object'].apply(convert_object_to_catid)[60])

# ---------------------------------------------------------------

def convert_ngc_to_catid(object_str: str) -> str:
    """Prefix a bare NGC number with "NGC " to form a catalogue ID.

    For example "104" becomes "NGC 104". A value that already starts with
    "NGC " is returned unchanged, so the conversion is idempotent: re-running
    it on an already converted column cannot produce "NGC NGC 104".

    Args:
        object_str: NGC number as a string, e.g. "104".

    Returns:
        The value with a single leading "NGC " prefix.

    Raises:
        TypeError: If ``object_str`` is not a string.
    """
    prefix = "NGC "
    # Concatenate first so non-string input still raises TypeError, exactly
    # as the plain concatenation did before the idempotency check was added.
    prefixed = prefix + object_str
    return object_str if object_str.startswith(prefix) else prefixed

# # Debugging function
# print(inputVandenBerg['#NGC'][1])
# print(inputVandenBerg['#NGC'].apply(convert_ngc_to_catid)[1])

# ---------------------------------------------------------------

# Bring the Krause21 object names into the spaced form used by the 'ID' column.
# The column is overwritten in place, so re-running this cell feeds the
# already converted names through the conversion again.
krause_catalogue_ids = inputKrause21['Object'].apply(convert_object_to_catid)
inputKrause21['Object'] = krause_catalogue_ids

# Second join: keep only the clusters whose 'ID' also appears as a Krause21 'Object'.
merged = merged.merge(inputKrause21, left_on='ID', right_on='Object')
# In the saved run 59 of the 61 Krause21 rows survive; Ruprecht106 and Palomar12 find no matching 'ID'
print("Merge 2, Rows:", merged.shape[0])

# Same treatment for the VandenBerg table: its '#NGC' values get the "NGC " prefix
# (also overwritten in place).
vandenberg_catalogue_ids = inputVandenBerg['#NGC'].apply(convert_ngc_to_catid)
inputVandenBerg['#NGC'] = vandenberg_catalogue_ids

# Third join: keep only the clusters whose 'ID' also appears in the prefixed '#NGC' column.
merged = merged.merge(inputVandenBerg, left_on='ID', right_on='#NGC')
# In the saved run 51 rows remain after this step
print("Merge 3, Rows:", merged.shape[0])

print(merged)


Rows inputHarrisPartI: 157
Rows inputHarrisPartIII: 157
Rows inputKrause21: 61
Rows inputVandenBerg: 55
Merge 1, Rows: 157
Merge 2, Rows: 59
0          NGC 104
1          NGC 288
2          NGC 362
3         NGC 1261
4         NGC 1851
          ...     
56        NGC 7089
57        NGC 7099
58    Ruprecht 106
59        Terzan 7
60      Palomar 12
Name: Object, Length: 61, dtype: object
Merge 3, Rows: 51
          ID    Name           RA          DEC       L      B  R_Sun  R_gc  \
0    NGC 104  47 Tuc  00:24:05.67  -72:04:52.6  305.89 -44.89    4.5   7.4   
1    NGC 288     NaN  00:52:45.24  -26:34:57.4  152.30 -89.38    8.9  12.0   
2    NGC 362     NaN  01:03:14.26  -70:50:55.6  301.53 -46.25    8.6   9.4   
3   NGC 1261     NaN  03:12:16.21  -55:12:58.4  270.54 -52.12   16.3  18.1   
4   NGC 1851     NaN  05:14:06.76  -40:02:47.6  244.51 -35.03   12.1  16.6   
5   NGC 2808     NaN  09:12:03.10  -64:51:48.6  282.19 -11.25    9.6  11.1   
6   NGC 3201     NaN  10:17:36.82  -46:24:44.9