In [None]:
import numpy as np
import pandas as pd
import astropy.io.fits as fits
import healpy as hp
from scipy import integrate
import matplotlib.pyplot as plt
from astropy.cosmology import Planck15 as cosmo
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
#from completeness import create_completeness_dataframe

In [None]:
# Loading the data
# Both catalogues are taken from HDU index 1 of their FITS files.
# NOTE(review): the paths are absolute and machine-specific, so this cell only runs
# where that directory layout exists — consider a configurable data directory.
# NOTE(review): the HDU lists returned by fits.open() are never closed explicitly.
galaxy_catalog_raw = fits.open('/home/farnoosh/Nextcloud/Master_Thesis_all/codes/data/GAMA/merged/StellarMass-gkvScience/mergedStellarMass-gkvScience')[1].data    # galaxy catalogue
efeds_raw = fits.open('/home/farnoosh/Nextcloud/Master_Thesis_all/codes/data/eFEDS/Mathias_Klug/efeds_members.fit')[1].data    # cluster catalogue

In [None]:
# Masks
# Clusters: keep only the rows whose VDISP value is not NaN.
cluster_mask = np.logical_not(np.isnan(efeds_raw['VDISP']))

# Galaxies: a row is kept only when every cut in the list below holds.
galaxy_mask = np.logical_and.reduce([
    galaxy_catalog_raw['uberclass'] == 1,        # classified as a galaxy
    galaxy_catalog_raw['duplicate'] == False,    # not flagged as a duplicate
    galaxy_catalog_raw['mask'] == False,         # not flagged by 'mask'
    galaxy_catalog_raw['NQ'] > 2,
    galaxy_catalog_raw['SC'] > 7,
    galaxy_catalog_raw['mstar'] > 0,
    # sky footprint
    galaxy_catalog_raw['RAcen'] > 126.5,
    galaxy_catalog_raw['RAcen'] < 145.1,
    galaxy_catalog_raw['Deccen'] > -2.6,
    galaxy_catalog_raw['Deccen'] < 5.8,
    # redshift ceiling
    galaxy_catalog_raw['Z'] < 1.31,
    galaxy_catalog_raw['starmask'] == False,     # not flagged by 'starmask'
])

# Apply the cuts and report how many rows survive.
efeds_catalog = efeds_raw[cluster_mask]
galaxy_catalog = galaxy_catalog_raw[galaxy_mask]
print('number of the galaxies after masking: ',len(galaxy_catalog))
print('number of the clusters after masking: ', len(efeds_catalog))

In [None]:
# Constants
# NOTE(review): units are not stated in the notebook. The values look like km/s/Mpc for
# H0 and km/s for c (c * z / H0 is stored in a column named 'distance_Mpc') — confirm.
# NOTE(review): projected_distance() uses astropy's Planck15 cosmology rather than this H0.
H0 = 70  # Hubble constant used by distance_from_redshift()
c = 3e5  # speed of light (rounded); also converts a velocity dispersion into a redshift width
arcmin_to_rad = np.pi / 180 / 60  # radians per arcminute

In [218]:
# Functions
def distance_from_redshift(z):
    """Convert redshift to distance with the linear relation ``c * z / H0``.

    Uses the notebook-level constants ``c`` and ``H0``. Accepts a scalar or
    an array and returns the same shape.

    NOTE(review): this linear relation is a low-redshift approximation, yet the
    cluster table contains redshifts up to ~0.7 — confirm it is adequate there.
    """
    recession_velocity = z * c
    return recession_velocity / H0

def cluster_volume(cluster_radius_Mpc):
    """Volume of a sphere with the given radius: ``(4/3) * pi * r**3``.

    Works for a scalar radius or an array of radii; the result is in the
    cube of whatever length unit the radius uses.
    """
    sphere_coefficient = (4/3) * np.pi
    return sphere_coefficient * cluster_radius_Mpc**3

def redshift_comparison(cluster_z, cluster_Velocity_Dispersion, galaxy_z, threshold_factor=1):
    """Tell whether a galaxy redshift lies inside a cluster's redshift window.

    The window half-width is ``(cluster_Velocity_Dispersion / c) * threshold_factor``
    and the test is inclusive (``<=``). ``galaxy_z`` may be a scalar or an array;
    the result is a boolean of the same shape.

    NOTE(review): a line-of-sight velocity width sigma at redshift z is usually
    mapped to ``sigma * (1 + z) / c`` in redshift; the ``(1 + z)`` factor is not
    applied here — confirm that this is intended.
    """
    half_width = threshold_factor * (cluster_Velocity_Dispersion / c)
    offset = np.abs(galaxy_z - cluster_z)
    return offset <= half_width

def add_z_passed_galaxies_column(cluster_df, galaxy_df, threshold_factor=1):
    """Attach to every cluster the galaxies that fall inside its redshift window.

    A galaxy passes for a cluster when the module-level ``redshift_comparison``
    accepts it. The comparison is evaluated once per cluster against the whole
    galaxy redshift array instead of once per (cluster, galaxy) pair.

    Parameters
    ----------
    cluster_df : pandas.DataFrame
        Needs the columns ``'z'`` and ``'cluster_Velocity_Dispersion'``.
        Modified in place (two columns are added or overwritten).
    galaxy_df : pandas.DataFrame
        Needs the columns ``'g_ID'`` and ``'z'``. Not modified.
    threshold_factor : float, optional
        Multiplier forwarded to ``redshift_comparison``.

    Returns
    -------
    pandas.DataFrame
        The same ``cluster_df`` object, with
        ``'z_passed_galaxies'`` (list of ``g_ID`` values per cluster, in
        ``galaxy_df`` row order) and ``'num_passed_galaxies'`` (its length).
    """
    # Pull the columns out once. Redshifts and dispersions are compared in
    # float64; IDs keep their own dtype, so integer IDs stay integers rather
    # than being upcast to float by row-wise iteration.
    galaxy_ids = galaxy_df['g_ID'].to_numpy()
    galaxy_z = galaxy_df['z'].to_numpy(dtype=float)
    cluster_z = cluster_df['z'].to_numpy(dtype=float)
    cluster_sigma = cluster_df['cluster_Velocity_Dispersion'].to_numpy(dtype=float)

    z_passed_galaxies_list = []
    for z_cl, sigma in zip(cluster_z, cluster_sigma):
        # Boolean array over all galaxies for this one cluster.
        in_window = redshift_comparison(z_cl, sigma, galaxy_z, threshold_factor)
        z_passed_galaxies_list.append(galaxy_ids[in_window].tolist())

    cluster_df['z_passed_galaxies'] = z_passed_galaxies_list
    cluster_df['num_passed_galaxies'] = [len(ids) for ids in z_passed_galaxies_list]

    return cluster_df

def angular_separation(ra1, dec1, ra2, dec2):
    """Great-circle separation between two sky positions (haversine formula).

    Parameters
    ----------
    ra1, dec1, ra2, dec2 : float or array-like
        Coordinates in degrees (they are converted with ``np.radians``).
        Arrays are combined element-wise with the usual broadcasting.

    Returns
    -------
    float or numpy.ndarray
        Separation in radians, in the range ``[0, pi]``.
    """
    ra1, dec1, ra2, dec2 = map(np.radians, [ra1, dec1, ra2, dec2])
    delta_RA = ra2 - ra1
    delta_DEC = dec2 - dec1
    w = np.sin(delta_DEC/2.0)**2 + np.cos(dec1) * np.cos(dec2) * np.sin(delta_RA/2.0)**2
    # w lies in [0, 1] mathematically, but rounding can push it marginally
    # above 1 for (nearly) antipodal points, which would turn arcsin into NaN.
    w = np.clip(w, 0.0, 1.0)
    return 2 * np.arcsin(np.sqrt(w))

def projected_distance(cluster_ra, cluster_dec, cluster_z, galaxy_ra, galaxy_dec):
    """Projected separation between a cluster centre and a galaxy.

    The angular separation of the two positions (radians, from
    ``angular_separation``) is scaled by the angular-diameter distance that
    ``cosmo`` gives at the cluster redshift. The numeric ``.value`` of that
    distance is used, so the result is a plain number (Mpc with astropy's
    default distance unit — TODO confirm).
    """
    separation_rad = angular_separation(cluster_ra, cluster_dec, galaxy_ra, galaxy_dec)
    distance_to_cluster = cosmo.angular_diameter_distance(cluster_z).value
    return distance_to_cluster * separation_rad

def is_galaxy_in_cluster(galaxy_ra, galaxy_dec, galaxy_z, cluster_ra, cluster_dec, cluster_z, cluster_radius_Mpc):
    """Check whether a galaxy lies within a cluster's projected radius.

    The galaxy counts as inside when its projected distance from the cluster
    centre (see ``projected_distance``) does not exceed ``cluster_radius_Mpc``;
    the boundary is included. ``galaxy_z`` is accepted but not used by this test.
    """
    separation_Mpc = projected_distance(cluster_ra, cluster_dec, cluster_z, galaxy_ra, galaxy_dec)
    return cluster_radius_Mpc >= separation_Mpc

def add_position_passed_galaxies_column(cluster_df_updated, galaxy_df):
    """Keep, per cluster, the redshift-selected galaxies that also lie inside its radius.

    For every cluster the IDs in its ``'z_passed_galaxies'`` list are tested with
    ``is_galaxy_in_cluster``. The test is evaluated once per cluster on the whole
    candidate array, so the cluster's distance is computed once rather than
    once per galaxy.

    Parameters
    ----------
    cluster_df_updated : pandas.DataFrame
        Needs the columns ``'c_ID'``, ``'RA'``, ``'DEC'``, ``'z'``,
        ``'cluster_radius_Mpc'`` and ``'z_passed_galaxies'``. Modified in place.
    galaxy_df : pandas.DataFrame
        Needs the columns ``'g_ID'``, ``'RA'``, ``'DEC'`` and ``'z'``.
        Modified in place: ``'cluster_ID'`` is reset to ``None`` and then filled.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The same two objects that were passed in. The cluster frame gains
        ``'position_passed_galaxies'``, ``'num_position_passed_galaxies'`` and
        ``'galaxies_within'`` (the same lists as ``'position_passed_galaxies'``).
        The galaxy frame gains ``'cluster_ID'``.

    Raises
    ------
    KeyError
        If a ``'z_passed_galaxies'`` entry does not occur in ``galaxy_df['g_ID']``.

    Notes
    -----
    A galaxy that falls inside several clusters ends up with the ``c_ID`` of the
    last such cluster in row order. If a ``g_ID`` occurs in several galaxy rows,
    the first row supplies the position and all rows receive the ``cluster_ID``.
    """
    galaxy_df['cluster_ID'] = None

    galaxy_ids = galaxy_df['g_ID'].to_numpy()
    galaxy_ra = galaxy_df['RA'].to_numpy(dtype=float)
    galaxy_dec = galaxy_df['DEC'].to_numpy(dtype=float)
    galaxy_z = galaxy_df['z'].to_numpy(dtype=float)

    # g_ID -> position of its first row; replaces a full-frame scan per candidate.
    first_row_of = {}
    for row, g_id in enumerate(galaxy_ids.tolist()):
        first_row_of.setdefault(g_id, row)

    position_passed_galaxies_list = []

    for _, cluster in cluster_df_updated.iterrows():
        candidate_ids = list(cluster['z_passed_galaxies'])
        try:
            rows = np.array([first_row_of[g_id] for g_id in candidate_ids], dtype=np.intp)
        except KeyError as missing:
            raise KeyError(
                f"g_ID {missing.args[0]!r} listed in 'z_passed_galaxies' is not present in galaxy_df"
            ) from None

        members = []
        if rows.size:
            # One vectorised geometry test for all candidates of this cluster.
            inside = np.asarray(
                is_galaxy_in_cluster(
                    galaxy_ra[rows], galaxy_dec[rows], galaxy_z[rows],
                    cluster['RA'], cluster['DEC'], cluster['z'],
                    cluster['cluster_radius_Mpc'],
                ),
                dtype=bool,
            )
            members = galaxy_ids[rows[inside]].tolist()

        if members:
            # Later clusters overwrite earlier ones for shared galaxies.
            galaxy_df.loc[galaxy_df['g_ID'].isin(members), 'cluster_ID'] = cluster['c_ID']

        position_passed_galaxies_list.append(members)

    cluster_df_updated['position_passed_galaxies'] = position_passed_galaxies_list
    cluster_df_updated['num_position_passed_galaxies'] = [
        len(members) for members in position_passed_galaxies_list
    ]
    # Same membership lists under a second column name.
    cluster_df_updated['galaxies_within'] = position_passed_galaxies_list

    return cluster_df_updated, galaxy_df

In [219]:
# Dataframes
def _to_native_byteorder(column):
    """Return a copy of ``column`` whose dtype uses the machine's native byte order.

    pandas needs native-endian buffers, and the catalogue columns may not be
    native. ``ndarray.newbyteorder()`` (previously chained after ``byteswap()``)
    was removed in NumPy 2.0, so the conversion is done with
    ``astype`` on a native-order copy of the dtype, which works on NumPy 1.x and 2.x.
    """
    return column.astype(column.dtype.newbyteorder('='))


# One row per cluster; distance and volume are derived from redshift and radius.
cluster_df = pd.DataFrame({
    'c_ID': _to_native_byteorder(efeds_catalog['ID-SRC']),
    'c_NAME': _to_native_byteorder(efeds_catalog['NAME']),
    'RA': _to_native_byteorder(efeds_catalog['RA']),
    'DEC': _to_native_byteorder(efeds_catalog['DEC']),
    'z': _to_native_byteorder(efeds_catalog['ZBEST']),
    'distance_Mpc': distance_from_redshift(_to_native_byteorder(efeds_catalog['ZBEST'])),
    'cluster_radius_Mpc': _to_native_byteorder(efeds_catalog['R_LAMBDA']),
    'cluster_volume_Mpc3': cluster_volume(_to_native_byteorder(efeds_catalog['R_LAMBDA'])),
    'cluster_Velocity_Dispersion': _to_native_byteorder(efeds_catalog['VDISP'])
})


# One row per galaxy that survived the masks.
galaxy_df = pd.DataFrame({
    'g_ID': _to_native_byteorder(galaxy_catalog['uberID']),
    'RA': _to_native_byteorder(galaxy_catalog['RAcen']),
    'DEC': _to_native_byteorder(galaxy_catalog['Deccen']),
    'z': _to_native_byteorder(galaxy_catalog['Z'])
})

In [None]:
# Result of functions and updating the dataframes
# Both helpers modify the frames they receive and return those same objects, so
# `cluster_df_updated` is the same object as `cluster_df`, and `galaxy_df_updated`
# is the same object as `galaxy_df`.
# Step 1: redshift selection — adds 'z_passed_galaxies' and 'num_passed_galaxies' to the cluster frame.
cluster_df_updated = add_z_passed_galaxies_column(cluster_df, galaxy_df)
# Step 2: projected-radius selection on the step-1 candidates — adds the position
# columns to the cluster frame and a 'cluster_ID' column to the galaxy frame.
cluster_df_updated, galaxy_df_updated = add_position_passed_galaxies_column(cluster_df_updated, galaxy_df)

In [224]:
# Updating Galaxy dataframes
# Written as one chain in which every step returns a new frame. Assigning a column
# or using inplace=True on a boolean-filtered frame is what makes pandas emit
# SettingWithCopyWarning; the chain avoids that and leaves the input frame untouched.
galaxy_df_updated = (
    galaxy_df_updated
    # Keep only galaxies that were assigned to a cluster ('cluster_ID' is not None/NaN).
    .loc[lambda df: df['cluster_ID'].notna()]
    # Drop any 'c_NAME' column left by an earlier run of this cell so the merge
    # below cannot produce suffixed duplicates.
    .pipe(lambda df: df.drop(columns=[col for col in df.columns if 'c_NAME' in col]))
    # Clean 0..n-1 index after the rows were removed.
    .reset_index(drop=True)
    # Give 'cluster_ID' the dtype of 'c_ID' so the merge keys compare properly.
    .astype({'cluster_ID': cluster_df['c_ID'].dtype})
    # Look up the cluster name for every galaxy.
    .merge(cluster_df[['c_ID', 'c_NAME']], left_on='cluster_ID', right_on='c_ID', how='left')
    # 'c_ID' duplicates 'cluster_ID' after the merge.
    .drop(columns=['c_ID'])
)

# Display the updated DataFrame
print(galaxy_df_updated)


                g_ID          RA       DEC        z  cluster_ID  \
0    133990088904009  134.499717 -0.705824  0.24617       11717   
1    140000391308781  140.214904  0.743549  0.15698        6459   
2    140000390908755  140.215330  0.741024  0.15684        6459   
3    132000006003287  132.577598  0.226102  0.19730        1023   
4    130990177506182  131.416193 -0.501206  0.15686        2214   
..               ...         ...       ...      ...         ...   
277  131991226006338  131.428884 -0.486504  0.15821        2214   
278  136991114306938  136.534038 -0.430051  0.29864        9359   
279  136991087411411  136.559441 -0.008810  0.19930        3259   
280  130990148605975  131.443428 -0.520688  0.15834        2214   
281  130990149606322  131.442491 -0.487985  0.15737        2214   

                    c_NAME  
0    eFEDSJ085801.1-004103  
1    eFEDSJ092105.0+004452  
2    eFEDSJ092105.0+004452  
3    eFEDSJ085027.8+001503  
4    eFEDSJ084544.3-002914  
..                   

In [225]:
# Show the cluster table, including the membership columns added by the two selection steps.
print(cluster_df_updated)

      c_ID                 c_NAME         RA      DEC       z  distance_Mpc  \
0     4800  eFEDSJ082808.7-001003  127.03664 -0.16772  0.0762    326.571429   
1     7991  eFEDSJ082840.6-000500  127.16920 -0.08355  0.3197   1370.142857   
2     5601  eFEDSJ083110.5+015616  127.79401  1.93784  0.4195   1797.857143   
3     2659  eFEDSJ083125.9+015533  127.85804  1.92590  0.6845   2933.571429   
4     3725  eFEDSJ083137.9+004632  127.90807  0.77581  0.3023   1295.571429   
..     ...                    ...        ...      ...     ...           ...   
277   1761  eFEDSJ093546.3-000115  143.94326 -0.02110  0.3497   1498.714286   
278  11748  eFEDSJ093707.4+034831  144.28102  3.80886  0.2709   1161.000000   
279     38  eFEDSJ093712.8+031651  144.30357  3.28104  0.2474   1060.285714   
280  10322  eFEDSJ093830.5+041523  144.62717  4.25640  0.2622   1123.714286   
281   1594  eFEDSJ093938.3+042218  144.90960  4.37172  0.3687   1580.142857   

     cluster_radius_Mpc  cluster_volume_Mpc3  clust