In [36]:
import numpy as np
import pandas as pd
import astropy.io.fits as fits
# import healpy as hp
from scipy import integrate
import matplotlib.pyplot as plt
from astropy.cosmology import Planck15 as cosmo
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit

In [37]:
# Loading the data
galaxy_catalog_raw = fits.open('/home/farnoosh/Nextcloud/Master_Thesis_all/codes/data/GAMA/merged/StellarMass-gkvScience/mergedStellarMass-gkvScience')[1].data  # galaxies
efeds_raw = fits.open('/home/farnoosh/Nextcloud/Master_Thesis_all/codes/data/eFEDS/Mathias_Klug/efeds_members.fit')[1].data  # clusters

In [38]:
print(len(efeds_raw))

542


In [39]:
# Masks
cluster_mask = (
    ~np.isnan(efeds_raw['VDISP'])  # mask clusters which doesn't have VDISP values
)

galaxy_mask = (
        (galaxy_catalog_raw['uberclass'] == 1) &  #galaxy
        (galaxy_catalog_raw['duplicate'] == False) &
        (galaxy_catalog_raw['mask'] == False) &
        (galaxy_catalog_raw['NQ'] > 2) &
        (galaxy_catalog_raw['SC'] > 7) &
        (galaxy_catalog_raw['mstar'] > 0) &
        (galaxy_catalog_raw['RAcen'] > 126.5) &
        (galaxy_catalog_raw['RAcen'] < 145.1) &
        (galaxy_catalog_raw['Deccen'] > -2.6) &
        (galaxy_catalog_raw['Deccen'] < 5.8) &
        (galaxy_catalog_raw['Z'] < 1.31) &
        (galaxy_catalog_raw['starmask'] == False)
)

galaxy_catalog = galaxy_catalog_raw[galaxy_mask]
efeds_catalog = efeds_raw[cluster_mask]
print('number of the galaxies after masking: ', len(galaxy_catalog))
print('number of the clusters after masking: ', len(efeds_catalog))

number of the galaxies after masking:  36801
number of the clusters after masking:  282


In [40]:
# Constants
H0 = 70
c = 3e5
arcmin_to_rad = np.pi / 180 / 60


In [41]:
# Functions
def distance_from_redshift(z):
    return c * z / H0

def cluster_volume(cluster_radius_Mpc):
    return (4/3) * np.pi * cluster_radius_Mpc**3

def angular_separation(ra1, dec1, ra2, dec2):
    ra1, dec1, ra2, dec2 = map(np.radians, [ra1, dec1, ra2, dec2])
    delta_RA = ra2 - ra1
    delta_DEC = dec2 - dec1
    w = np.sin(delta_DEC/2.0)**2 + np.cos(dec1) * np.cos(dec2) * np.sin(delta_RA/2.0)**2
    return 2 * np.arcsin(np.sqrt(w))

def projected_distance(cluster_ra, cluster_dec, cluster_z, galaxy_ra, galaxy_dec):
    angular_dist = angular_separation(cluster_ra, cluster_dec, galaxy_ra, galaxy_dec)
    D_A = cosmo.angular_diameter_distance(cluster_z).value
    projected_distance_Mpc = D_A * angular_dist
    return projected_distance_Mpc

def is_galaxy_in_cluster(galaxy_ra, galaxy_dec, galaxy_z, cluster_ra, cluster_dec, cluster_z, cluster_radius_Mpc):
    projected_dist_Mpc = projected_distance(cluster_ra, cluster_dec, cluster_z, galaxy_ra, galaxy_dec)
    return projected_dist_Mpc <= 2 * cluster_radius_Mpc

def redshift_comparison(cluster_z, cluster_Velocity_Dispersion, galaxy_z, threshold_factor=1):
    delta_z = (cluster_Velocity_Dispersion / c) * threshold_factor
    return np.abs(galaxy_z - cluster_z) <= delta_z

def add_position_passed_galaxies_column(cluster_df, galaxy_df):
    position_passed_galaxies_list = []
    num_position_passed_galaxies_list = []

    galaxy_df['cluster_ID'] = None

    for i, cluster in cluster_df.iterrows():
        position_passed_galaxies_cluster = []
        for j, galaxy in galaxy_df.iterrows():
            if is_galaxy_in_cluster(galaxy['RA'], galaxy['DEC'], galaxy['z'], cluster['RA'], cluster['DEC'], cluster['z'], cluster['cluster_radius_Mpc']):
                position_passed_galaxies_cluster.append(galaxy['g_ID'])
                galaxy_df.loc[galaxy_df['g_ID'] == galaxy['g_ID'], 'cluster_ID'] = cluster['c_ID']

        position_passed_galaxies_list.append(position_passed_galaxies_cluster)
        num_position_passed_galaxies_list.append(len(position_passed_galaxies_cluster))
        print(f"Cluster {cluster['c_NAME']} has {len(position_passed_galaxies_cluster)} galaxies that passed positional comparison")

    cluster_df['position_passed_galaxies'] = position_passed_galaxies_list
    cluster_df['num_position_passed_galaxies'] = num_position_passed_galaxies_list

    return cluster_df, galaxy_df

def add_z_passed_galaxies_column(cluster_df, galaxy_df, threshold_factor=1):
    z_passed_galaxies_list = []
    num_passed_galaxies_list = []

    for i, cluster in cluster_df.iterrows():
        position_passed_galaxies = cluster['position_passed_galaxies']
        passed_galaxies_cluster = []

        for g_id in position_passed_galaxies:
            galaxy = galaxy_df[galaxy_df['g_ID'] == g_id].iloc[0]
            if redshift_comparison(cluster['z'], cluster['cluster_Velocity_Dispersion'], galaxy['z'], threshold_factor):
                passed_galaxies_cluster.append(galaxy['g_ID'])

        z_passed_galaxies_list.append(passed_galaxies_cluster)
        num_passed_galaxies_list.append(len(passed_galaxies_cluster))
        print(f"Cluster {cluster['c_NAME']} has {len(passed_galaxies_cluster)} galaxies that passed redshift comparison")

    cluster_df['z_passed_galaxies'] = z_passed_galaxies_list
    cluster_df['num_passed_galaxies'] = num_passed_galaxies_list
    cluster_df['galaxies_within'] = z_passed_galaxies_list

    return cluster_df

In [42]:
# Dataframes
cluster_df = pd.DataFrame({
    'c_ID': efeds_catalog['ID-SRC'].byteswap().newbyteorder(),
    'c_NAME': efeds_catalog['NAME'].byteswap().newbyteorder(),
    'RA': efeds_catalog['RA'].byteswap().newbyteorder(),
    'DEC': efeds_catalog['DEC'].byteswap().newbyteorder(),
    'z': efeds_catalog['ZBEST'].byteswap().newbyteorder(),
    'distance_Mpc': distance_from_redshift(efeds_catalog['ZBEST'].byteswap().newbyteorder()),
    'cluster_radius_Mpc': efeds_catalog['R_LAMBDA'].byteswap().newbyteorder(),
    'cluster_volume_Mpc3': cluster_volume(efeds_catalog['R_LAMBDA'].byteswap().newbyteorder()),
    'cluster_Velocity_Dispersion': efeds_catalog['VDISP'].byteswap().newbyteorder()
})

galaxy_df = pd.DataFrame({
    'g_ID': galaxy_catalog['uberID'].byteswap().newbyteorder(),
    'RA': galaxy_catalog['RAcen'].byteswap().newbyteorder(),
    'DEC': galaxy_catalog['Deccen'].byteswap().newbyteorder(),
    'z': galaxy_catalog['Z'].byteswap().newbyteorder()
})

In [44]:
print(cluster_df)

      c_ID                 c_NAME         RA      DEC       z  distance_Mpc  \
0     4800  eFEDSJ082808.7-001003  127.03664 -0.16772  0.0762    326.571429   
1     7991  eFEDSJ082840.6-000500  127.16920 -0.08355  0.3197   1370.142857   
2     5601  eFEDSJ083110.5+015616  127.79401  1.93784  0.4195   1797.857143   
3     2659  eFEDSJ083125.9+015533  127.85804  1.92590  0.6845   2933.571429   
4     3725  eFEDSJ083137.9+004632  127.90807  0.77581  0.3023   1295.571429   
..     ...                    ...        ...      ...     ...           ...   
277   1761  eFEDSJ093546.3-000115  143.94326 -0.02110  0.3497   1498.714286   
278  11748  eFEDSJ093707.4+034831  144.28102  3.80886  0.2709   1161.000000   
279     38  eFEDSJ093712.8+031651  144.30357  3.28104  0.2474   1060.285714   
280  10322  eFEDSJ093830.5+041523  144.62717  4.25640  0.2622   1123.714286   
281   1594  eFEDSJ093938.3+042218  144.90960  4.37172  0.3687   1580.142857   

     cluster_radius_Mpc  cluster_volume_Mpc3  clust

In [45]:
print(galaxy_df)

                  g_ID          RA       DEC        z
0      138021052006945  137.992346  2.570577  0.21327
1      130020205007803  130.790764  2.651356  0.29370
2      130990151906125  131.440319 -0.506608  0.15497
3      137020960805026  137.078359  2.389936  0.29276
4      141001088208416  140.558640  0.709116  0.08310
...                ...         ...       ...      ...
36796  130990634008739  130.986376 -0.260436  0.26954
36797  135021016609990  135.025604  2.857325  0.19924
36798  135000832810514  134.799377  0.906732  0.02531
36799  140000362709494  140.241766  0.810503  0.02381
36800  137021074202062  136.971637  2.110832  0.27823

[36801 rows x 4 columns]


In [46]:
# Result of functions and updating the dataframes
cluster_df_updated, galaxy_df_updated = add_position_passed_galaxies_column(cluster_df, galaxy_df)

Cluster eFEDSJ082808.7-001003 has 0 galaxies that passed positional comparison
Cluster eFEDSJ082840.6-000500 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083110.5+015616 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083125.9+015533 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083137.9+004632 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083153.6+012530 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083204.4+041907 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083228.0-000656 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083315.5+000623 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083322.6-011128 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083330.4+050427 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083345.8+004208 has 0 galaxies that passed positional comparison
Cluster eFEDSJ083412.7+035856 has 0 galaxies that pa

In [49]:
clusters_with_galaxies_in_their_position = cluster_df_updated[cluster_df_updated['num_position_passed_galaxies'] > 0]
num_clusters_with_galaxies_in_their_position = len(clusters_with_galaxies_in_their_position)
print(f"Number of clusters with any number of galaxies within them after position filter: {num_clusters_with_galaxies_in_their_position}")

Number of clusters with any number of galaxies within them after position filter: 157


In [47]:
# Continue with the redshift filter
cluster_df_updated = add_z_passed_galaxies_column(cluster_df_updated, galaxy_df_updated)

Cluster eFEDSJ082808.7-001003 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ082840.6-000500 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083110.5+015616 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083125.9+015533 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083137.9+004632 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083153.6+012530 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083204.4+041907 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083228.0-000656 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083315.5+000623 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083322.6-011128 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083330.4+050427 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083345.8+004208 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083412.7+035856 has 0 galaxies that passed redshift comparison

In [48]:
print(len(galaxy_df_updated['cluster_ID']))
print(len(cluster_df['position_passed_galaxies']))
print(len(cluster_df_updated['position_passed_']))

36801
282


KeyError: 'position_passed_'

In [None]:
# Updating Galaxy dataframes

# In Galaxy dataframe remove the rows which their 'cluster_ID' is 'None'
galaxy_df_updated = galaxy_df_updated[galaxy_df_updated['cluster_ID'].notna()]

# Reset the index to clean index after the rows are removed
galaxy_df_updated.reset_index(drop=True, inplace=True)
print(len(galaxy_df_updated['cluster_ID']))

In [None]:
# Updating Galaxy dataframes

# Convert cluster_ID to the same dtype as c_ID for proper merging
galaxy_df_updated['cluster_ID'] = galaxy_df_updated['cluster_ID'].astype(cluster_df['c_ID'].dtype)

# Drop any existing 'c_NAME' columns to avoid conflicts during merge
columns_to_drop = [col for col in galaxy_df_updated.columns if 'c_NAME' in col]
galaxy_df_updated.drop(columns=columns_to_drop, inplace=True)

# Merge the DataFrame to add the cluster NAME
galaxy_df_updated = galaxy_df_updated.merge(cluster_df[['c_ID', 'c_NAME']], left_on='cluster_ID', right_on='c_ID', how='left')

# Drop the extra 'c_ID' column from the merge
galaxy_df_updated.drop(columns=['c_ID'], inplace=True)
print(len(galaxy_df_updated['cluster_ID']))

In [None]:
# Filter clusters to keep only those with galaxies that passed the redshift criteria
cluster_df_updated = cluster_df_updated[cluster_df_updated['num_passed_galaxies'] > 0].reset_index(drop=True)

# Update galaxy_df_updated to reflect only those galaxies within the filtered clusters
filtered_cluster_ids = cluster_df_updated['c_ID'].tolist()
galaxy_df_updated = galaxy_df_updated[galaxy_df_updated['cluster_ID'].isin(filtered_cluster_ids)].reset_index(drop=True)

# Display the updated DataFrame
print(galaxy_df_updated)
print(len(galaxy_df_updated['cluster_ID']))
