In [1]:
import numpy as np
import pandas as pd
import astropy.io.fits as fits
from scipy import integrate
# import healpy as hp
import matplotlib.pyplot as plt
from astropy.cosmology import Planck15 as cosmo
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
#from completeness import create_completeness_dataframe

In [2]:
# Loading the data
galaxy_catalog_raw = fits.open('/home/farnoosh/Nextcloud/Master_Thesis_all/codes/data/GAMA/merged/StellarMass-gkvScience/mergedStellarMass-gkvScience')[1].data    # galaxies
efeds_raw = fits.open('/home/farnoosh/Nextcloud/Master_Thesis_all/codes/data/eFEDS/Mathias_Klug/efeds_members.fit')[1].data    # clusters

In [3]:
# Masks
cluster_mask = (
    ~np.isnan(efeds_raw['VDISP']) &  # mask clusters which don't have VDISP values
    (efeds_raw['RA'] > 129.0) &
    (efeds_raw['RA'] < 141.0) &
    (efeds_raw['DEC'] > -2.0) &
    (efeds_raw['DEC'] < 3.0)
)

galaxy_mask = (
    (galaxy_catalog_raw['uberclass'] == 1) &  # galaxy
    (galaxy_catalog_raw['duplicate'] == False) &
    (galaxy_catalog_raw['mask'] == False) &
    (galaxy_catalog_raw['NQ'] > 2) &
    (galaxy_catalog_raw['SC'] > 7) &
    (galaxy_catalog_raw['mstar'] > 0) &
    (galaxy_catalog_raw['RAcen'] > 126.6108) &
    (galaxy_catalog_raw['RAcen'] < 145.03073) &
    (galaxy_catalog_raw['Deccen'] > -2.56405) &
    (galaxy_catalog_raw['Deccen'] < 5.70157) &
    (galaxy_catalog_raw['Z'] < 1.31) &
    (galaxy_catalog_raw['starmask'] == False)
)

galaxy_catalog = galaxy_catalog_raw[galaxy_mask]
efeds_catalog = efeds_raw[cluster_mask]
print('number of the galaxies after masking: ', len(galaxy_catalog))
print('number of the clusters after masking: ', len(efeds_catalog))

number of the galaxies after masking:  36801
number of the clusters after masking:  150


In [4]:
# Constants
H0 = 70
c = 3e5
arcmin_to_rad = np.pi / 180 / 60

In [5]:
# Functions
def distance_from_redshift(z):
    return c * z / H0   # in Mpc

def cluster_volume(cluster_radius_Mpc):
    return (4/3) * np.pi * cluster_radius_Mpc**3    # Mpc**3



def redshift_comparison(cluster_z, cluster_Velocity_Dispersion, galaxy_z, threshold_factor=1):
    delta_z = (cluster_Velocity_Dispersion / c) * threshold_factor
    return np.abs(galaxy_z - cluster_z) <= delta_z      # dimensionless




def add_z_passed_galaxies_column(cluster_df, galaxy_df, threshold_factor=1):
    z_passed_galaxies_list = []
    num_passed_galaxies_list = []

    for i, cluster in cluster_df.iterrows():
        passed_galaxies_cluster = []
        for j, galaxy in galaxy_df.iterrows():
            if redshift_comparison(cluster['z'], cluster['cluster_Velocity_Dispersion'], galaxy['z'], threshold_factor):
                passed_galaxies_cluster.append(galaxy['g_ID'])
        z_passed_galaxies_list.append(passed_galaxies_cluster)
        num_passed_galaxies_list.append(len(passed_galaxies_cluster))
        print(f"Cluster {cluster['c_NAME']} has {len(passed_galaxies_cluster)} galaxies that passed redshift comparison")

    cluster_df['z_passed_galaxies'] = z_passed_galaxies_list
    cluster_df['num_passed_galaxies'] = num_passed_galaxies_list
    return cluster_df
# Till now, dataframes are not changing

def angular_separation(ra1, dec1, ra2, dec2):
    ra1, dec1, ra2, dec2 = map(np.radians, [ra1, dec1, ra2, dec2])
    delta_RA = ra2 - ra1
    delta_DEC = dec2 - dec1
    w = np.sin(delta_DEC/2.0)**2 + np.cos(dec1) * np.cos(dec2) * np.sin(delta_RA/2.0)**2
    return 2 * np.arcsin(np.sqrt(w))

def projected_distance(cluster_ra, cluster_dec, cluster_z, galaxy_ra, galaxy_dec):
    angular_dist = angular_separation(cluster_ra, cluster_dec, galaxy_ra, galaxy_dec)
    D_A = cosmo.angular_diameter_distance(cluster_z).value
    projected_distance_Mpc = D_A * angular_dist
    return projected_distance_Mpc

def is_galaxy_in_cluster_radius(galaxy_ra, galaxy_dec, galaxy_z, cluster_ra, cluster_dec, cluster_z, cluster_radius_Mpc):
    projected_dist_Mpc = projected_distance(cluster_ra, cluster_dec, cluster_z, galaxy_ra, galaxy_dec)
    return projected_dist_Mpc <= 2 * cluster_radius_Mpc     # I am using TWICE value of R_LAMBDA




def add_position_passed_galaxies_column(cluster_df_updated, galaxy_df):
    position_passed_galaxies_list = []
    num_position_passed_galaxies_list = []

    galaxy_df['cluster_ID'] = None  # in the galaxy_dataframe from now on we have a new column that shows the galaxy is belong to this cluster_ID
    print(cluster_df_updated)
    for i, cluster in cluster_df_updated.iterrows():
        z_passed_galaxies = cluster['z_passed_galaxies']    # list of galaxies that passed the redshift check for the current cluster
        position_passed_galaxies_cluster = []   # This initializes an empty list to store the IDs of galaxies that pass the positional check for the current cluster.

        for g_id in z_passed_galaxies:  # goes through the column of cluster_df_updated['z_passed_galaxies']
            galaxy = galaxy_df[galaxy_df['g_ID'] == g_id].iloc[0]   #-> to check we found this individual galaxy only for one cluster

            if is_galaxy_in_cluster_radius(galaxy['RA'], galaxy['DEC'], galaxy['z'], cluster['RA'], cluster['DEC'], cluster['z'], cluster['cluster_radius_Mpc']):
                position_passed_galaxies_cluster.append(galaxy['g_ID'])
                galaxy_df.loc[galaxy_df['g_ID'] == g_id, 'cluster_ID'] = cluster['c_ID']

        position_passed_galaxies_list.append(position_passed_galaxies_cluster)
        num_position_passed_galaxies_list.append(len(position_passed_galaxies_cluster))
        print(f"Cluster {cluster['c_NAME']} has {len(position_passed_galaxies_cluster)} galaxies that passed positional comparison")

    cluster_df_updated['position_passed_galaxies'] = position_passed_galaxies_list
    cluster_df_updated['num_position_passed_galaxies'] = num_position_passed_galaxies_list
    cluster_df_updated['galaxies_within'] = position_passed_galaxies_list

    return cluster_df_updated, galaxy_df

In [6]:
# Dataframes
cluster_df = pd.DataFrame({
    'c_ID': efeds_catalog['ID-SRC'].byteswap().newbyteorder(),
    'c_NAME': efeds_catalog['NAME'].byteswap().newbyteorder(),
    'RA': efeds_catalog['RA'].byteswap().newbyteorder(),
    'DEC': efeds_catalog['DEC'].byteswap().newbyteorder(),
    'z': efeds_catalog['ZBEST'].byteswap().newbyteorder(),
    'distance_Mpc': distance_from_redshift(efeds_catalog['ZBEST'].byteswap().newbyteorder()),
    'cluster_radius_Mpc': efeds_catalog['R_LAMBDA'].byteswap().newbyteorder(),
    'cluster_volume_Mpc3': cluster_volume(efeds_catalog['R_LAMBDA'].byteswap().newbyteorder()),
    'cluster_Velocity_Dispersion': efeds_catalog['VDISP'].byteswap().newbyteorder()
})


galaxy_df = pd.DataFrame({
    'g_ID': galaxy_catalog['uberID'].byteswap().newbyteorder(),
    'RA': galaxy_catalog['RAcen'].byteswap().newbyteorder(),
    'DEC': galaxy_catalog['Deccen'].byteswap().newbyteorder(),
    'z': galaxy_catalog['Z'].byteswap().newbyteorder()
})

In [7]:
print(galaxy_df)
print(cluster_df)

                  g_ID          RA       DEC        z
0      138021052006945  137.992346  2.570577  0.21327
1      130020205007803  130.790764  2.651356  0.29370
2      130990151906125  131.440319 -0.506608  0.15497
3      137020960805026  137.078359  2.389936  0.29276
4      141001088208416  140.558640  0.709116  0.08310
...                ...         ...       ...      ...
36796  130990634008739  130.986376 -0.260436  0.26954
36797  135021016609990  135.025604  2.857325  0.19924
36798  135000832810514  134.799377  0.906732  0.02531
36799  140000362709494  140.241766  0.810503  0.02381
36800  137021074202062  136.971637  2.110832  0.27823

[36801 rows x 4 columns]
      c_ID                 c_NAME         RA      DEC       z  distance_Mpc  \
0    16471  eFEDSJ083654.6+025954  129.22772  2.99841  0.1946    834.000000   
1     4731  eFEDSJ083723.6+012413  129.34868  1.40367  0.3400   1457.142857   
2     2456  eFEDSJ083759.5-003643  129.49812 -0.61210  0.4506   1931.142857   
3     4511

we have 149 clusters, which all of them have velocity dispersion.

In [8]:
# Result of functions and updating the dataframes
cluster_df_updated = add_z_passed_galaxies_column(cluster_df, galaxy_df)



Cluster eFEDSJ083654.6+025954 has 1185 galaxies that passed redshift comparison
Cluster eFEDSJ083723.6+012413 has 222 galaxies that passed redshift comparison
Cluster eFEDSJ083759.5-003643 has 21 galaxies that passed redshift comparison
Cluster eFEDSJ083802.9+015626 has 297 galaxies that passed redshift comparison
Cluster eFEDSJ083806.9-003600 has 23 galaxies that passed redshift comparison
Cluster eFEDSJ083807.6+002501 has 223 galaxies that passed redshift comparison
Cluster eFEDSJ083834.1+020643 has 64 galaxies that passed redshift comparison
Cluster eFEDSJ083857.5+020846 has 193 galaxies that passed redshift comparison
Cluster eFEDSJ083858.5-015032 has 159 galaxies that passed redshift comparison
Cluster eFEDSJ083859.3+022841 has 158 galaxies that passed redshift comparison
Cluster eFEDSJ083900.6+020057 has 129 galaxies that passed redshift comparison
Cluster eFEDSJ083903.5-011454 has 42 galaxies that passed redshift comparison
Cluster eFEDSJ083921.0-014149 has 620 galaxies that pas

In [9]:
# cluster_dataframe is updated with the clusters that now have galaxies that passed ONLY filter of redshift (in all directions)
# First Filter only passed
print(cluster_df_updated)

      c_ID                 c_NAME         RA      DEC       z  distance_Mpc  \
0    16471  eFEDSJ083654.6+025954  129.22772  2.99841  0.1946    834.000000   
1     4731  eFEDSJ083723.6+012413  129.34868  1.40367  0.3400   1457.142857   
2     2456  eFEDSJ083759.5-003643  129.49812 -0.61210  0.4506   1931.142857   
3     4511  eFEDSJ083802.9+015626  129.51242  1.94074  0.3788   1623.428571   
4     5386  eFEDSJ083806.9-003600  129.52891 -0.60021  0.4342   1860.857143   
..     ...                    ...        ...      ...     ...           ...   
145   4168  eFEDSJ092136.4-001449  140.40200 -0.24707  0.3184   1364.571429   
146    857  eFEDSJ092212.0-002731  140.55033 -0.45865  0.3176   1361.142857   
147   3133  eFEDSJ092235.8-002443  140.64940 -0.41197  0.0635    272.142857   
148   1535  eFEDSJ092241.9+020719  140.67494  2.12222  0.1994    854.571429   
149  11530  eFEDSJ092346.8-005330  140.94517 -0.89183  0.3701   1586.142857   

     cluster_radius_Mpc  cluster_volume_Mpc3  clust

In [10]:
# Remove rows where 'num_passed_galaxies' is 0
cluster_df_updated = cluster_df_updated[cluster_df_updated['num_passed_galaxies'] > 0].reset_index(drop=True)

# Display the updated DataFrame to verify
print(cluster_df_updated)

      c_ID                 c_NAME         RA      DEC       z  distance_Mpc  \
0    16471  eFEDSJ083654.6+025954  129.22772  2.99841  0.1946    834.000000   
1     4731  eFEDSJ083723.6+012413  129.34868  1.40367  0.3400   1457.142857   
2     2456  eFEDSJ083759.5-003643  129.49812 -0.61210  0.4506   1931.142857   
3     4511  eFEDSJ083802.9+015626  129.51242  1.94074  0.3788   1623.428571   
4     5386  eFEDSJ083806.9-003600  129.52891 -0.60021  0.4342   1860.857143   
..     ...                    ...        ...      ...     ...           ...   
144   4168  eFEDSJ092136.4-001449  140.40200 -0.24707  0.3184   1364.571429   
145    857  eFEDSJ092212.0-002731  140.55033 -0.45865  0.3176   1361.142857   
146   3133  eFEDSJ092235.8-002443  140.64940 -0.41197  0.0635    272.142857   
147   1535  eFEDSJ092241.9+020719  140.67494  2.12222  0.1994    854.571429   
148  11530  eFEDSJ092346.8-005330  140.94517 -0.89183  0.3701   1586.142857   

     cluster_radius_Mpc  cluster_volume_Mpc3  clust

There was one cluster that didnt have any galaxy with the same Z for its velocity. so now we have 148 clusters that passed the filter of redshift

In [11]:
cluster_df_updated, galaxy_df_updated = add_position_passed_galaxies_column(cluster_df_updated, galaxy_df)

      c_ID                 c_NAME         RA      DEC       z  distance_Mpc  \
0    16471  eFEDSJ083654.6+025954  129.22772  2.99841  0.1946    834.000000   
1     4731  eFEDSJ083723.6+012413  129.34868  1.40367  0.3400   1457.142857   
2     2456  eFEDSJ083759.5-003643  129.49812 -0.61210  0.4506   1931.142857   
3     4511  eFEDSJ083802.9+015626  129.51242  1.94074  0.3788   1623.428571   
4     5386  eFEDSJ083806.9-003600  129.52891 -0.60021  0.4342   1860.857143   
..     ...                    ...        ...      ...     ...           ...   
144   4168  eFEDSJ092136.4-001449  140.40200 -0.24707  0.3184   1364.571429   
145    857  eFEDSJ092212.0-002731  140.55033 -0.45865  0.3176   1361.142857   
146   3133  eFEDSJ092235.8-002443  140.64940 -0.41197  0.0635    272.142857   
147   1535  eFEDSJ092241.9+020719  140.67494  2.12222  0.1994    854.571429   
148  11530  eFEDSJ092346.8-005330  140.94517 -0.89183  0.3701   1586.142857   

     cluster_radius_Mpc  cluster_volume_Mpc3  clust

In [12]:
# Updating Galaxy dataframes

# In Galaxy dataframe remove the rows which their 'cluster_ID' is 'None'
galaxy_df_updated = galaxy_df_updated[galaxy_df_updated['cluster_ID'].notna()]

# Reset the index to clean index after the rows are removed
galaxy_df_updated.reset_index(drop=True, inplace=True)
print(len(galaxy_df_updated['cluster_ID']))

474


In [13]:
cluster_df_updated_pz = add_z_passed_galaxies_column(cluster_df_updated, galaxy_df_updated)

Cluster eFEDSJ083654.6+025954 has 59 galaxies that passed redshift comparison
Cluster eFEDSJ083723.6+012413 has 4 galaxies that passed redshift comparison
Cluster eFEDSJ083759.5-003643 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083802.9+015626 has 4 galaxies that passed redshift comparison
Cluster eFEDSJ083806.9-003600 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083807.6+002501 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083834.1+020643 has 1 galaxies that passed redshift comparison
Cluster eFEDSJ083857.5+020846 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083858.5-015032 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083859.3+022841 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083900.6+020057 has 2 galaxies that passed redshift comparison
Cluster eFEDSJ083903.5-011454 has 0 galaxies that passed redshift comparison
Cluster eFEDSJ083921.0-014149 has 5 galaxies that passed redshift compariso

In [14]:
print(cluster_df_updated_pz)


      c_ID                 c_NAME         RA      DEC       z  distance_Mpc  \
0    16471  eFEDSJ083654.6+025954  129.22772  2.99841  0.1946    834.000000   
1     4731  eFEDSJ083723.6+012413  129.34868  1.40367  0.3400   1457.142857   
2     2456  eFEDSJ083759.5-003643  129.49812 -0.61210  0.4506   1931.142857   
3     4511  eFEDSJ083802.9+015626  129.51242  1.94074  0.3788   1623.428571   
4     5386  eFEDSJ083806.9-003600  129.52891 -0.60021  0.4342   1860.857143   
..     ...                    ...        ...      ...     ...           ...   
144   4168  eFEDSJ092136.4-001449  140.40200 -0.24707  0.3184   1364.571429   
145    857  eFEDSJ092212.0-002731  140.55033 -0.45865  0.3176   1361.142857   
146   3133  eFEDSJ092235.8-002443  140.64940 -0.41197  0.0635    272.142857   
147   1535  eFEDSJ092241.9+020719  140.67494  2.12222  0.1994    854.571429   
148  11530  eFEDSJ092346.8-005330  140.94517 -0.89183  0.3701   1586.142857   

     cluster_radius_Mpc  cluster_volume_Mpc3  clust

In [15]:
# Updating Galaxy dataframes

# Convert cluster_ID to the same dtype as c_ID for proper merging
galaxy_df_updated['cluster_ID'] = galaxy_df_updated['cluster_ID'].astype(cluster_df['c_ID'].dtype)

# Drop any existing 'c_NAME' columns to avoid conflicts during merge
columns_to_drop = [col for col in galaxy_df_updated.columns if 'c_NAME' in col]
galaxy_df_updated.drop(columns=columns_to_drop, inplace=True)

# Merge the DataFrame to add the cluster NAME
galaxy_df_updated = galaxy_df_updated.merge(cluster_df[['c_ID', 'c_NAME']], left_on='cluster_ID', right_on='c_ID', how='left')

# Drop the extra 'c_ID' column from the merge
galaxy_df_updated.drop(columns=['c_ID'], inplace=True)
print(len(galaxy_df_updated['cluster_ID']))

474


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  galaxy_df_updated['cluster_ID'] = galaxy_df_updated['cluster_ID'].astype(cluster_df['c_ID'].dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  galaxy_df_updated.drop(columns=columns_to_drop, inplace=True)


In [16]:

# Filter clusters to keep only those with galaxies that passed the position criteria
cluster_df_updated = cluster_df_updated[cluster_df_updated['num_position_passed_galaxies'] > 0].reset_index(drop=True)

# Update galaxy_df_updated to reflect only those galaxies within the filtered clusters
filtered_cluster_ids = cluster_df_updated['c_ID'].tolist()
galaxy_df_updated = galaxy_df_updated[galaxy_df_updated['cluster_ID'].isin(filtered_cluster_ids)].reset_index(drop=True)

# Display the updated DataFrame
print(galaxy_df_updated)
print(len(galaxy_df_updated['cluster_ID']))

                g_ID          RA       DEC        z  cluster_ID  \
0    133990088904009  134.499717 -0.705824  0.24617       11717   
1    134010044100374  134.541860  0.951856  0.07235        4070   
2    140000386508509  140.219406  0.717879  0.15659        6459   
3    132020203305825  132.792252  2.465094  0.38263        2101   
4    140000389808654  140.216326  0.731539  0.15739        6459   
..               ...         ...       ...      ...         ...   
469  136991114306938  136.534038 -0.430051  0.29864        9359   
470  131010161709609  131.431269  1.821439  0.05130        6215   
471  136991087411411  136.559441 -0.008810  0.19930        3259   
472  130990148605975  131.443428 -0.520688  0.15834        2214   
473  130990149606322  131.442491 -0.487985  0.15737        2214   

                    c_NAME  
0    eFEDSJ085801.1-004103  
1    eFEDSJ085805.0+010906  
2    eFEDSJ092105.0+004452  
3    eFEDSJ085119.9+022951  
4    eFEDSJ092105.0+004452  
..                   

In [17]:
print(len(galaxy_df_updated['cluster_ID']))

474


In [18]:
# Filter clusters to keep only those with galaxies that passed the position criteria
cluster_df_updated = cluster_df_updated[cluster_df_updated['num_position_passed_galaxies'] > 2].reset_index(drop=True)

# Update galaxy_df_updated to reflect only those galaxies within the filtered clusters
filtered_cluster_ids = cluster_df_updated['c_ID'].tolist()
galaxy_df_updated = galaxy_df_updated[galaxy_df_updated['cluster_ID'].isin(filtered_cluster_ids)].reset_index(drop=True)

# Check for galaxies in multiple clusters
galaxy_counts = galaxy_df_updated['g_ID'].value_counts()
multiple_clusters_galaxies = galaxy_counts[galaxy_counts > 1]

if not multiple_clusters_galaxies.empty:
    print("Galaxies found in multiple clusters:")
    for g_id, count in multiple_clusters_galaxies.items():
        print(f"Galaxy ID {g_id} is found in {count} clusters")
else:
    print("No galaxies found in multiple clusters")

No galaxies found in multiple clusters


In [19]:
# Add the column with the IDs of galaxies within each cluster
cluster_df_updated['galaxies_within'] = cluster_df_updated['position_passed_galaxies']

# Print the clusters and their galaxies
for index, row in cluster_df_updated.iterrows():
    num_galaxies = len(row['galaxies_within'])
    print(f"Cluster {row['c_NAME']} has {num_galaxies} galaxies within: {row['galaxies_within']}")

Cluster eFEDSJ083654.6+025954 has 11 galaxies within: [129020711711076, 129020725711047, 129020727111342, 129020765311309, 129020773010920, 129020809511497, 129020818111167, 129020837511203, 129020843911386, 129020867111101, 129020894311086]
Cluster eFEDSJ083930.3-014348 has 11 galaxies within: [129980691504042, 129980700203693, 129980718704484, 129980726504382, 129980737204294, 129980747104443, 129980747104360, 129980750004315, 129980751304575, 129980806004077, 129980845703771]
Cluster eFEDSJ083940.8+010416 has 18 galaxies within: [130000689012299, 130000701712294, 130000708212328, 130000679511150, 130000767611646, 130000685811355, 130000686411134, 130000699412255, 130010597801985, 130010679902465, 130010701601659, 130010708601734, 130010709502116, 130010724102332, 130010728402401, 130010728802028, 130010736202188, 130010807901039]
Cluster eFEDSJ084000.0-013109 has 4 galaxies within: [129980555905855, 129980579306061, 129980623405621, 129980633306669]
Cluster eFEDSJ084142.9+002841 has

In [20]:
print(min(cluster_df_updated['RA']))
print(max(cluster_df_updated['RA']))
print(min(cluster_df_updated['DEC']))
print(max(cluster_df_updated['DEC']))

129.22772
140.67494
-1.73019
2.99841


In [21]:
print(len(cluster_df_updated['galaxies_within']))
print(len(cluster_df_updated['position_passed_galaxies']))

46
46
