In [6]:
import astropy.io.fits as fits
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

from galaxy_cluster_matching import match_galaxies_and_clusters
from completeness import create_completeness_dataframe
from mass_function import get_weighted_mass_histogram
from constants import MASS_BINS
from constants import REGIONS_OF_SKY

In [10]:
# Loading the data
#
# Each catalogue lives in HDU 1 of its FITS file. fits.getdata() reads that
# extension and closes the file afterwards, so no HDUList handle is left
# open for the lifetime of the kernel (fits.open(...)[1].data never closed it).

# Root directory of all input catalogues; change this one line to relocate the data.
DATA_DIR = '/home/farnoosh/farnoosh/Master_Thesis_all/Data'

# big survey
sciencegkv_galaxies_raw = fits.getdata(
    f'{DATA_DIR}/GAMA/gkvScienceCatv02/gkvScienceCatv02.fits', ext=1)

# small survey (this is the main one)
galaxies_raw = fits.getdata(
    f'{DATA_DIR}/GAMA/merged/StellarMass-gkvScience/mergedStellarMass-gkvScience', ext=1)

# cluster survey
clusters_raw = fits.getdata(
    f'{DATA_DIR}/eRASS/merged_clusterprimary_optical/merged_primary_optical_clsuters.fits', ext=1)

# Row counts, printed in the order: big survey, small survey, clusters
print('number of galaxies: ', len(sciencegkv_galaxies_raw))
print('number of galaxies: ', len(galaxies_raw))
print('number of the clusters: ', len(clusters_raw))

number of galaxies:  2232985
number of galaxies:  370116
number of the clusters:  12247


In [11]:
# Quality cuts for the three catalogues.
#
# Thresholds used by both galaxy surveys are named once so the two selections
# cannot drift apart.
MAX_REDSHIFT = 0.4
MIN_FLUX_RT = 5.011928e-05  # maximum magnitude of 19.65 in r-band

# NOTE: further cuts were tried and are deliberately left out here:
#   * SC > 7 (95% redshift completeness limit for SC 7, and 98% for SC 8)
#   * NQ > 2 on the big survey (reliable redshift)
#   * Z != -9.999 on the big survey
#   * RA/Dec window 129 < RAcen < 141, -2 < Deccen < 3 (galaxies) and the
#     matching 128.5007 < RA < 141.5, -2.05 < DEC < 3.02 window (clusters)
#   * ~isnan(VDISP) on the clusters

# --- big survey ---
big_is_galaxy = sciencegkv_galaxies_raw['uberclass'] == 1      # classified as galaxy
big_in_redshift_range = sciencegkv_galaxies_raw['Z'] < MAX_REDSHIFT
big_is_unique = sciencegkv_galaxies_raw['duplicate'] == 0      # unique object
big_is_unmasked = sciencegkv_galaxies_raw['mask'] == False
big_is_off_starmask = sciencegkv_galaxies_raw['starmask'] == False
big_is_bright_enough = sciencegkv_galaxies_raw['flux_rt'] >= MIN_FLUX_RT

sciencegkv_galaxy_mask = (
    big_is_galaxy
    & big_in_redshift_range
    & big_is_unique
    & big_is_unmasked
    & big_is_off_starmask
    & big_is_bright_enough
)

# --- small survey ---
small_is_galaxy = galaxies_raw['uberclass'] == 1               # classified as galaxy
small_is_unique = galaxies_raw['duplicate'] == False
small_is_unmasked = galaxies_raw['mask'] == False
small_has_reliable_z = galaxies_raw['NQ'] > 2                  # reliable redshift
small_is_off_starmask = galaxies_raw['starmask'] == False
small_has_positive_mass = galaxies_raw['mstar'] > 0
small_in_redshift_range = galaxies_raw['Z'] < MAX_REDSHIFT
small_is_bright_enough = galaxies_raw['flux_rt'] >= MIN_FLUX_RT

galaxy_mask = (
    small_is_galaxy
    & small_is_unique
    & small_is_unmasked
    & small_has_reliable_z
    & small_is_off_starmask
    & small_has_positive_mass
    & small_in_redshift_range
    & small_is_bright_enough
)

# --- clusters ---
# Keep only clusters whose bootstrap velocity dispersion lies in (12, 10851];
# this drops clusters which don't have VDISP values.
cluster_vdisp = clusters_raw['VDISP_BOOT']
cluster_mask = (cluster_vdisp > 12) & (cluster_vdisp <= 10851)

# Apply the selections
big_galaxy_catalog = sciencegkv_galaxies_raw[sciencegkv_galaxy_mask]
galaxy_catalog = galaxies_raw[galaxy_mask]
cluster_catalog = clusters_raw[cluster_mask]

# Report how many objects survive each selection
print('number of the galaxies in big survey after masking: ', len(big_galaxy_catalog))
print('number of the galaxies in small survey after masking: ', len(galaxy_catalog))
print('eFEDS has 150 galaxies with VDISP in G09 region, however in the same region eRAS1 has 25 clusters.')
print('number of the clusters after masking: ', len(cluster_catalog))

number of the galaxies in big survey after masking:  193150
number of the galaxies in small survey after masking:  183515
eFEDS has 150 galaxies with VDISP in G09 region, however in the same region eRAS1 has 25 clusters.
number of the clusters after masking:  1906


In [12]:
# Build one completeness table per sky region and stack them.
#
# The per-region frames are collected in a list and concatenated once at the
# end: growing a DataFrame with pd.concat inside the loop copies all previous
# rows on every iteration, and concatenating onto an empty frame is deprecated
# in recent pandas releases.
completeness_frames = []

for region_name in REGIONS_OF_SKY.keys():
    completeness_region_df = create_completeness_dataframe(
        big_survey=big_galaxy_catalog,
        small_survey=galaxy_catalog,
        flux_type='flux_rt',
        region=region_name
    )
    # Tag every row with the region it was computed for
    completeness_region_df['region'] = region_name
    completeness_frames.append(completeness_region_df)

# An empty REGIONS_OF_SKY yields an empty frame (pd.concat rejects an empty list)
completeness_all_df = (
    pd.concat(completeness_frames, ignore_index=True) if completeness_frames else pd.DataFrame()
)

# Print the combined completeness table (pandas abbreviates long frames)
print(completeness_all_df)

  return np.cumsum(number_of_obj_small_survey)/np.cumsum(number_of_obj_big_survey)
  return np.cumsum(number_of_obj_small_survey)/np.cumsum(number_of_obj_big_survey)
  return np.cumsum(number_of_obj_small_survey)/np.cumsum(number_of_obj_big_survey)
  return np.cumsum(number_of_obj_small_survey)/np.cumsum(number_of_obj_big_survey)


                 uberID  completeness region
0       138021052006945      0.953327    G09
1       131010293406270      0.878219    G09
2       136000563202354      0.875673    G09
3       130020205007803      0.947321    G09
4       130990151906125      0.958461    G09
...                 ...           ...    ...
183509  344660640302033      0.860468    G23
183510  341690286909859      0.723858    G23
183511  339670781606938      0.671422    G23
183512  346700890705873      0.824393    G23
183513  338690341704233      0.850110    G23

[183514 rows x 3 columns]


In [5]:
# Dataframes
def _native_column(catalog, column_name):
    """Return one catalogue column converted to the machine's native byte order.

    The original cell swapped byte order before handing each column to pandas.
    ``ndarray.newbyteorder()`` was removed in NumPy 2.0, so the conversion is
    done with ``astype`` on the native-order dtype instead, which yields the
    same values.

    Raises:
        KeyError: if ``column_name`` is not in ``catalog``; the message lists
            the columns that are available to make a wrong name easy to spot.
    """
    try:
        column = catalog[column_name]
    except KeyError as error:
        raise KeyError(
            f"Column {column_name!r} not found in catalogue; "
            f"available columns: {catalog.dtype.names}"
        ) from error
    return column.astype(column.dtype.newbyteorder('='))


# DataFrame column name -> cluster catalogue column name
# NOTE(review): the recorded run of this cell failed with
# "Key 'R_LAMBDA' does not exist." -- confirm the name of the cluster-radius
# column in the merged cluster catalogue and update the entry below.
CLUSTER_COLUMNS = {
    'c_NAME': 'NAME',
    'RA': 'RA',
    'DEC': 'DEC',
    'z': 'BEST_Z',
    'cluster_radius_Mpc': 'R_LAMBDA',
    'cluster_Velocity_Dispersion': 'VDISP_BOOT',
    'VDISP_error': 'VDISP_BOOT_ERR',
}

# DataFrame column name -> galaxy catalogue column name
GALAXY_COLUMNS = {
    'uberID': 'uberID',
    'RA': 'RAcen',
    'DEC': 'Deccen',
    'z': 'Z',
    'mstar': 'mstar',
}

cluster_df = pd.DataFrame({
    frame_column: _native_column(cluster_catalog, catalog_column)
    for frame_column, catalog_column in CLUSTER_COLUMNS.items()
})

# Creating the galaxy DataFrame, including the stellar mass column
galaxy_df = pd.DataFrame({
    frame_column: _native_column(galaxy_catalog, catalog_column)
    for frame_column, catalog_column in GALAXY_COLUMNS.items()
})

print(galaxy_df)

KeyError: "Key 'R_LAMBDA' does not exist."

In [None]:
# match the galaxies with the clusters
matched_galaxy_dataframe = match_galaxies_and_clusters(galaxy_dataframe=galaxy_df, cluster_dataframe=cluster_df)
print(matched_galaxy_dataframe[matched_galaxy_dataframe['environment'] != 'Field'].head())
print(matched_galaxy_dataframe)

# Region the mass functions are computed for. The same name selects the
# completeness rows and is passed to every histogram call, so the two
# cannot disagree.
MASS_FUNCTION_REGION = "G09"

# merge the df with the completeness with the df with the galaxy environment
# The completeness rows are taken from the combined table. The previous code
# merged `completeness_region_df`, which is only the leftover loop variable of
# the completeness cell (i.e. whichever region was processed last), while the
# histograms below were requested for "G09".
completeness_for_region_df = completeness_all_df[completeness_all_df['region'] == MASS_FUNCTION_REGION]
merged_galaxy_df = pd.merge(completeness_for_region_df, matched_galaxy_dataframe, how="inner", on="uberID")
print(merged_galaxy_df[merged_galaxy_df["environment"] != "Field"])

# get the mass histograms (galaxy mass function) and errors
is_field_galaxy = merged_galaxy_df["environment"] == "Field"
is_cluster_member = merged_galaxy_df["environment"] == "ClusterMember"

# get the histogram for all galaxies
mass_histogram_all_galaxies, mass_histogram_all_galaxies_errors = get_weighted_mass_histogram(
    input_mass_completeness_dataframe=merged_galaxy_df, region_name=MASS_FUNCTION_REGION)

# get the histogram for all galaxies in the field
mass_histogram_field_only, mass_histogram_field_only_errors = get_weighted_mass_histogram(
    input_mass_completeness_dataframe=merged_galaxy_df[is_field_galaxy], region_name=MASS_FUNCTION_REGION)

# get the histogram for all galaxies which are cluster members
mass_histogram_cluster_members, mass_histogram_cluster_members_error = get_weighted_mass_histogram(
    input_mass_completeness_dataframe=merged_galaxy_df[is_cluster_member], region_name=MASS_FUNCTION_REGION)
# Plot the galaxy mass function for the whole sample and per environment.
fig, ax = plt.subplots(figsize=(24, 16))

# Fraction of the merged sample in each environment; the per-environment
# histograms are divided by these fractions before plotting.
ratio_of_field_galaxies = len(merged_galaxy_df[merged_galaxy_df["environment"] == "Field"]) / len(merged_galaxy_df)
ratio_of_cluster_galaxies = len(merged_galaxy_df[merged_galaxy_df["environment"] == "ClusterMember"]) / len(
    merged_galaxy_df)

# (legend label, histogram, errors, fraction to divide by)
mass_function_series = [
    ('All galaxies', mass_histogram_all_galaxies, mass_histogram_all_galaxies_errors, 1.0),
    ('Field only', mass_histogram_field_only, mass_histogram_field_only_errors, ratio_of_field_galaxies),
    ('Cluster members', mass_histogram_cluster_members, mass_histogram_cluster_members_error,
     ratio_of_cluster_galaxies),
]

for series_label, histogram, histogram_errors, sample_fraction in mass_function_series:
    # Each point is drawn at the lower edge of its mass bin
    ax.errorbar(
        MASS_BINS[:-1],
        histogram / sample_fraction,
        yerr=histogram_errors / sample_fraction,
        label=series_label,
    )

ax.set_xscale('log')
ax.set_yscale('log')
# Raw strings: the LaTeX backslashes are not valid Python escape sequences
ax.set_xlabel(r'Stellar Mass ($M_{\odot} \ h_{70}^{-2}$)', fontsize=16)
ax.set_ylabel(r'Number Density ($\mathrm{Mpc}^{-3} \ dex^{-1} \ h_{70}^{3}$)', fontsize=16)
ax.set_xlim((1e5, 1e12))
ax.set_ylim((10 ** -5.7, 1e0))
ax.tick_params(axis='both', which='major', labelsize=14)
ax.legend()
ax.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.show()

# Summarise which clusters host matched galaxies.

# One row per cluster name: the list of member uberIDs and how many there are
cluster_galaxies = matched_galaxy_dataframe.groupby('cluster_name')['uberID'].apply(list).reset_index()
cluster_galaxies['num_galaxies'] = cluster_galaxies['uberID'].apply(len)

# Print the clusters with galaxies
for host_cluster_name, member_ids in zip(cluster_galaxies['cluster_name'], cluster_galaxies['uberID']):
    print(f"Cluster {host_cluster_name} has the following {len(member_ids)} galaxies within: {member_ids}")

# Find the maximum number of galaxies in a cluster
# (idxmax() raises on an empty table, so that case is reported explicitly)
if cluster_galaxies.empty:
    print("No cluster has any matched galaxies.")
else:
    max_galaxies_row = cluster_galaxies.loc[cluster_galaxies['num_galaxies'].idxmax()]
    print(
        f"Cluster {max_galaxies_row['cluster_name']} has the maximum number of galaxies: {max_galaxies_row['num_galaxies']}")

# Count the number of clusters with only one galaxy within them
clusters_with_one_galaxy = (cluster_galaxies['num_galaxies'] == 1).sum()
print(f"Number of clusters with only one galaxy: {clusters_with_one_galaxy}")

# Calculate the total number of galaxies within clusters
total_galaxies = cluster_galaxies['num_galaxies'].sum()
print(f"Total number of galaxies within clusters: {total_galaxies}")

# Find clusters with no galaxies: names in the cluster table that never
# appear as a host in the matched galaxy table
all_cluster_names = cluster_df['c_NAME'].unique()
clusters_with_galaxies = matched_galaxy_dataframe['cluster_name'].dropna().unique()
clusters_without_galaxies = np.setdiff1d(all_cluster_names, clusters_with_galaxies)

print(len(clusters_with_galaxies))
print("Clusters with no galaxies within:", len(clusters_without_galaxies))
for cluster_name in clusters_without_galaxies:
    print(f"Cluster name: {cluster_name}")