# Load Packages

In [None]:
# Basic Packages
import numpy as np
import h5py
import logging
import os
import shutil
import gc
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import linregress, pearsonr, spearmanr
import seaborn as sns
from scipy.optimize import curve_fit
import scipy.stats as stats
from scipy.ndimage import gaussian_filter
import matplotlib.patches as patches
from skimage.feature import peak_local_max
from matplotlib.collections import PatchCollection
from scipy.stats import pearsonr
from astropy.io import fits
from scipy.stats import kendalltau
from adjustText import adjust_text

# Physics-related Packages
from astropy.cosmology import Planck15

# Path

In [None]:
# Root directory of the aperture-mass-map products.  The trailing slash is
# required because later paths are built by plain string concatenation.
Base_Path = 'users_path/merger_trace/data/LoVoCCS/aperture_mass_maps/'
# Directory that holds one sub-directory per cluster.
Cluster_Path = ''.join((Base_Path, 'Clusters/'))

# Helper Functions

In [None]:
def Read_SNfile(Cluster_Name):
    """
    Load the aperture-mass S/N map of one cluster together with its WCS.

    The cluster's peak-pixel CSV supplies the stored peak position and the
    filter scale ``Rs`` (first row of the ``x``, ``y`` and ``Rs`` columns);
    ``Rs`` selects which ``*_M_ap_SNR_b100_Rs{Rs}.fits`` file is opened.

    Parameters:
    - Cluster_Name (str): Cluster identifier used in directory and file names.

    Returns:
    - world_coords (np.ndarray): ``wcs.pixel_to_world_values`` evaluated at the
      four hard-coded image corners, converted to an array.
    - wcs (astropy.wcs.WCS): WCS built from the primary HDU header.
    - SN_data: Data of the primary HDU (the S/N map).
    - Rs (int): Filter scale read from the peak-pixel CSV.
    - x, y: First-row values of the CSV ``x`` and ``y`` columns.
    """
    # Imported here because the notebook's import cell never brings WCS into
    # scope; without it the call below raises NameError.
    from astropy.wcs import WCS

    File_Path = Cluster_Path + f'{Cluster_Name}' + '/mass_map_output/'

    # Find the suitable Rs, i.e. the one where the S/N of the central peak
    # reaches its maximum (as stored in the peak-pixel CSV).
    PeakPixel_Name = File_Path + f'{Cluster_Name}_mass_map_peak_pixel.csv'
    PeakPixel_File = pd.read_csv(PeakPixel_Name)
    x = PeakPixel_File['x'].iloc[0]
    y = PeakPixel_File['y'].iloc[0]
    Rs = int(PeakPixel_File['Rs'].iloc[0])
    SN_File = File_Path + f'{Cluster_Name}_M_ap_SNR_b100_Rs{Rs}.fits'

    with fits.open(SN_File) as SNf:
        print('***File Info***')
        # HDUList.info() prints the summary itself and returns None, so it is
        # called directly instead of being wrapped in print().
        SNf.info()
        # Get WCS information from the header
        wcs = WCS(SNf[0].header)
        print(repr(SNf[0].header))
        SN_data = SNf[0].data

    # Four corners of the image in pixel coordinates.
    # NOTE(review): assumes a 240-pixel-wide map — confirm against SN_data.shape.
    corners = [(0, 0), (240, 0), (0, 240), (240, 240)]
    world_coords = np.array(wcs.pixel_to_world_values(corners))

    return world_coords, wcs, SN_data, Rs, x, y

In [None]:
def plot_scatter_with_fit(x, y, x_label="X Variable", y_label="Y Variable", title="Scatter Plot with Correlation", scale = False, constrained = False):
    """
    Plots a scatter plot of x vs y with a linear fit, a shaded band around the
    fit, and Pearson/Spearman correlation coefficients.

    Parameters:
    - x (array-like): Independent variable
    - y (array-like): Dependent variable
    - x_label (str): Label for X-axis
    - y_label (str): Label for Y-axis
    - title (str): Title of the plot
    - scale (bool): If True, both axes are drawn on a "symlog" scale.
    - constrained (bool): If True, fit ``y = a * x`` (no intercept) with
      ``curve_fit``; otherwise fit slope and intercept with ``linregress``.

    Returns:
    - None. The figure is shown and the correlation results are printed.
    """
    # Convert up front so that lists/tuples/Series all support the boolean
    # mask indexing below (plain lists would raise TypeError).
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Remove NaN values for correlation and plotting
    mask = ~np.isnan(x) & ~np.isnan(y)
    x, y = x[mask], y[mask]

    # Compute correlations once; they do not depend on the fit model
    pearson_corr, pearson_pval = pearsonr(x, y)
    spearman_corr, spearman_pval = spearmanr(x, y)

    n = len(x)
    x_fit = np.linspace(min(x), max(x), 100)

    if not constrained:
        # Ordinary least-squares line with intercept
        fit = linregress(x, y)
        slope, intercept = fit.slope, fit.intercept
        y_fit = slope * x_fit + intercept

        # Band around the fitted mean, using the normal quantile 1.96
        t_value = 1.96
        residuals = y - (slope * x + intercept)
        s_res = np.sqrt(np.sum(residuals**2) / (n - 2))
        ci = t_value * s_res * np.sqrt(1/n + (x_fit - np.mean(x))**2 / np.sum((x - np.mean(x))**2))

        fit_label = f"Fit: y = {slope:.3e}x + {intercept:.3e}"
        # This branch uses matplotlib's default linthresh on the x axis
        x_scale_kwargs = {}
    else:
        # Define linear model without intercept
        def linear_model(x, a):
            return a * x

        params, covariance = curve_fit(linear_model, x, y)
        slope = params[0]
        y_fit = linear_model(x_fit, slope)

        # Constant-width band: 1.96 times the standard error of the residuals
        residuals = y - linear_model(x, slope)
        std_err = np.std(residuals) / np.sqrt(n)
        ci = 1.96 * std_err

        fit_label = f"Fit: y = {slope:.3e}x"
        x_scale_kwargs = {"linthresh": 10000}

    # Plot scatter plot, fitted line and band
    plt.figure(figsize=(8, 6))
    plt.scatter(x, y, alpha=0.5, color="royalblue", s=20, label="Data")
    plt.plot(x_fit, y_fit, color="red", label=fit_label)
    plt.fill_between(x_fit, y_fit - ci, y_fit + ci, color="pink", alpha=0.3, label="95% Confidence Interval")

    # Set axis scales
    if scale:
        plt.xscale("symlog", **x_scale_kwargs)
        plt.yscale("symlog", linthresh=1e11)

    # Labels and title
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(f"{title}\nPearson: {pearson_corr:.3f}, Spearman: {spearman_corr:.3f}")
    plt.legend()
    plt.grid(True)

    # Show plot
    plt.show()

    # Print correlation results
    print(f"Pearson correlation: {pearson_corr:.3f} (p-value: {pearson_pval:.3e})")
    print(f"Spearman correlation: {spearman_corr:.3f} (p-value: {spearman_pval:.3e})")

# find_peaks

In [None]:
def find_peaks(SN_map, min_distance, threshold_abs):
    """
    Detect local maxima of a 2-D map and return them brightest first.

    Parameters:
    - SN_map (np.ndarray): 2-D map searched for peaks.
    - min_distance: Forwarded to ``peak_local_max`` as ``min_distance``.
    - threshold_abs: Forwarded to ``peak_local_max`` as ``threshold_abs``.

    Returns:
    - np.ndarray of (row, col) indices ordered by decreasing ``SN_map`` value.
      Indexing the detections (rather than rebuilding the array from a sorted
      list) preserves their 2-D shape and integer dtype even when nothing is
      detected.
    """
    # Extract peak positions
    peaks = peak_local_max(SN_map, min_distance=min_distance, threshold_abs=threshold_abs)
    # Sort the peaks by intensity (highest to lowest); the stable sort keeps
    # the detection order for peaks with equal values.
    peak_values = SN_map[peaks[:, 0], peaks[:, 1]]
    order = np.argsort(-peak_values, kind="stable")

    return peaks[order]

## Visualize

In [None]:
def Check_Peaks_Plots(Halo_ID, SN_map1, peaks1_sorted, SAVE= False):
    """
    Show an S/N map with its detected peaks overlaid, colour-coded by rank.

    Parameters:
    - Halo_ID: Identifier of the plotted object (currently not used in the plot).
    - SN_map1 (np.ndarray): 2-D map to display.
    - peaks1_sorted (np.ndarray): (row, col) peak indices, brightest first.
      An empty array is accepted and simply draws no markers.
    - SAVE (bool): If True, the figure is also written with ``plt.savefig``.

    Returns:
    - None. The figure is shown.
    """
    # Create figure with adjusted size
    fig, ax1 = plt.subplots(1, 1, figsize=(6, 6), constrained_layout=True)

    # (legend label, colour, rows of the sorted peak array) per rank group
    peak_groups = [
        ("Central Peaks", "cyan", slice(0, 1)),
        ("Second Peaks", "lime", slice(1, 2)),
        ("Third Peaks", "orange", slice(2, 3)),
        ("Other Peaks", "magenta", slice(3, None)),
    ]

    # Normalise to an (n, 2) array so that an empty input cannot raise
    # IndexError when it is indexed below.
    peaks = np.asarray(peaks1_sorted).reshape(-1, 2)

    # ---- Peak detection in S/N Map 1 ----
    ax1.imshow(SN_map1, cmap='viridis', origin='lower', interpolation='nearest')
    for label, colour, rows in peak_groups:
        group = peaks[rows]
        if len(group) == 0:
            continue
        # Peaks are stored as (row, col) while scatter expects (x, y), so the
        # two columns are swapped here.
        ax1.scatter(group[:, 1], group[:, 0], color=colour, s=10, label=label)
    ax1.set_title("Peak Detection in S/N Map 1")
    ax1.set_aspect('equal')
    if len(peaks) > 0:
        # White background legend; skipped when there is nothing to label
        ax1.legend(loc="upper right", fontsize=10, frameon=True, facecolor="white", edgecolor="black")

    if SAVE:
        # NOTE(review): 'save_path' looks like a placeholder file name — confirm.
        plt.savefig('save_path', dpi=300, bbox_inches='tight')

    plt.show()

## Convert Peak Pixel to RA,DEC

In [None]:
def peak_unit_convert(wcs, x_pixel, y_pixel):
    """
    Convert a pixel position to world coordinates with ``wcs.all_pix2world``.

    Parameters:
    - wcs: Object providing ``all_pix2world(x, y, origin)``.
    - x_pixel, y_pixel: Pixel coordinates; each is increased by one before the
      conversion.

    Returns:
    - (ra, dec): The two values returned by ``all_pix2world``.
    """
    # NOTE(review): the inputs are shifted by +1 yet passed with origin=0;
    # confirm the one-pixel offset is intended.
    shifted_x = x_pixel + 1
    shifted_y = y_pixel + 1
    ra, dec = wcs.all_pix2world(shifted_x, shifted_y, 0)
    return ra, dec

In [None]:
def ra_dec2xy_pixels(ra, dec, Cluster_Name):
    """
    Look up catalogue ``x``/``y`` values closest to a sky position.

    The cluster's galaxy catalogue CSV is read, the row whose ``ra`` is
    closest to ``ra`` provides ``x`` and the row whose ``dec`` is closest to
    ``dec`` provides ``y``.  The two rows are found independently and need not
    be the same catalogue entry.

    Parameters:
    - ra, dec: Target sky coordinates, in the units of the CSV columns.
    - Cluster_Name (str): Cluster identifier used to build the CSV path.

    Returns:
    - (x, y): Values of the ``x`` and ``y`` columns at the matched rows.
    """
    file_path = Cluster_Path + f"{Cluster_Name}/mass_map_output/{Cluster_Name}_00-1111_gal_dered_dezp_bpz_merge_cut_shear_calib_merge_cut.csv"
    df = pd.read_csv(file_path)

    # argmin() yields a position, not an index label, so the lookups below
    # use .iloc; this stays correct even if the frame's index is not 0..n-1.
    x_indice = (df['ra'] - ra).abs().argmin()
    y_indice = (df['dec'] - dec).abs().argmin()

    return df['x'].iloc[x_indice], df['y'].iloc[y_indice]


# Find Peak and Generate Profile for Mass Fitting

In [None]:
# Cluster identifiers analysed in this notebook.  The first 29 entries form the
# "gen 3" sample and the remaining 41 the "gen 2" sample; the per-cluster
# arrays defined later are index-aligned with this list, so the order matters.
# Names that appear only inside comments are excluded from the sample.
clusters = np.array([
    # ***gen 3 clusters***
    "A1285", "A1307", "A1650", "A1651", "A1750",
    "A2029", 
    # "A2055", 
    "A2147", "A2420", "A2426",
    # "A2457", 
    "A2597", "A2811", "A2837", "A3112", "A3158", 
    "A3266", "A3558", "A3571", #"A3667", 
    "A3695", 
    # "A3814", 
    "A3822", "A3827", "A3911", "A3921", 
    "A401", "A4010", "A754", "A780", #"A85", 
    "RXCJ1217.6+0339", "RXCJ1539.5-8335", "RXCJ1558.3-1410",

    # ***gen 2 clusters***
    "A1023", #A119
    "A1205", "A133", "A1348", "A1606", "A1644",
    "A1736", #"A2033"
    "A2050", #A2051
    "A2052", "A2055", #"A2063", #"A2142",
    "A2351", "A2384", "A2428", "A2440", "A2443",
    "A2533", "A2556", "A2670", #"A2703", 
    #"A2721", #A2734,
    "A2941", "A3126", "A3128", #A3376,
    "A3395", "A3528", "A3532", #A3628,
    "A3667", #A3806, #A3809,
    "A3814", "A3825", "A3836","A3998","A4059",
    "A458", #"A496", "A539",
    "A761", "A763", #"A85", 
    "RXCJ0049.4-2931", "RXCJ0351.1-8212",
    # RXCJ0524.4+0819,
    "RXCJ1139.4-3327", "RXCJ1215.4-3900", "RXCJ1332.9-2519", #RXCJ1407.8-5100, #RXCJ1524.2-3154,
    # RXCJ1958.2-3011,
    "RXCJ2218.2-0350", "RXCJ2313.9-4244", #RXCJ2344.2-0422, 
    "RXJ0820.9+0751"])    
# A3836 has a newer version available, but an older version is currently used.

# A2721 does not have a merge_cut_merge_cut.csv file.

# A85 does not have a merge_cut_merge_cut.csv file.


In [None]:
# Disabled alternative: a gen-2-only definition of `clusters`, kept inside a
# string literal so that it is not executed.
'''
clusters = np.array([
    # ***gen 2 clusters***
    "A1023", #A119
    "A1205", "A133", "A1348", "A1606", "A1644",
    "A1736", #"A2033"
    "A2050", #A2051
    "A2052", "A2055", #"A2063", #"A2142",
    "A2351", "A2384", "A2428", "A2440", "A2443",
    "A2533", "A2556", "A2670", #"A2703", 
    #"A2721", #A2734,
    "A2941", "A3126", "A3128", #A3376,
    "A3395", "A3528", "A3532", #A3628,
    "A3667", #A3806, #A3809,
    "A3814", "A3825", "A3836","A3998","A4059",
    "A458", #"496", "A539",
    "A761", "A763", #"A85", 
    "RXCJ0049.4-2931", "RXCJ0351.1-8212",
    # RXCJ0524.4+0819,
    "RXCJ1139.4-3327", "RXCJ1215.4-3900", "RXCJ1332.9-2519", #RXCJ1407.8-5100, #RXCJ1524.2-3154,
    # RXCJ1958.2-3011,
    "RXCJ2218.2-0350", "RXCJ2313.9-4244", #RXCJ2344.2-0422, 
    "RXJ0820.9+0751"])    
'''

In [None]:
# Per-cluster results, index-aligned with `clusters`.  Entries stay at 0 for
# clusters where fewer than two peaks are found.
error = 0
n_clusters = len(clusters)
separations = np.zeros(n_clusters, dtype=float)
quantities = np.zeros(n_clusters, dtype=float)
peak_numbers = np.zeros(n_clusters, dtype=float)

for i, Cluster_Name in enumerate(clusters):
    world_coords, wcs, SN_data, Rs, x_fit, y_fit = Read_SNfile(Cluster_Name)

    # Smoothed map for peak finding; the residual is kept for inspection only
    SN_smooth = gaussian_filter(SN_data, sigma=10)
    SN_highpass = SN_data - SN_smooth

    peaks_sorted = find_peaks(SN_smooth, 2, 0.05)
    n_peaks = len(peaks_sorted)
    print("***")
    print(peaks_sorted)
    # Each peak is an index pair into the smoothed map
    for j, (x0, y0) in enumerate(peaks_sorted):
        peak_value = SN_smooth[x0, y0]
        print(peak_value, '')
    print('peak number', n_peaks)

    peak_numbers[i] = n_peaks

    if n_peaks < 2:
        print('no peaks')
    else:
        Check_Peaks_Plots(Cluster_Name, SN_smooth, peaks_sorted, SAVE= False)

        # Peaks are (row, col); the converters take (x, y) = (col, row)
        ra1, dec1 = peak_unit_convert(wcs, peaks_sorted[0][1], peaks_sorted[0][0])
        x1, y1 = ra_dec2xy_pixels(ra1, dec1, Cluster_Name)
        ra2,dec2 = peak_unit_convert(wcs, peaks_sorted[1][1], peaks_sorted[1][0])
        x2, y2 = ra_dec2xy_pixels(ra2, dec2, Cluster_Name)

        print(f'*** results for {Cluster_Name}')
        print(f'first peak at {peaks_sorted[0][1],peaks_sorted[0][0]}')
        print(f'first peak: x at {x1}, y at {y1} with {SN_data[peaks_sorted[0][0],peaks_sorted[0][1]]}')
        print(f'first peak: x at {x1}, y at {y1} with {SN_smooth[peaks_sorted[0][0],peaks_sorted[0][1]]}')
        print("***")
        print(f'second peak at {peaks_sorted[1][1],peaks_sorted[1][0]}')
        print(f'second peak: x at {x2}, y at {y2} with {SN_data[peaks_sorted[1][0],peaks_sorted[1][1]]}')
        print(f'second peak: x at {x2}, y at {y2} with {SN_smooth[peaks_sorted[1][0],peaks_sorted[1][1]]}')

        # Pixel distance between the two brightest peaks
        peak_offset = np.linalg.norm(peaks_sorted[0] - peaks_sorted[1])
        separations[i] = float(peak_offset)
        print(peak_offset)
        print(separations[i])

        # Proxy quantity: (second peak / first peak) * separation
        peak_ratio = SN_smooth[tuple(peaks_sorted[1])] / SN_smooth[tuple(peaks_sorted[0])]
        print('second_peak_intensity/frist * separation', peak_ratio*separations[i])
        quantities[i] = peak_ratio * peak_offset

        # Accumulate the offset between the stored peak pixel and the
        # catalogue position matched to the brightest peak
        error += np.abs(x_fit-x1)
        error += np.abs(y_fit-y1)

    # Side-by-side view of the raw, smoothed and high-pass maps
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    panels = (
        (SN_data, f"Original S/N Data for {Cluster_Name}"),
        (SN_smooth, f"Smoothed S/N (Gaussian Filter) for {Cluster_Name}"),
        (SN_highpass, f"High-Pass S/N (Detail Extraction) for {Cluster_Name}"),
    )
    for axis, (image, panel_title) in zip(axes, panels):
        shown = axis.imshow(image, cmap='viridis', origin='lower')
        axis.set_title(panel_title)
        plt.colorbar(shown, ax=axis)

    plt.tight_layout()
    plt.show()

# Mass Ratio

**A1285**:
- A1285: $9.30_{-1.92}^{+ 2.16} \times 10^{14}M_\odot$
- A1285*: $0.82_{-0.54}^{+ 0.96} \times 10^{14}M_\odot$

**A1307**
- A1307: $1.02_{-0.54}^{+ 0.80} \times 10^{14}M_\odot$
- A1307*: $0.41_{-0.32}^{+ 0.78} \times 10^{14}M_\odot$

**A1650**
- A1650: $6.90_{-1.50}^{+ 1.76} \times 10^{14}M_\odot$
- A1650*: $1.79_{-0.88}^{+ 1.18} \times 10^{14}M_\odot$

**A1651**
- A1651: $7.29_{-1.54}^{+ 1.77} \times 10^{14}M_\odot$
- A1651*: $1.16_{-0.72}^{+ 1.10} \times 10^{14}M_\odot$

**A1750**
- A1750: $4.68_{-1.25}^{+ 1.48} \times 10^{14}M_\odot$
- A1750*: $0.28_{-0.26}^{+ 0.74} \times 10^{14}M_\odot$

**A2029**
- A2029: $12.47_{-1.73}^{+ 1.82} \times 10^{14}M_\odot$
- A2029*: $4.67_{-1.23}^{+ 1.46} \times 10^{14}M_\odot$

**A2147**
- A2147: $2.10_{-0.87}^{+ 1.08} \times 10^{14}M_\odot$
- A2147*: $1.40_{-0.72}^{+ 1.03} \times 10^{14}M_\odot$

**A2420**
- A2420: $5.32_{-1.28}^{+ 1.40} \times 10^{14}M_\odot$
- A2420*: $0.38_{-0.32}^{+ 0.88} \times 10^{14}M_\odot$

**A2426**
- A2426: $5.59_{-1.12}^{+ 1.40} \times 10^{14}M_\odot$
- A2426*: $2.24_{-0.85}^{+ 1.03} \times 10^{14}M_\odot$

**A2597**
- A2597: $4.01_{-0.97}^{+ 1.12} \times 10^{14}M_\odot$
- A2597*: $0.26_{-0.23}^{+ 0.63} \times 10^{14}M_\odot$

**A2811**
- A2811: $6.04_{-1.30}^{+ 1.46} \times 10^{14}M_\odot$
- A2811*: $0.77_{-0.59}^{+ 1.17} \times 10^{14}M_\odot$

**A2837**
- A2837: $4.49_{-1.37}^{+ 1.67} \times 10^{14}M_\odot$
- A2837*: $0.50_{-0.54}^{+ 1.73} \times 10^{14}M_\odot$

**A3112**
- A3112: $4.09_{-1.12}^{+ 1.28} \times 10^{14}M_\odot$
- A3112*: $3.34_{-1.04}^{+ 1.29} \times 10^{14}M_\odot$

**A3158**
- A3158: $9.84_{-1.71}^{+ 1.80} \times 10^{14}M_\odot$
- A3158*: $1.22_{-0.63}^{+ 0.93} \times 10^{14}M_\odot$

**A3266**
- A3266: $12.71_{-1.83}^{+ 2.06} \times 10^{14}M_\odot$
- A3266*: $0.58_{-0.49}^{+ 1.17} \times 10^{14}M_\odot$

**A3558**
- A3558: $11.39_{-2.08}^{+ 2.58} \times 10^{14}M_\odot$
- A3558*: $2.27_{-1.31}^{+ 1.87} \times 10^{14}M_\odot$

**A3571**
- A3571: $3.94_{-1.28}^{+ 1.61} \times 10^{14}M_\odot$
- A3571*: $1.76_{-0.87}^{+ 1.42} \times 10^{14}M_\odot$

**A3695**
- no mass estimates recorded (entered as NaN in the mass arrays)

**A3822**
- A3822: $12.10_{-1.78}^{+ 1.99} \times 10^{14}M_\odot$
- A3822*: $0.32_{-0.33}^{+ 1.10} \times 10^{14}M_\odot$

**A3827**
- A3827: $9.64_{-1.47}^{+ 1.69} \times 10^{14}M_\odot$
- A3827*: $1.89_{-0.97}^{+ 1.17} \times 10^{14}M_\odot$

**A3911**
- A3911: $4.64_{-1.11}^{+ 1.32} \times 10^{14}M_\odot$
- A3911*: $2.13_{-1.06}^{+ 1.40} \times 10^{14}M_\odot$

**A3921**
- A3921: $3.48_{-1.30}^{+ 1.50} \times 10^{14}M_\odot$
- A3921*: $2.18_{-1.28}^{+ 1.86} \times 10^{14}M_\odot$

**A401**
- A401: $15.69_{-2.84}^{+ 3.23} \times 10^{14}M_\odot$
- A401*: $4.36_{-1.59}^{+ 1.99} \times 10^{14}M_\odot$

**A4010**
- A4010: $3.43_{-1.09}^{+ 1.28} \times 10^{14}M_\odot$
- A4010*: $0.16_{-0.21}^{+ 0.80} \times 10^{14}M_\odot$


**A780**
- A780: $2.97_{-0.98}^{+ 1.20} \times 10^{14}M_\odot$
- A780*: $0.05_{-0.08}^{+ 0.15} \times 10^{14}M_\odot$

**RXCJ1217.6+0339**
- RXCJ1217.6+0339: $7.09_{-1.76}^{+ 1.98} \times 10^{14}M_\odot$
- RXCJ1217.6+0339*: $2.39_{-1.27}^{+ 1.70} \times 10^{14}M_\odot$

**RXCJ1539.5-8335**
- RXCJ1539.5-8335: $5.19_{-2.30}^{+ 3.13} \times 10^{14}M_\odot$
- RXCJ1539.5-8335*: $0.81_{-0.86}^{+ 2.62} \times 10^{14}M_\odot$

**RXCJ1558.3-1410**
- RXCJ1558.3-1410: $1.36_{-0.75}^{+ 1.09} \times 10^{14}M_\odot$
- RXCJ1558.3-1410*: $2.65_{-1.29}^{+ 1.52} \times 10^{14}M_\odot$

## GEN2 clusters

**A1023**
- A1023: $3.52_{-1.07}^{+ 1.29} \times 10^{14}M_\odot$
- A1023*: $2.06_{-0.93}^{+ 1.25} \times 10^{14}M_\odot$

**A1205**
- A1205: $4.60_{-1.16}^{+ 1.40} \times 10^{14}M_\odot$
- A1205*: $0.73_{-0.54}^{+ 1.15} \times 10^{14}M_\odot$

**A133**
- A133: $5.22_{-1.03}^{+ 1.18} \times 10^{14}M_\odot$
- A133*: $0.72_{-0.42}^{+ 0.57} \times 10^{14}M_\odot$

**A1348**
- A1348: $4.67_{-1.21}^{+ 1.38} \times 10^{14}M_\odot$
- A1348*: $3.58_{-1.29}^{+ 1.55} \times 10^{14}M_\odot$

**A1606**
- A1606: $3.99_{-1.12}^{+ 1.34} \times 10^{14}M_\odot$
- A1606*: $0.15_{-0.15}^{+ 0.41} \times 10^{14}M_\odot$

**A1644**
- only one peak detected

**A1736**
- A1736: $5.43_{-1.66}^{+ 1.97} \times 10^{14}M_\odot$
- A1736*: $0.51_{-0.40}^{+ 1.05} \times 10^{14}M_\odot$

**A2050**
- A2050: $2.00_{-0.78}^{+ 0.97} \times 10^{14}M_\odot$
- A2050*: $1.82_{-1.11}^{+ 1.52} \times 10^{14}M_\odot$

**A2052**
- A2052: $0.70_{-0.34}^{+ 0.60} \times 10^{14}M_\odot$
- A2052*: $0.54_{-0.37}^{+ 0.68} \times 10^{14}M_\odot$

**A2055**
- A2055: $0.46_{-0.29}^{+ 0.51} \times 10^{14}M_\odot$
- A2055*: $3.02_{-1.32}^{+ 1.52} \times 10^{14}M_\odot$

**A2351**
- A2351: $2.33_{-0.82}^{+ 1.13} \times 10^{14}M_\odot$
- A2351*: $1.03_{-0.61}^{+ 0.98} \times 10^{14}M_\odot$

**A2384**
- A2384: $6.99_{-1.39}^{+ 1.52} \times 10^{14}M_\odot$
- A2384*: $0.60_{-0.45}^{+ 1.01} \times 10^{14}M_\odot$

**A2428**
- A2428: $1.15_{-0.48}^{+ 0.75} \times 10^{14}M_\odot$
- A2428*: $2.29_{-1.10}^{+ 1.35} \times 10^{14}M_\odot$

**A2440**
- A2440: $5.96_{-1.47}^{+ 1.63} \times 10^{14}M_\odot$
- A2440*: $1.26_{-0.79}^{+ 1.41} \times 10^{14}M_\odot$

**A2443**
- A2443: $8.30_{-1.62}^{+ 1.71} \times 10^{14}M_\odot$
- A2443*: $0.62_{-0.51}^{+ 1.29} \times 10^{14}M_\odot$

**A2533**
- A2533: $6.79_{-1.57}^{+ 1.82} \times 10^{14}M_\odot$
- A2533*: $0.72_{-0.53}^{+ 1.08} \times 10^{14}M_\odot$

**A2556**
- A2556: $2.46_{-1.03}^{+ 1.20} \times 10^{14}M_\odot$
- A2556*: $2.88_{-1.14}^{+ 1.37} \times 10^{14}M_\odot$

**A2670**
- A2670: $10.41_{-1.93}^{+ 2.15} \times 10^{14}M_\odot$
- A2670*: $2.88_{-1.65}^{+ 2.10} \times 10^{14}M_\odot$

**A2941**
- A2941: $5.94_{-1.52}^{+ 1.73} \times 10^{14}M_\odot$
- A2941*: $3.23_{-1.24}^{+ 1.47} \times 10^{14}M_\odot$

**A3126**
- A3126: $7.85_{-1.53}^{+ 1.78} \times 10^{14}M_\odot$
- A3126*: $3.47_{-1.25}^{+ 1.53} \times 10^{14}M_\odot$

**A3128**
- A3128: $8.89_{-1.67}^{+ 2.00} \times 10^{14}M_\odot$
- A3128*: $0.08_{-0.11}^{+ 0.30} \times 10^{14}M_\odot$

**A3395**
- A3395: $6.47_{-1.49}^{+ 1.72} \times 10^{14}M_\odot$
- A3395*: $0.59_{-0.42}^{+ 0.82} \times 10^{14}M_\odot$

**A3528**
- only one peak detected

**A3532**
- A3532: $4.38_{-1.25}^{+ 1.47} \times 10^{14}M_\odot$
- A3532*: $3.43_{-1.27}^{+ 1.66} \times 10^{14}M_\odot$

**A3667**
- A3667: $6.83_{-1.61}^{+ 1.76} \times 10^{14}M_\odot$
- A3667*: $3.21_{-1.03}^{+ 1.18} \times 10^{14}M_\odot$

**A3814**
- A3814: $0.91_{-0.70}^{+ 1.14} \times 10^{14}M_\odot$
- A3814*: $2.75_{-1.25}^{+ 1.39} \times 10^{14}M_\odot$

**A3825**
- A3825: $8.14_{-1.59}^{+ 1.84} \times 10^{14}M_\odot$
- A3825*: $0.48_{-0.32}^{+ 0.58} \times 10^{14}M_\odot$

**A3836**
- A3836: $6.07_{-1.35}^{+ 1.60} \times 10^{14}M_\odot$
- A3836*: $1.82_{-0.73}^{+ 0.91} \times 10^{14}M_\odot$

**A3998**
- A3998: $4.63_{-1.52}^{+ 1.66} \times 10^{14}M_\odot$
- A3998*: $0.97_{-0.77}^{+ 1.74} \times 10^{14}M_\odot$

**A4059**
- A4059: $0.99_{-0.50}^{+ 0.73} \times 10^{14}M_\odot$
- A4059*: $1.99_{-0.86}^{+ 1.13} \times 10^{14}M_\odot$

**A458**
- A458: $9.02_{-5.21}^{+ 10.63} \times 10^{14}M_\odot$
- A458*: $16.79_{-8.07}^{+ 12.31} \times 10^{14}M_\odot$

**A761**
- A761: $3.32_{-0.90}^{+ 1.04} \times 10^{14}M_\odot$
- A761*: $1.00_{-0.69}^{+ 1.62} \times 10^{14}M_\odot$

**A763**
- A763: $1.60_{-0.81}^{+ 1.04} \times 10^{14}M_\odot$
- A763*: $1.43_{-0.94}^{+ 1.29} \times 10^{14}M_\odot$

**RXCJ0049.4-2931**
- RXCJ0049.4-2931: $3.78_{-1.06}^{+ 1.22} \times 10^{14}M_\odot$
- RXCJ0049.4-2931*: $1.49_{-0.84}^{+ 1.21} \times 10^{14}M_\odot$

**RXCJ0351.1-8212**
- RXCJ0351.1-8212: $3.01_{-1.17}^{+ 1.40} \times 10^{14}M_\odot$
- RXCJ0351.1-8212*: $2.00_{-0.95}^{+ 1.34} \times 10^{14}M_\odot$

**RXCJ1139.4-3327**
- RXCJ1139.4-3327: $2.98_{-1.00}^{+ 1.21} \times 10^{14}M_\odot$
- RXCJ1139.4-3327*: $1.94_{-0.84}^{+ 1.18} \times 10^{14}M_\odot$

**RXCJ1215.4-3900**
- RXCJ1215.4-3900: $5.64_{-1.69}^{+ 2.10} \times 10^{14}M_\odot$
- RXCJ1215.4-3900*: $1.59_{-1.04}^{+ 1.66} \times 10^{14}M_\odot$

**RXCJ1332.9-2519**
- RXCJ1332.9-2519: $2.14_{-0.86}^{+ 1.07} \times 10^{14}M_\odot$
- RXCJ1332.9-2519*: $0.56_{-0.48}^{+ 1.36} \times 10^{14}M_\odot$

**RXCJ2218.2-0350**
- RXCJ2218.2-0350: $6.45_{-1.65}^{+ 1.92} \times 10^{14}M_\odot$
- RXCJ2218.2-0350*: $0.82_{-0.57}^{+ 1.16} \times 10^{14}M_\odot$

**RXCJ2313.9-4244**
- RXCJ2313.9-4244: $1.52_{-0.62}^{+ 0.80} \times 10^{14}M_\odot$
- RXCJ2313.9-4244*: $1.45_{-0.66}^{+ 0.94} \times 10^{14}M_\odot$

**RXJ0820.9+0751**
- RXJ0820.9+0751: $2.14_{-0.92}^{+ 1.10} \times 10^{14}M_\odot$
- RXJ0820.9+0751*: $0.31_{-0.30}^{+ 0.93} \times 10^{14}M_\odot$



In [None]:
# Main-halo masses in units of 10^14 M_sun, one entry per element of
# `clusters` and in the same order (values as listed in the "Mass Ratio"
# notes).  np.nan marks clusters without a listed mass.  Names in comments
# mark clusters that are excluded from `clusters` at that position.
clusters_masses = np.array([
    9.30, 1.02, 6.90, 7.29, 4.68,
    12.47,
    # A2055
    2.10, 5.32, 5.59,
    # A2457
    4.01, 6.04, 4.49, 4.09, 9.84,
    12.71, 11.39, 3.94, # A3667
    np.nan,
    # A3814
    12.10, 9.64, 4.64, 3.48, 
    15.69, 3.43, np.nan, 2.97, #A85
    7.09, 5.19, 1.36,
    # ***gen 2 clusters***
    3.52, #A119
    4.60, 5.22, 4.67, 3.99, np.nan,
    5.43, # A2033,
    2.00, # A2051,
    0.70, 0.46, #"A2063", #"A2142",
    2.33, 6.99, 1.15, 5.96, 8.30,
    6.79, 2.46, 10.41, #"A2703", 
    #"A2721", #A2734,
    5.94, 7.85, 8.89, #A3376,
    6.47, np.nan, 4.38, #A3628,
    6.83, #A3806, #A3809,
    0.91, 8.14, 6.07, 4.63, 0.99,
    9.02, #"A496", "A539",
    3.32, 1.60, #"A85",
    3.78, 3.01,
    # RXCJ0524.4+0819,
    2.98, 5.64, 2.14, #RXCJ1407.8-5100, #RXCJ1524.2-3154,
    # RXCJ1958.2-3011,
    6.45, 1.52, #RXCJ2344.2-0422, 
    2.14
])

# Masses of the starred ("*") secondary halo of each cluster in units of
# 10^14 M_sun, index-aligned with `clusters` (values as listed in the
# "Mass Ratio" notes).  np.nan marks clusters without a listed secondary mass.
Subhalo_masses = np.array([
    0.82, 0.41, 1.79, 1.16, 0.28,
    4.67,
    # A2055
    1.40, 0.38, 2.24,
    # A2457
    # NOTE(review): the notes list A3158* as 1.22, but the last value of the
    # next row stores 1.12 — confirm which one is correct.
    0.26, 0.77, 0.50, 3.34, 1.12,
    0.58, 2.27, 1.76, # A3667
    np.nan,
    # A3814
    0.32, 1.89, 2.13, 2.18,
    4.36, 0.16, np.nan, 0.05, # A85
    2.39, 0.81, 2.65,
    # ***gen 2 clusters***
    2.06, #A119,
    0.73, 0.72, 3.58, 0.15, np.nan,
    0.51, #A2033,
    1.82, #A2051
    0.54, 3.02, #"A2063", #"A2142",
    1.03, 0.60, 2.29, 1.26, 0.62,
    0.72, 2.88, 2.88, #"A2703", 
    #"A2721", #A2734,
    3.23, 3.47, 0.08, #A3376,
    0.59, np.nan, 3.43, #A3628,
    3.21, #A3806, #A3809,
    2.75, 0.48, 1.82, 0.97, 1.99,
    16.79, #"A496", "A539",
    1.00, 1.43, #"A85",
    1.49, 2.00,
    # RXCJ0524.4+0819,
    1.94, 1.59, 0.56, #RXCJ1407.8-5100, #RXCJ1524.2-3154,
    # RXCJ1958.2-3011,
    0.82, 1.45, #RXCJ2344.2-0422, 
    0.31
])

# Hand-set boolean selection, index-aligned with `clusters`; only True entries
# enter the mass-ratio analysis.
# NOTE(review): presumably True means the second S/N peak is accepted as the
# second-most-massive halo — confirm the criterion.
filter_2ndmassive = np.array(
    [True, True, True, True, True,
     True,
     # A2055
     True, True,True,
     # A2457,
     True, True, False, True, True,
     True, True, True, # A3667
     False,
     # A3814,
     False, True, True, True,
     True, False, False, False, #A85,
     True, False,True,
     # ***gen 2 clusters***
     True, #A119,
     True, True, True, False, False,
     True, #A2033,
     True, #A2051,
     False, False, #"A2063", #"A2142",
     True, True, False, True, True,
     True, True, True, #"A2703", 
     #"A2721", #A2734,
     True, True, False, #A3376,
     True, False, False, #A3628,
     True, #A3806, #A3809,
     False, True, True, True, True,
     False, #"A496", "A539",
     False, False, #"A85",
     True, True,
     # RXCJ0524.4+0819,
     True, True, False, #RXCJ1407.8-5100, #RXCJ1524.2-3154,
     # RXCJ1958.2-3011,
     True, False, #RXCJ2344.2-0422, 
     False
])

In [None]:
# "Original" cluster masses in units of 10^14 M_sun for the 29 gen-3 entries
# of `clusters`, in the same order.
# NOTE(review): the name suggests values from an earlier fit — confirm the source.
# The gen-2 marker is a comment: as a string literal inside the list it would
# become an array element and turn the whole array into strings.
Cluster_original_masses = np.array([
    9.03, 1.04, 6.86, 7.12, 4.80,
    11.96,
    # A2055
    2.27, 5.33, 5.93,
    # A2457
    3.95, 5.98, 4.40, 3.68, 9.98,
    12.22, 11.33, 4.11,  # A3667
    7.49,
    # A3814
    12.08, 9.54, 4.50, 3.39,
    15.48, 3.45, 18.21, 2.95,  # A85
    6.85, 5.11, 1.52,
    # ***gen 2 clusters*** (no values entered yet)
])  # unit 10^14 M_{\odot}

# Results for Gen3

In [None]:
# Gen-3 sample: the first 29 entries of the index-aligned per-cluster arrays.
Subhalo_masses_gen3 = Subhalo_masses[0:29].astype(float)
clusters_masses_gen3 = clusters_masses[0:29].astype(float)
filter_2ndmassive_gen3 = filter_2ndmassive[0:29]

# Keep only the hand-selected clusters, then form secondary/main mass ratios.
valid_cluster_name_gen3 = clusters[0:29][filter_2ndmassive_gen3]
mass_ratios_gen3 = (Subhalo_masses_gen3/clusters_masses_gen3)[filter_2ndmassive_gen3]
# NaN never compares equal to anything, so `ratio != np.nan` is always True;
# np.isnan is the test that actually detects missing ratios.
filter_gen3 = np.where(~np.isnan(mass_ratios_gen3) &
                  (mass_ratios_gen3 > 0.1))

In [None]:
# Peak-based proxy (x) versus fitted mass ratio (y) for the selected gen-3
# clusters; both use the same two-stage selection so they stay aligned.
x = quantities[0:29][filter_2ndmassive_gen3][filter_gen3]
y = mass_ratios_gen3[filter_gen3]

pearson_corr, pearson_p = pearsonr(x, y)
spearman_corr, spearman_p = spearmanr(x, y)
kendall_corr, kendall_p = kendalltau(x, y)


print(f"Pearson correlation: {pearson_corr:.3f}, p-value: {pearson_p:.3e}")
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3e}")
print(f"Kendall Tau correlation: {kendall_corr:.3f}, p-value: {kendall_p:.3e}")

plt.figure(figsize=(8,6))
plt.scatter(x, y, alpha=0.7, edgecolors='k')
plt.xlabel("Variable Rough Quantities")
plt.ylabel("Variable Mass Ratio")
plt.title("Scatter Plot of rough quantities vs mass ratio ")
plt.grid(True)

# Label every point with its cluster name; adjust_text moves overlapping labels
texts = [
    plt.text(x_i, y_i, name, fontsize=7)
    for x_i, y_i, name in zip(x, y, valid_cluster_name_gen3[filter_gen3])
]

adjust_text(texts, arrowprops=dict(arrowstyle="-", color='gray', alpha=0.5))

plt.show()


# Bootstrap: resample (x, y) pairs with replacement and recompute correlations
n_bootstrap = 1000
pearson_bootstrap = []
spearman_bootstrap = []

for _ in range(n_bootstrap):
    sample_idx = np.random.choice(len(x), len(x), replace=True)
    x_sample = x[sample_idx]
    y_sample = y[sample_idx]
    pearson_bootstrap.append(pearsonr(x_sample, y_sample)[0])
    spearman_bootstrap.append(spearmanr(x_sample, y_sample)[0])

# A resample in which x or y is constant has an undefined (NaN) correlation.
# The NaN-aware reductions ignore such draws instead of letting a single one
# turn every summary statistic into NaN.
pearson_ci = np.nanpercentile(pearson_bootstrap, [2.5, 97.5])
spearman_ci = np.nanpercentile(spearman_bootstrap, [2.5, 97.5])

print(f"95% CI for Pearson: {pearson_ci}")
print(f"95% CI for Spearman: {spearman_ci}")

print(f"Bootstrap Pearson mean: {np.nanmean(pearson_bootstrap):.3f}, std: {np.nanstd(pearson_bootstrap):.3f}")
print(f"Bootstrap Spearman mean: {np.nanmean(spearman_bootstrap):.3f}, std: {np.nanstd(spearman_bootstrap):.3f}")

print(f'The number of gen3 cluster samples is {len(valid_cluster_name_gen3[filter_gen3])}')

# Results for Gen2

In [None]:
# Gen-2 sample: everything after the first 29 (gen-3) entries of the
# index-aligned per-cluster arrays.
Subhalo_masses_gen2 = Subhalo_masses[29:].astype(float)
clusters_masses_gen2 = clusters_masses[29:].astype(float)
filter_2ndmassive_gen2 = filter_2ndmassive[29:]

# Keep only the hand-selected clusters, then form secondary/main mass ratios.
valid_cluster_name_gen2 = clusters[29:][filter_2ndmassive_gen2]
mass_ratios_gen2 = (Subhalo_masses_gen2/clusters_masses_gen2)[filter_2ndmassive_gen2]
# NaN never compares equal to anything, so `ratio != np.nan` is always True;
# np.isnan is the test that actually detects missing ratios.
filter_gen2 = np.where(~np.isnan(mass_ratios_gen2) &
                  (mass_ratios_gen2 > 0.1))

In [None]:
# Peak-based proxy (x) versus fitted mass ratio (y) for the selected gen-2
# clusters; both use the same two-stage selection so they stay aligned.
x = quantities[29:][filter_2ndmassive_gen2][filter_gen2]
y = mass_ratios_gen2[filter_gen2]

pearson_corr, pearson_p = pearsonr(x, y)
spearman_corr, spearman_p = spearmanr(x, y)
kendall_corr, kendall_p = kendalltau(x, y)


print(f"Pearson correlation: {pearson_corr:.3f}, p-value: {pearson_p:.3e}")
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3e}")
print(f"Kendall Tau correlation: {kendall_corr:.3f}, p-value: {kendall_p:.3e}")

plt.figure(figsize=(8,6))
plt.scatter(x, y, alpha=0.7, edgecolors='k')
plt.xlabel("Variable Rough Quantities")
plt.ylabel("Variable Mass Ratio")
plt.title("Scatter Plot of rough quantities vs mass ratio ")
plt.grid(True)

# Label every point with its cluster name; adjust_text moves overlapping labels
texts = [
    plt.text(x_i, y_i, name, fontsize=7)
    for x_i, y_i, name in zip(x, y, valid_cluster_name_gen2[filter_gen2])
]

adjust_text(texts, arrowprops=dict(arrowstyle="-", color='gray', alpha=0.5))

plt.show()

# Bootstrap: resample (x, y) pairs with replacement and recompute correlations
n_bootstrap = 1000
pearson_bootstrap = []
spearman_bootstrap = []

for _ in range(n_bootstrap):
    sample_idx = np.random.choice(len(x), len(x), replace=True)
    x_sample = x[sample_idx]
    y_sample = y[sample_idx]
    pearson_bootstrap.append(pearsonr(x_sample, y_sample)[0])
    spearman_bootstrap.append(spearmanr(x_sample, y_sample)[0])

# A resample in which x or y is constant has an undefined (NaN) correlation.
# The NaN-aware reductions ignore such draws instead of letting a single one
# turn every summary statistic into NaN.
pearson_ci = np.nanpercentile(pearson_bootstrap, [2.5, 97.5])
spearman_ci = np.nanpercentile(spearman_bootstrap, [2.5, 97.5])

print(f"95% CI for Pearson: {pearson_ci}")
print(f"95% CI for Spearman: {spearman_ci}")

print(f"Bootstrap Pearson mean: {np.nanmean(pearson_bootstrap):.3f}, std: {np.nanstd(pearson_bootstrap):.3f}")
print(f"Bootstrap Spearman mean: {np.nanmean(spearman_bootstrap):.3f}, std: {np.nanstd(spearman_bootstrap):.3f}")

print(f'The number of gen2 cluster samples is {len(valid_cluster_name_gen2[filter_gen2])}')

# Results for all clusters

In [None]:
# Full sample: float copies of the index-aligned per-cluster arrays.
Subhalo_masses_all = Subhalo_masses.astype(float)
clusters_masses_all = clusters_masses.astype(float)
filter_2ndmassive_all = filter_2ndmassive

# Keep only the hand-selected clusters, then form secondary/main mass ratios.
valid_cluster_name_all = clusters[filter_2ndmassive_all]
mass_ratios_all = (Subhalo_masses_all/clusters_masses_all)[filter_2ndmassive_all]
# NaN never compares equal to anything, so `ratio != np.nan` is always True;
# np.isnan is the test that actually detects missing ratios.
filter_all = np.where(~np.isnan(mass_ratios_all) &
                  (mass_ratios_all > 0.1))

In [None]:
# Peak-based proxy (x) versus fitted mass ratio (y) for all selected clusters;
# both use the same two-stage selection so they stay aligned.
x = quantities[filter_2ndmassive][filter_all]
y = mass_ratios_all[filter_all]

pearson_corr, pearson_p = pearsonr(x, y)
spearman_corr, spearman_p = spearmanr(x, y)
kendall_corr, kendall_p = kendalltau(x, y)


print(f"Pearson correlation: {pearson_corr:.3f}, p-value: {pearson_p:.3e}")
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3e}")
print(f"Kendall Tau correlation: {kendall_corr:.3f}, p-value: {kendall_p:.3e}")

plt.figure(figsize=(8,6))
plt.scatter(x, y, alpha=0.7, edgecolors='k')
plt.xlabel("Variable Rough Quantities")
plt.ylabel("Variable Mass Ratio")
plt.title("Scatter Plot of rough quantities vs mass ratio ")
plt.grid(True)

# Label every point with its cluster name; adjust_text moves overlapping labels
texts = [
    plt.text(x_i, y_i, name, fontsize=7)
    for x_i, y_i, name in zip(x, y, valid_cluster_name_all[filter_all])
]

adjust_text(texts, arrowprops=dict(arrowstyle="-", color='gray', alpha=0.5))

plt.show()


# Bootstrap: resample (x, y) pairs with replacement and recompute correlations
n_bootstrap = 1000
pearson_bootstrap = []
spearman_bootstrap = []

for _ in range(n_bootstrap):
    sample_idx = np.random.choice(len(x), len(x), replace=True)
    x_sample = x[sample_idx]
    y_sample = y[sample_idx]
    pearson_bootstrap.append(pearsonr(x_sample, y_sample)[0])
    spearman_bootstrap.append(spearmanr(x_sample, y_sample)[0])

# A resample in which x or y is constant has an undefined (NaN) correlation.
# The NaN-aware reductions ignore such draws instead of letting a single one
# turn every summary statistic into NaN.
pearson_ci = np.nanpercentile(pearson_bootstrap, [2.5, 97.5])
spearman_ci = np.nanpercentile(spearman_bootstrap, [2.5, 97.5])

print(f"95% CI for Pearson: {pearson_ci}")
print(f"95% CI for Spearman: {spearman_ci}")

print(f"Bootstrap Pearson mean: {np.nanmean(pearson_bootstrap):.3f}, std: {np.nanstd(pearson_bootstrap):.3f}")
print(f"Bootstrap Spearman mean: {np.nanmean(spearman_bootstrap):.3f}, std: {np.nanstd(spearman_bootstrap):.3f}")

print(f'The number of total cluster samples is {len(valid_cluster_name_all[filter_all])}')


In [None]:
# Export the full-sample result: one row per selected cluster with its
# peak-based proxy (`x`) and fitted mass ratio (`y`).
output_csv = "mass_ratio_quantity_lovoccs.csv"

df_out = pd.DataFrame({
    "cluster_name": valid_cluster_name_all[filter_all],
    "quantity": x,
    "true_mass_ratio": y
})

df_out.to_csv(output_csv, index=False)

# Report the name that was actually written; the old message named a
# different file.
print(f"Saved as {output_csv}")
