In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import h5py

In [3]:
# Constants kept for later use; neither is referenced in the cells shown here.
c = 3.0e8
boxsize = 25.0e3

# Read the group (halo) and subhalo (galaxy) fields needed below from the catalogue file.
with h5py.File(r'C:\Users\tirth\Documents\Projects\Halo - galaxy connection\CAMELS_data\Astrid_SB7_719_groups_090.hdf5', 'r') as f:
    # ---- Halo properties ----
    M_h = f['Group/Group_M_Crit200'][:] * 1e10             # Msun/h
    R_h = f['Group/Group_R_Crit200'][:]                    # kpc/h (a `/c` rescaling is commented out)
    V_h = np.linalg.norm(f['Group/GroupVel'][:], axis=1)   # speed: norm of the velocity vector, km/s
    ID_r = f['Group/GroupFirstSub'][:]                     # -1 marks a halo without galaxies
    # Halo ID = position of the halo in the group arrays; used to match the galaxy catalog.
    ID_h = np.arange(len(M_h), dtype=float)

    # ---- Galaxy properties ----
    SM = f['Subhalo/SubhaloMassType'][:, 4] * 1e10         # stellar mass, Msun/h
    g_band = f['Subhalo/SubhaloStellarPhotometrics'][:, 4]
    r_band = f['Subhalo/SubhaloStellarPhotometrics'][:, 5]
    Colour = g_band - r_band                               # g - r
    # NOTE(review): the 1e10 factor and the unit quoted for SFR look unusual -
    # confirm against the catalogue documentation.
    SFR = f['Subhalo/SubhaloSFR'][:] * 1e10                # (Msun/year)/ (10^10 Msun/h)
    SR = f['Subhalo/SubhaloHalfmassRadType'][:, 4]         # kpc/h
    ID_g = f['Subhalo/SubhaloGrNr'][:]                     # index of the host halo of each galaxy

In [4]:
# NOTE: this cell overwrites the arrays loaded from the file, so it must be run
# exactly once after the loading cell (re-running it would filter twice).

# ---- Halo catalog ----
# Drop halos that host no galaxy (flagged with -1 in ID_r).
halo_rows = np.where(ID_r != -1)[0]
M_h, R_h, V_h, ID_h = (arr[halo_rows] for arr in (M_h, R_h, V_h, ID_h))

columns = ['M_h', 'R_h', 'V_h', 'ID']
data    = np.column_stack([M_h, R_h, V_h, ID_h])
halos   = pd.DataFrame(data, columns=columns)

# ---- Galaxy catalog ----
# Keep galaxies above the stellar-mass cut. `indexes` holds their positions in
# the original subhalo arrays and is reused later to flag centrals.
indexes = np.where(SM > 1.3e8)[0]
SM, Colour, SFR, SR, ID_g = (arr[indexes] for arr in (SM, Colour, SFR, SR, ID_g))

columns = ['SM', 'SR', 'SFR', 'Colour', 'ID']
data    = np.column_stack([SM, SR, SFR, Colour, ID_g])
gals    = pd.DataFrame(data, columns=columns)

In [5]:
# Number of distinct halo IDs present in both the halo and the galaxy ID arrays.
shared_ids = np.intersect1d(ID_h, ID_g)
print(len(shared_ids))

778


In [6]:
# Sanity checks on the galaxy catalog: no zero-size galaxies, and the
# stellar-mass cut is reflected in the minimum mass.
n_zero_radius = gals['SR'].eq(0).sum()
print("Number of galaxies with zero radius =", n_zero_radius)
print(f"Minimum galaxy stellar mass = {gals['SM'].min():.4e}")

Number of galaxies with zero radius = 0
Minimum galaxy stellar mass = 1.3026e+08


In [7]:
# `indexes` must still hold the subhalo positions selected by the stellar-mass
# cut. A galaxy is flagged as central when that position appears in ID_r
# (the first-subhalo index of each halo); everything else counts as a satellite.
is_central = np.isin(indexes, ID_r)
gals['is_central'] = is_central

num_centrals = is_central.sum()

print("Number of total galaxies =", len(gals))
print(f"Number of central galaxies: {num_centrals}")
print(f"Number of satellite galaxies: {len(gals) - num_centrals}")

Number of total galaxies = 1111
Number of central galaxies: 776
Number of satellite galaxies: 335


In [8]:
# IDs of the 100 most massive halos, then the galaxies that live in them.
top_100_ids = halos.nlargest(100, 'M_h')['ID']
in_top_halo = gals['ID'].isin(top_100_ids)
top_gals = gals.loc[in_top_halo]

# Satellites are the galaxies not flagged as central.
num_satellites = (~top_gals['is_central']).sum()

print(f"Number of satellites in the top 100 halos: {num_satellites}")
print(f"Average satellites per top halo: {num_satellites / len(top_100_ids):.4f}")

Number of satellites in the top 100 halos: 276
Average satellites per top halo: 2.7600


In [None]:
# Attach the host-halo properties (M_h, R_h, V_h) to every galaxy via the shared 'ID' column.
df_plot = pd.merge(gals, halos, on='ID')

central_mask = df_plot['is_central']

# (subset, marker size, opacity, legend label, colour), drawn in this order.
scatter_layers = [
    (df_plot,                10, 0.3, 'All Galaxies', 'gray'),
    (df_plot[central_mask],  15, 0.6, 'Centrals',     'blue'),
    (df_plot[~central_mask], 15, 0.6, 'Satellites',   'red'),
]

plt.figure(figsize=(8, 6))

for layer_df, marker_size, layer_alpha, layer_label, layer_color in scatter_layers:
    plt.scatter(layer_df['M_h'], layer_df['SM'],
                s=marker_size, alpha=layer_alpha, label=layer_label, color=layer_color)

# Both masses span orders of magnitude, so use log-log axes.
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Halo Mass ($M_h$) [$M_\\odot/h$]')
plt.ylabel('Stellar Mass ($SM$) [$M_\\odot/h$]')
plt.title('Stellar Mass vs Halo Mass')
plt.legend()
plt.grid(True, which="both", ls="-", alpha=0.2)
plt.show()

In [None]:
# Histograms of stellar mass, galaxy radius and host-halo mass for all
# galaxies, with centrals and satellites overlaid. Requires `df_plot`
# (galaxies merged with their host halos) from the previous cell.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# One panel per quantity: column to plot, x-axis label, target axes.
plot_configs = [
    {'data': 'SM', 'label': 'Stellar Mass', 'ax': axes[0]},
    {'data': 'SR', 'label': 'Radius', 'ax': axes[1]},
    {'data': 'M_h', 'label': 'Halo Mass', 'ax': axes[2]}
]

for config in plot_configs:
    ax = config['ax']
    col = config['data']

    # Log-spaced bin edges are only defined for strictly positive values.
    # A single zero (e.g. a halo mass reported as 0) would make log10 return
    # -inf and produce NaN bin edges, so the edges are built from the positive
    # entries only. Non-positive entries fall outside the bins and are simply
    # not drawn, which is also what a log axis would do with them.
    positive = df_plot.loc[df_plot[col] > 0, col]

    if not positive.empty:
        bins = np.logspace(np.log10(positive.min()), np.log10(positive.max()), 30)

        ax.hist(df_plot[col], bins=bins, alpha=0.3, label='All', color='gray')
        ax.hist(df_plot[df_plot['is_central']][col], bins=bins, histtype='step', label='Centrals', color='blue', lw=2)
        ax.hist(df_plot[~df_plot['is_central']][col], bins=bins, histtype='step', label='Satellites', color='red', lw=2)
        ax.legend()

    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel(config['label'])
    ax.set_ylabel('Count (Log)')

plt.tight_layout()
plt.show()