In [11]:
import os
import pynbody
import numpy as np
def load_amiga_grp(filename):
    # Assuming the file contains an integer halo ID for each particle in a single column
    halo_ids = np.loadtxt(filename, dtype=np.int32)
    return halo_ids

def load_iord(filename):
    # Assuming the file contains a list of particle IDs in a single column
    particle_ids = np.loadtxt(filename, dtype=np.int64)  # Using int64 if IDs are large
    return particle_ids
    

def validate_file_path(file_path):
    """Check if a file exists at the given path and print a message."""
    if os.path.exists(file_path):
        print(f"File found: {file_path}")
        return True
    else:
        print(f"File not found: {file_path}")
        return False



# Simulation paths
s1path = '/data/REPOSITORY/dwarf_volumes/storm.cosmo25cmb.4096/storm.cosmo25cmb.4096.004096'
s2path = '/data/akaxia/storm/storm.cosmo25cmbsi2s50v35.4096/storm.cosmo25cmbsi2s50v35.4096.004096'

# Load the simulations
s_1 = pynbody.load(s1path)
s_2 = pynbody.load(s2path)

# AHF file paths for the substructure files
ahf_basename_s1 = '/data/REPOSITORY/dwarf_volumes/storm.cosmo25cmb.4096/storm.cosmo25cmb.4096.004096.0000.z0.000.AHF_particles'
ahf_basename_s2 = '/data/akaxia/storm/storm.cosmo25cmbsi2s50v35.4096/storm.cosmo25cmbsi2s50v35.4096.004096.z0.000.AHF_particles'

amiga_file = '/data/REPOSITORY/dwarf_volumes/storm.cosmo25cmb.4096/storm.cosmo25cmb.4096.004096.amiga.grp'
iord_file = '/data/akaxia/storm/storm.cosmo25cmbsi2s50v35.4096/storm.cosmo25cmbsi2s50v35.4096.004096.iord'

amiga_halo_ids = load_amiga_grp(amiga_file)
print("Loaded AMIGA halo IDs:", amiga_halo_ids)
print("Total particles:", len(amiga_halo_ids))
print("Example of halo IDs:", amiga_halo_ids[:10])  # Show the first 10 halo IDs

iord_particle_ids = load_iord(iord_file)
print("Loaded IORD particle IDs:", iord_particle_ids)
print("Total particles:", len(iord_particle_ids))
print("Example of particle IDs:", iord_particle_ids[:10]) # Show the first 10 particle IDs

unique_halos, counts = np.unique(amiga_halo_ids, return_counts=True)
print("Number of unique halos:", len(unique_halos))
print("Particles per halo:", counts)
print("Most populous halo ID and count:", unique_halos[np.argmax(counts)], np.max(counts))





Loaded AMIGA halo IDs: [133973400         0         0 ...         0         0         0]
Total particles: 133973401
Example of halo IDs: [133973400         0         0         0         0         0         0
         0         0         0]
Loaded IORD particle IDs: [133973400         0         1 ... 133973397 133973398 133973399]
Total particles: 133973401
Example of particle IDs: [133973400         0         1         2         3         4         5
         6         7         8]
Number of unique halos: 23917
Particles per halo: [75634913 10890459  6154067 ...       64       64        1]
Most populous halo ID and count: 0 75634913


In [12]:
def match_halos(amiga_halo_ids, iord_particle_ids):
    # Maps each particle to its halo
    particle_to_halo = {index: halo_id for index, halo_id in enumerate(amiga_halo_ids) if halo_id != 0}

    # Calculate the halo membership from iord particle IDs
    halo_membership = {}
    for particle_id in iord_particle_ids:
        if particle_id in particle_to_halo:
            halo_id = particle_to_halo[particle_id]
            if halo_id in halo_membership:
                halo_membership[halo_id].append(particle_id)
            else:
                halo_membership[halo_id] = [particle_id]

    return halo_membership

# Run the matching function
halo_matches = match_halos(amiga_halo_ids, iord_particle_ids)
for halo_id, members in halo_matches.items():
    print(f"Halo ID {halo_id} has {len(members)} particles from iord file.")

Halo ID 133973400 has 1 particles from iord file.
Halo ID 9869 has 286 particles from iord file.
Halo ID 162 has 30462 particles from iord file.
Halo ID 1358 has 2084 particles from iord file.
Halo ID 23873 has 98 particles from iord file.
Halo ID 29761 has 73 particles from iord file.
Halo ID 31221 has 68 particles from iord file.
Halo ID 1748 has 1824 particles from iord file.
Halo ID 7242 has 401 particles from iord file.
Halo ID 23791 has 98 particles from iord file.
Halo ID 24484 has 95 particles from iord file.
Halo ID 18789 has 132 particles from iord file.
Halo ID 16980 has 149 particles from iord file.
Halo ID 16500 has 154 particles from iord file.
Halo ID 6629 has 444 particles from iord file.
Halo ID 14318 has 183 particles from iord file.
Halo ID 11279 has 244 particles from iord file.
Halo ID 906 has 3718 particles from iord file.
Halo ID 15590 has 165 particles from iord file.
Halo ID 5799 has 512 particles from iord file.
Halo ID 22268 has 106 particles from iord file.


In [None]:
# Validate file paths before loading AHF catalogues
if validate_file_path(ahf_basename_s1):
    h_1 = pynbody.halo.ahf.AHFCatalogue(s_1, filename=ahf_basename_s1)
else:
    print("Skipping loading for simulation 1 due to missing file.")

if validate_file_path(ahf_basename_s2):
    h_2 = pynbody.halo.ahf.AHFCatalogue(s_2, filename=ahf_basename_s2)
else:
    print("Skipping loading for simulation 2 due to missing file.")

In [None]:
h_1 = h_2

In [None]:
print(h_1[1].dm.loadable_keys())
print(h_2[1].dm.loadable_keys())

In [None]:

min_overlap_percentage = .5
halo_match_counter = {}


min_overlap_percentage = 50  # Minimum overlap percentage to consider halos matched
halo_match_counter = {}

# Iterate over halos in the first simulation
for i in range(1, 7):  # Adjust range if needed
    iord_1 = h_1[i].dm['iord']
    num_particles_1 = len(iord_1)  # Total particles in halo i from sim 1
    print('n1', num_particles_1)

    # Iterate over halos in the second simulation
    for j in range(1, 7):  # Adjust range if needed
        iord_2 = h_2[j].dm['iord']
        common_particles = np.intersect1d(iord_1, iord_2)
        total_common = len(common_particles)
        
        num_particles_2 = len(iord_2)  # Total particles in halo j from sim 2
        
        # Calculate overlap as a percentage of each halo's particle count
        overlap_1 = (total_common / num_particles_1) * 100
        overlap_2 = (total_common / num_particles_2) * 100

        # Check if overlap meets the minimum threshold against the larger halo
        if max(overlap_1, overlap_2) >= min_overlap_percentage:
            # Preventing subhalo-host matches by ensuring the match is significant for the larger halo
            if max(num_particles_1, num_particles_2) == num_particles_2 and overlap_1 >= min_overlap_percentage:
                if (i, j) not in halo_match_counter:
                    halo_match_counter[(i, j)] = overlap_1
            elif max(num_particles_1, num_particles_2) == num_particles_1 and overlap_2 >= min_overlap_percentage:
                if (i, j) not in halo_match_counter:
                    halo_match_counter[(i, j)] = overlap_2


# Print the best matches that meet the criteria
for match, overlap in sorted(halo_match_counter.items(), key=lambda x: x[1], reverse=True):
    print(f'Halo {match[0]} in Simulation 1 matches Halo {match[1]} in Simulation 2 with {overlap:.2f}% overlap of particle IDs.')


In [None]:
import pandas as pd

# Assuming halo_match_counter contains the match data
# Convert dictionary to DataFrame for easier handling
matches_df = pd.DataFrame(list(halo_match_counter.items()), columns=['Match', 'Overlap'])
matches_df[['Halo1', 'Halo2']] = pd.DataFrame(matches_df['Match'].tolist(), index=matches_df.index)
matches_df.drop(columns=['Match'], inplace=True)

# Save to CSV file
matches_df.to_csv('halo_matches.csv', index=False)
