In [133]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from cmcrameri import cm

import os
from pathlib import Path
import shutil
import gzip
import re
import csv

import timeit

In [134]:
data_path = 'data/updated_simulation_data'
output_file_all_all = 'processed_data/collisions_all_data.csv'
output_file_all = 'processed_data/allcollisions_GiantBH_data.csv'
output_file_nocollision = 'processed_data/nocollision_GiantBH_data.csv'
output_file_iscollision = 'processed_data/iscollision_GiantBH_data.csv'
#header = "#N,#rv,#rg,#z,#t_snapshot[myr],#M1[MSUN],#M2[MSUN],#k1,#k2,#id1,#id2,#sma[AU],#ecc,#bin_star_radius0[RSUN],#bin_star_radius1[RSUN],#snapshot, #roche_lobe1_calc[RSUN], #roche_lobe2_calc[RSUN],#radrol0,#radrol1"

output_file_initial = 'processed_data/initial_GiantBH_data.csv'
output_file_initial_all = 'processed_data/initial_all_GiantBH_data.csv'


In [135]:
def get_folder_names(directory):
    try:
        # List all entries in the specified directory
        entries = os.listdir(directory)
        
        # Filter out only the folders
        folders = [entry for entry in entries if os.path.isdir(os.path.join(directory, entry))]
        
        return folders
    except Exception as e:
        print(f"An error occurred: {e}")
        return []

# Specify the directory
directory_path = r'data/updated_simulation_data'

# Get the list of folder names
str_numbers = get_folder_names(directory_path)

# Print the list of folder names
print("Folder names:", str_numbers)
print("Number of folders:", len(str_numbers))

Folder names: ['N16_rv0.5_rg2.0_z0.002', 'N16_rv0.5_rg2.0_z0.02', 'N16_rv0.5_rg20.0_z0.002', 'N16_rv0.5_rg20.0_z0.02', 'N16_rv0.5_rg8.0_z0.002', 'N16_rv0.5_rg8.0_z0.02', 'N16_rv1.0_rg2.0_z0.0002', 'N16_rv1.0_rg2.0_z0.002', 'N16_rv1.0_rg2.0_z0.02', 'N16_rv1.0_rg20.0_z0.0002', 'N16_rv1.0_rg20.0_z0.002', 'N16_rv1.0_rg20.0_z0.02', 'N16_rv1.0_rg8.0_z0.0002', 'N16_rv1.0_rg8.0_z0.002', 'N16_rv1.0_rg8.0_z0.02', 'N16_rv2.0_rg2.0_z0.0002', 'N16_rv2.0_rg2.0_z0.002', 'N16_rv2.0_rg2.0_z0.02', 'N16_rv2.0_rg20.0_z0.0002', 'N16_rv2.0_rg20.0_z0.002', 'N16_rv2.0_rg20.0_z0.02', 'N16_rv2.0_rg8.0_z0.0002', 'N16_rv2.0_rg8.0_z0.002', 'N16_rv2.0_rg8.0_z0.02', 'N16_rv4.0_rg2.0_z0.0002', 'N16_rv4.0_rg2.0_z0.002', 'N16_rv4.0_rg2.0_z0.02', 'N16_rv4.0_rg20.0_z0.0002', 'N16_rv4.0_rg20.0_z0.002', 'N16_rv4.0_rg20.0_z0.02', 'N16_rv4.0_rg8.0_z0.0002', 'N16_rv4.0_rg8.0_z0.002', 'N16_rv4.0_rg8.0_z0.02', 'N2.0_rv0.5_rg2.0_z0.0002', 'N2.0_rv0.5_rg2.0_z0.002', 'N2.0_rv0.5_rg2.0_z0.02', 'N2.0_rv0.5_rg20.0_z0.0002', 'N2.0_rv0

In [136]:
def get_rows_of_interest(input_file, ids_of_interest):
    # Set of star IDs of interest for quick lookup
    ids_set = set(list(map(float, ids_of_interest)))
    rows_of_interest = []

    with open(input_file, 'r') as infile:
        for line in infile:
            # Skip comments and empty lines
            if line.startswith('#') or not line.strip():
                continue

            # Split the line into columns
            columns = line.split()

            # Check if the row is a binary system or a single star
            id_num = float(columns[0])
            binflag = float(columns[7])
            
            # Check if the current ID or its binary partner's ID is in the set of IDs of interest
            if id_num in ids_set or (binflag == 1 and (float(columns[10]) in ids_set or float(columns[11]) in ids_set)):
                rows_of_interest.append(columns)

    return rows_of_interest


In [137]:
id_Giant = [2, 3, 4, 5, 6, 7, 8, 9]
id_BH = [14]

all_data = []

for str_number in str_numbers:

    #print("Processing model", str_number)
    # Go through each model individually
    initial_path = f'data/updated_simulation_data/{str_number}/info/initial.snap0000.dat'
    conv_path = f'data/updated_simulation_data/{str_number}/info/initial.conv.sh'

    # Read the file
    with open(conv_path, 'r') as file:
        lines = file.readlines();

    # Extract the value for timeunitsmyr
    for line in lines:
        if line.startswith('timeunitsmyr'):
            _, value = line.split('=');
            timeunitsmyr = float(value.strip());
            #print(timeunitsmyr);
            break

    if os.path.exists(initial_path):

        ### Grab the values of the parameters from the file path
        # Define the regular expression pattern
        pattern = r"N(?P<N>[\d.]+)_rv(?P<rv>[\d.]+)_rg(?P<rg>[\d.]+)_z(?P<z>[\d.]+)";
        # Search for the pattern in the file path
        match = re.search(pattern, str_number);
        # If a match is found, extract the values and convert them to floats
        if match:
            mod_params = [float(match.group('N')), float(match.group('rv')), float(match.group('rg')), float(match.group('z'))];
        else:
            print(f"Pattern {str_number} not found in the file path.");

        # List to hold the values from the 6th column
        id_Giant = []

        # Open the CSV file and read its contents
        with open(output_file_nocollision, mode='r') as file:
            reader = csv.reader(file)
            
            # Skip the header row
            header = next(reader)
            
            # Iterate through each row in the CSV file
            for row in reader:
                #print(mod_params, row)
                
                # Check if the first four columns match the model parameters
                if float(row[0]) == float(mod_params[0]) and float(row[1]) == float(mod_params[1]) and float(row[2]) == float(mod_params[2]) and float(row[3]) == float(mod_params[3]):
                    # Append the value from the 6th column to the list
                    id_Giant.append(row[10])
        
        print(id_Giant)

        result = get_rows_of_interest(initial_path, id_Giant)
        print(len(result))
        mod_params.append('0')

        # Append the model parameters to the start of each sublist
        data_list = [mod_params + sublist if type(sublist) is list else mod_params + [sublist] for sublist in result];
        #print(data_list)

        # Append the data to the list of all data
        if len(data_list) > 0:
            all_data += data_list
        

        print(f"Finished {str_number}. All data: {len(all_data)}. Data list: {len(data_list)}")
            

    else:
        print(f"No such file {initial_path}")
        


['653285', '528064', '1207646', '534376', '1387359', '522859', '467378', '1058828', '992229', '483723', '62854', '43805', '577521', '873664', '5956', '1632674', '739061', '311248', '585680', '35591', '439402', '1518377', '1091516', '1152600', '337490', '697926', '839729', '270429', '1663020', '835619', '1072269', '1044464', '199699', '876646', '761202', '557416', '155756', '73505', '459766', '64880', '722106', '108604', '189868', '490145', '956723', '269550', '1422864', '321552', '90640', '636733', '530779', '294595', '981853', '1155119', '883505', '311906', '1482834', '187389', '150826', '955749', '48901', '557173', '845993', '177352', '562466', '1183385', '493336', '736542', '1113882', '779888', '347', '1185636', '290160', '892351', '585987', '1240796', '1414248', '416522', '658939', '1058804', '152404', '1064146', '1359638', '1099389', '167955', '592093', '517274', '222742', '1332436', '1262966', '390175', '113556', '590247', '1014814', '302972', '894548', '1368170', '1438736', '760

In [138]:
headers = ["N", "rv", "rg", "z", "t_snapshot[myr]", "id", "m[MSUN]", "r", "vr", "vt", "E", "J", "binflag", "m0[MSUN]", "m1[MSUN]", "id0", "id1", "a[AU]", "e", "startype", "luminosity[LSUN]", "radius[RSUN]", "bin_startype0", "bin_startype1", "bin_star_lum0[LSUN]", "bin_star_lum1[LSUN]", "bin_star_radius0[RSUN]", "bin_star_radius1[RSUN]", "bin.Eb", "eta", "star.phi", "rad0", "rad1", "tb", "lum0", "lum1", "massc0", "massc1", "radc0", "radc1", "menv0", "menv1", "renv0", "renv1", "tms0", "tms1", "dmdt0", "dmdt1", "radrol0", "radrol1", "ospin0", "ospin1", "B0", "B1", "formation0", "formation1", "bacc0", "bacc1", "tacc0", "tacc1", "mass0_0", "mass0_1", "epoch0", "epoch1", "ospin", "B", "formation"]


with open(output_file_initial, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)
    writer.writerows(all_data)

In [139]:
id_Giant = [2, 3, 4, 5, 6, 7, 8, 9]
id_BH = [14]

all_data_all = []

for str_number in str_numbers:

    #print("Processing model", str_number)
    # Go through each model individually
    initial_path = f'data/updated_simulation_data/{str_number}/info/initial.snap0000.dat'
    conv_path = f'data/updated_simulation_data/{str_number}/info/initial.conv.sh'

    # Read the file
    with open(conv_path, 'r') as file:
        lines = file.readlines();

    # Extract the value for timeunitsmyr
    for line in lines:
        if line.startswith('timeunitsmyr'):
            _, value = line.split('=');
            timeunitsmyr = float(value.strip());
            #print(timeunitsmyr);
            break

    if os.path.exists(initial_path):

        ### Grab the values of the parameters from the file path
        # Define the regular expression pattern
        pattern = r"N(?P<N>[\d.]+)_rv(?P<rv>[\d.]+)_rg(?P<rg>[\d.]+)_z(?P<z>[\d.]+)";
        # Search for the pattern in the file path
        match = re.search(pattern, str_number);
        # If a match is found, extract the values and convert them to floats
        if match:
            mod_params = [float(match.group('N')), float(match.group('rv')), float(match.group('rg')), float(match.group('z'))];
        else:
            print(f"Pattern {str_number} not found in the file path.");

        # List to hold the values from the 6th column
        id_Giant = []

        # Open the CSV file and read its contents
        with open(output_file_all, mode='r') as file:
            reader = csv.reader(file)
            
            # Skip the header row
            header = next(reader)
            
            # Iterate through each row in the CSV file
            for row in reader:
                #print(mod_params, row)
                
                # Check if the first four columns match the model parameters
                if float(row[0]) == float(mod_params[0]) and float(row[1]) == float(mod_params[1]) and float(row[2]) == float(mod_params[2]) and float(row[3]) == float(mod_params[3]):
                    # Append the value from the 6th column to the list
                    id_Giant.append(row[10])
        
        print(id_Giant)

        result = get_rows_of_interest(initial_path, id_Giant)
        print(len(result))
        mod_params.append('0')

        # Append the model parameters to the start of each sublist
        data_list = [mod_params + sublist if type(sublist) is list else mod_params + [sublist] for sublist in result];
        #print(data_list)

        # Append the data to the list of all data
        if len(data_list) > 0:
            all_data_all += data_list
        

        print(f"Finished {str_number}. All data: {len(all_data)}. Data list: {len(data_list)}")
            

    else:
        print(f"No such file {initial_path}")
        


['1716160', '653285', '3097671', '528064', '2104645', '1832129', '1207646', '1698319', '1976079', '1766483', '1692123', '1813916', '1947221', '1756373', '1733730', '1756545', '1747743', '1687668', '1687364', '1708193', '1927221', '1813938', '534376', '1752352', '1876874', '1811973', '1876378', '2326442', '1928390', '1751291', '1387359', '1695579', '522859', '2132482', '1706783', '1703203', '467378', '1826107', '1729217', '1723800', '2203112', '2035066', '1684954', '1708695', '1772893', '1058828', '992229', '483723', '1697680', '1767933', '2056045', '1487224', '62854', '43805', '577521', '873664', '5956', '1990456', '1632674', '739061', '311248', '585680', '127179', '35591', '439402', '1776581', '1518377', '1091516', '1152600', '337490', '1620483', '697926', '839729', '270429', '1663020', '835619', '1072269', '1044464', '199699', '876646', '1943963', '761202', '2053737', '557416', '155756', '73505', '459766', '64880', '722106', '108604', '318510', '189868', '490145', '956723', '269550',

In [140]:
headers = ["N", "rv", "rg", "z", "t_snapshot[myr]", "id", "m[MSUN]", "r", "vr", "vt", "E", "J", "binflag", "m0[MSUN]", "m1[MSUN]", "id0", "id1", "a[AU]", "e", "startype", "luminosity[LSUN]", "radius[RSUN]", "bin_startype0", "bin_startype1", "bin_star_lum0[LSUN]", "bin_star_lum1[LSUN]", "bin_star_radius0[RSUN]", "bin_star_radius1[RSUN]", "bin.Eb", "eta", "star.phi", "rad0", "rad1", "tb", "lum0", "lum1", "massc0", "massc1", "radc0", "radc1", "menv0", "menv1", "renv0", "renv1", "tms0", "tms1", "dmdt0", "dmdt1", "radrol0", "radrol1", "ospin0", "ospin1", "B0", "B1", "formation0", "formation1", "bacc0", "bacc1", "tacc0", "tacc1", "mass0_0", "mass0_1", "epoch0", "epoch1", "ospin", "B", "formation"]


with open(output_file_initial_all, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)
    writer.writerows(all_data_all)

In [141]:
# Load the CSV file into a numpy array
initial_nocollision_data = np.loadtxt(output_file_initial, delimiter=',', dtype=str, skiprows=1)

# Load the CSV file into a numpy array
nocollision_data = np.loadtxt(output_file_nocollision, delimiter=',', dtype=str, skiprows=1)



initial_nocollision_ids = initial_nocollision_data[::, 5]

nocollision_ids = nocollision_data[::, 10]



missing_ids = (set(initial_nocollision_ids) ^ set(nocollision_ids))

print(len(missing_ids))

199


In [142]:
# Load the CSV file into a numpy array
initial_nocollision_data = np.loadtxt(output_file_initial, delimiter=',', dtype=str, skiprows=1)

# Load the CSV file into a numpy array
nocollision_data = np.loadtxt(output_file_nocollision, delimiter=',', dtype=str, skiprows=1)

initial_ids = [x for x in np.concatenate((initial_nocollision_data[::, 5], initial_nocollision_data[::, 15], initial_nocollision_data[::, 16])) if float(x) != -100]

late_ids = nocollision_data[::, 10]

print(len(set(initial_ids)))
print(len(set(late_ids)))

4814
4620


In [143]:
final_list = [i for i in initial_ids if i in late_ids]

In [144]:
print(len(set(final_list)))

### this shows that the number of ids are the same in the initial and late files. the difference in length is attributed to binaries. 

4620


In [145]:
import collections
print([item for item, count in collections.Counter(late_ids).items() if count > 1])

### this actually shows duplicates, not missing values

['90640', '187389', '557173', '394180', '261928', '810261', '750418', '714651', '191780', '615852', '445911', '402272', '137504', '210951', '369869', '809511', '275238', '562093', '1212753', '342876', '24277', '818795', '162611', '231448', '398995', '135356', '182941', '478812', '762451', '96658', '1526498', '1469597', '210253', '487663', '211742', '450335', '727053', '301153', '266957', '192818', '620208', '358229', '172010', '294177', '13921', '362191', '85937', '688215', '180617', '124479', '509124', '10893', '686152', '194172', '461911', '129198', '703783', '154985', '103496', '204608', '340413', '128097', '56897', '158990', '125972', '371212', '58532', '663276', '943042', '41737', '165645', '716777', '196747', '568618', '372036', '277670', '204762', '90388', '571750', '103696', '94651', '309577', '170577', '152672', '357448', '209416', '611066', '1584613', '56583', '694915', '278101', '731876', '205448', '67040', '5627', '89406', '154983', '163908', '102231', '356027', '172062', '