In [1]:
import numpy as np

import os
from pathlib import Path
import shutil
import gzip
import re
import csv

import timeit

In [2]:
# --- Input / output locations -------------------------------------------
# Root directory that is walked for model folders.
data_path = 'data/updated_simulation_data'
# CSV file that receives the extracted rows.
outputesc_file = 'processed_data/updated_esc_data.csv'

# --- Type codes used to select binaries ----------------------------------
# Values matched against the two type columns of each table row
# (presumably stellar-type codes for white dwarfs and black holes -- confirm).
id_BH = [14]
id_WD = [10, 11, 12]

# --- Output header -------------------------------------------------------
# One entry per output column, in order. The leading blanks in front of
# "#t_snapshot[myr]" are part of the emitted header text and are kept.
_header_columns = [
    "#N",
    "#rv",
    "#rg",
    "#z",
    "  #t_snapshot[myr]",
    "#M0[MSUN]",
    "#M1[MSUN]",
    "#id0",
    "#id1",
    "#sma",
    "#ecc",
    "#star_radius0[RSUN]",
    "#star_radius1[RSUN]",
    "#radrol0",
    "#radrol1",
]
header = ",".join(_header_columns)

In [3]:
# A populated table parses to exactly this many columns; anything else is skipped.
N_ESC_COLUMNS = 63
# Column holding the binary flag (rows are kept when it equals 1).
BINFLAG_COL = 14
# The two columns whose values are matched against id_BH / id_WD.
TYPE0_COL, TYPE1_COL = 22, 23
# Source columns copied to the output, in header order:
# t_snapshot (scaled by timeunitsmyr), M0, M1, id0, id1, sma, ecc,
# star_radius0, star_radius1, radrol0, radrol1.
OUTPUT_COLS = [1, 15, 16, 17, 18, 19, 20, 24, 25, 41, 42]
# Model parameters are parsed out of the directory path.
MODEL_PATTERN = re.compile(r"N(?P<N>[\d.]+)_rv(?P<rv>[\d.]+)_rg(?P<rg>[\d.]+)_z(?P<z>[\d.]+)")


def _read_timeunitsmyr(model_dir):
    """Return the ``timeunitsmyr`` value declared by the ``.sh`` files in ``model_dir``.

    Every ``.sh`` file is scanned. Within a file, the first line starting with
    ``timeunitsmyr`` is parsed as ``name=value``; a later file overrides an
    earlier one. Each value found is printed as progress output.

    Returns:
        The value as a float, or ``None`` when no ``.sh`` file declares it.
    """
    timeunitsmyr = None
    for entry in os.listdir(model_dir):
        if not entry.endswith('.sh'):
            continue
        with open(os.path.join(model_dir, entry), 'r') as file:
            for line in file:
                if line.startswith('timeunitsmyr'):
                    _, value = line.split('=')
                    timeunitsmyr = float(value.strip())
                    print(timeunitsmyr)
                    break
    return timeunitsmyr


def _extract_wdbh_rows(dat_path, mod_params, timeunitsmyr):
    """Return the output rows selected from one ``.dat`` table.

    A row is selected when its binary flag equals 1 and its two type columns
    hold one value from ``id_BH`` and one value from ``id_WD`` (either order).

    Args:
        dat_path: Path of the whitespace-separated table ('#' starts a comment).
        mod_params: ``[N, rv, rg, z]`` floats prepended to every returned row.
        timeunitsmyr: Factor applied to the time column (source column 1).

    Returns:
        A list of rows (lists of floats); empty when the table does not have
        ``N_ESC_COLUMNS`` columns or no row is selected.
    """
    data_array = np.genfromtxt(dat_path, comments="#")

    # Skip files that are empty or otherwise not shaped like a full table.
    # The ndim guard covers a 0-d result, which has no last axis to inspect.
    if data_array.ndim == 0 or data_array.shape[-1] != N_ESC_COLUMNS:
        return []

    # A file with a single row parses to a 1-D array; promote it to (1, 63).
    events = np.atleast_2d(data_array)

    # Keep binaries only.
    events_bin = events[events[:, BINFLAG_COL] == 1]

    # Keep pairs made of one id_BH member and one id_WD member, in either slot.
    type0 = events_bin[:, TYPE0_COL]
    type1 = events_bin[:, TYPE1_COL]
    check_WDBH = (
        (np.isin(type0, id_BH) & np.isin(type1, id_WD))
        | (np.isin(type1, id_BH) & np.isin(type0, id_WD))
    )

    # Fancy indexing returns a copy, so scaling in place leaves `events` alone.
    selected = events_bin[check_WDBH][:, OUTPUT_COLS].astype(float)
    selected[:, 0] *= timeunitsmyr

    return [mod_params + row for row in selected.tolist()]


def _collect_model_rows(model_dir):
    """Return all selected rows from the ``.dat`` files of one model directory.

    The directory is skipped (with a printed message) when its path does not
    match ``MODEL_PATTERN`` or when no ``timeunitsmyr`` value can be read from
    it. Previously such a directory silently reused the values left over from
    the preceding model, which put wrong parameters / times into the output.
    """
    timeunitsmyr = _read_timeunitsmyr(model_dir)

    dat_files = [entry for entry in os.listdir(model_dir) if entry.endswith('.dat')]
    if not dat_files:
        return []

    match = MODEL_PATTERN.search(model_dir)
    if not match:
        print("Pattern not found in the file path.")
        return []
    if timeunitsmyr is None:
        print(f"No timeunitsmyr value found in {model_dir}; skipping its .dat files.")
        return []

    mod_params = [float(match.group(key)) for key in ('N', 'rv', 'rg', 'z')]

    model_rows = []
    for entry in dat_files:
        model_rows += _extract_wdbh_rows(os.path.join(model_dir, entry), mod_params, timeunitsmyr)
    return model_rows


all_esc = []

### Grab all values
# Loop over all model folders
for folder in os.listdir(data_path):
    folder_path = os.path.join(data_path, folder)
    if not os.path.isdir(folder_path):
        continue

    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        all_esc += _collect_model_rows(subfolder_path)

        # Kept from the original: only the first sub-directory found in each
        # folder is processed. NOTE(review): confirm this is intended.
        break

print(all_esc)

884.472
884.472
884.472
884.472
884.472
884.472
2501.66
2501.66
2501.66
2501.66
2501.66
2501.66
2501.66
2501.66
2501.66
7075.78
7075.78
7075.78
7075.78
7075.78
7075.78
7075.78
7075.78
7075.78
20013.3
20013.3
20013.3
20013.3
20013.3
20013.3
20013.3
20013.3
20013.3
400.802
400.802
400.802
400.802
400.802
400.802
400.802
400.802
400.802
1133.64
1133.64
1133.64
1133.64
1133.64
1133.64
1133.64
1133.64
1133.64
3206.41
3206.41
3206.41
3206.41
3206.41
3206.41
3206.41
3206.41
3206.41
9069.11
9069.11
9069.11
9069.11
9069.11
9069.11
9069.11
9069.11
9069.11
3306.65
3306.65
9352.61
9352.61
516.414
516.414
516.414
516.414
516.414
516.414
516.414
516.414
395.298
1460.64
1460.64
1460.64
1460.64
1460.64
1460.64
1460.64
1460.64
1460.64
4131.31
4131.31
4131.31
4131.31
4131.31
4131.31
4131.31
4131.31
4131.31
11685.1
11685.1
11685.1
11685.1
11685.1
11685.1
11685.1
11685.1
11685.1
673.893
673.893
673.893
673.893
673.893
673.893
673.893
673.893
673.893
1906.06
1906.06
1906.06
1906.06
1906.06
1906.06
1906.06


In [4]:
### Dump the collected rows to the output CSV
with open(outputesc_file, 'w') as out:
    # First line: the column header
    out.write(header + '\n')

    # Then one comma-separated line per collected row
    out.writelines(','.join(str(value) for value in row) + '\n' for row in all_esc)

In [5]:

# Example CSV file path
csv_file = 'processed_data/WDBH_filtered.csv'

# Initialize an empty list to store rows
rows = []

# Open the CSV file for reading
with open(csv_file, newline='') as csvfile:
    # Create a CSV reader object
    csv_reader = csv.reader(csvfile)
    next(csv_reader)
    # Iterate over each row in the CSV
    for row in csv_reader:
        # Append each row (which is already a list) to the rows list
        rows.append(row)

ids = list(dict.fromkeys(np.concatenate((np.array(rows).T[9],np.array(rows).T[10]))))

ids = [ int(float(id)) for id in ids ]

print(ids)


[1863772, 780000, 118226, 946969, 89672, 1997988, 2717065, 122895, 1477374, 942447]


In [6]:
def convert_first_element(lst):
    """Replace a leading '16.0' / '32.0' entry with the integer 16 / 32.

    The sequence is modified in place and also returned. Any other leading
    value is left untouched. (When ``lst`` is a NumPy string array the
    assigned integer is stored as its string form.)
    """
    for text, replacement in (('16.0', 16), ('32.0', 32)):
        if lst[0] == text:
            lst[0] = replacement
            break
    return lst

# First four columns of every row: the N, rv, rg and z values
numbers = np.array(rows)[:, :4]

str_numbers = []
for params in numbers:
    params = convert_first_element(params)

    # Floats are rendered with one decimal; everything else is rendered as-is
    pieces = []
    for value in params:
        if isinstance(value, float):
            pieces.append(f'{value:.1f}')
        else:
            pieces.append(f'{value}')

    # Assemble the model label from the four pieces
    str_numbers.append(f'N{pieces[0]}_rv{pieces[1]}_rg{pieces[2]}_z{pieces[3]}')

# Drop repeated labels (first-seen order), then repeat the whole list once
unique_labels = list(dict.fromkeys(str_numbers))
str_numbers = unique_labels + unique_labels

print(str_numbers)

['N16_rv0.5_rg8_z0.02', 'N16_rv1_rg8_z0.02', 'N16_rv2_rg20_z0.02', 'N8_rv0.5_rg8_z0.0002', 'N8_rv1_rg8_z0.02', 'N16_rv0.5_rg8_z0.02', 'N16_rv1_rg8_z0.02', 'N16_rv2_rg20_z0.02', 'N8_rv0.5_rg8_z0.0002', 'N8_rv1_rg8_z0.02']


In [7]:
outputesc_file = 'processed_data/updated_esc_data.csv'

# Read the CSV back: skip the header line, keep every other row as a list of strings
rows = []
with open(outputesc_file, newline='') as handle:
    reader = csv.reader(handle)
    next(reader)
    rows.extend(reader)


print(rows)

[['16.0', '0.5', '2.0', '0.02', '12526.239177024001', '1.092161', '3.0921792', '1369461.0', '1699655.0', '0.53327182', '0.20874416', '0.00700214', '1.31108e-05', '0.000208193', '2.42873e-07'], ['16.0', '0.5', '20.0', '0.02', '492.70542285408', '0.01119521', '7.8284501', '448480.0', '1750184.0', '0.0047183682', '5.6789052e-05', '0.058006', '3.31926e-05', '1.0517', '4.20152e-05'], ['16.0', '0.5', '20.0', '0.02', '10640.788987296', '1.21915', '6.5990692', '516042.0', '1703726.0', '0.023958473', '0.69371612', '0.00467737', '2.79801e-05', '0.00368796', '1.02987e-05'], ['16.0', '0.5', '20.0', '0.02', '10971.64519728', '1.0322056', '6.6709101', '1588003.0', '2082159.0', '0.012080151', '0.8401296', '0.00769367', '2.82847e-05', '0.0126439', '2.00766e-05'], ['16.0', '0.5', '8.0', '0.002', '1089.7263755495999', '11.915741', '0.0095184771', '261741.0', '2414156.0', '0.0060209495', '2.2609522e-05', '5.05227e-05', '0.0612388', '4.94919e-05', '1.05318'], ['16.0', '0.5', '8.0', '0.02', '13791.44276308