In [16]:
import numpy as np

import os
from pathlib import Path
import shutil
import gzip
import re
import csv

import timeit

In [17]:
dataWDBH_path = 'updated_WDBH_data.csv'
data_path = 'data/updated_simulation_data'

outputfiltered_file = 'updated_WDBH_filtered.csv'
outputesc_file = 'updated_esc_data.csv'

id_WD = [10, 11, 12]
id_BH = [14]

In [18]:
# log_path = 'data/updated_simulation_data/N2.0_rv0.5_rg8.0_z0.002/info/initial.binint.log'


# with open(log_path, 'r') as file:
#     data = file.read()

# sections = data.split('********************************************************************************')

# parsed_data = []
# for section in sections:
#     if section.strip() == "":
#         continue
    
#     time_match = re.search(r'type=BS t=([\d.eE+-]+)', section)
#     time = time_match.group(1) if time_match else None
    
#     inputs = re.findall(r'input: (.*)', section)
#     outputs = re.findall(r'output: (.*)', section)
    
#     parsed_data.append({
#         'time': time,
#         'inputs': inputs,
#         'outputs': outputs
#     })

# print(parsed_data[0]['inputs'][0])

In [19]:
### Grab all values
def find_conv(data_path):
    convs = {}
    # Match variables
    pattern = re.compile(r'^\s*(\w+)\s*=\s*([\w.+-eE]+)\s*$', re.MULTILINE)

    # Read the file
    with open(data_path, 'r') as file:
        content = file.read()

    # Find all matches
    matches = pattern.findall(content)
    matches.pop(0)

    # Store matches in the dictionary
    for match in matches:
        variable, value = match
        convs[variable] = float(value)
    
    locals().update(convs)
    # Takes keys from dictionary and assigns them as variables with the value from the dictionary
    return convs



def parse_log_file(file_path, time_conv=1.0):
    with open(file_path, 'r') as file:
        log_content = file.read()

    # Split the content into sections
    sections = log_content.split('********************************************************************************')
    
    parsed_sections = []

    # Regular expressions to match the required lines
    type_time_pattern = re.compile(r'type=(\S+) t=(\S+)')
    input_pattern = re.compile(r'input: (.*)')
    output_pattern = re.compile(r'output: (.*)')

    for section in sections:
        if not section.strip():
            continue

        type_time_match = type_time_pattern.search(section)
        if type_time_match:
            type_value = type_time_match.group(1)
            time_value = float(type_time_match.group(2)) * time_conv
        else:
            type_value = None
            time_value = None

        inputs = []
        outputs = []

        for line in section.split('\n'):
            input_match = input_pattern.match(line)
            if input_match:
                input_data = parse_key_value_pairs(input_match.group(1))
                calculate_roche_lobe_radius_input(input_data)
                outputs.append(input_data)
            
            output_match = output_pattern.match(line)
            if output_match:
                output_data = parse_key_value_pairs(output_match.group(1))
                calculate_roche_lobe_radius_output(output_data)
                outputs.append(output_data)

        parsed_sections.append({
            'type': type_value,
            'time': time_value,
            'inputs': inputs,
            'outputs': outputs
        })

    return parsed_sections

def parse_key_value_pairs(string):
    kv_pairs = string.split()
    parsed_dict = {}
    for pair in kv_pairs:
        if '=' in pair:
            key, value = pair.split('=')
            try:
                parsed_dict[key] = float(value)
            except ValueError:
                parsed_dict[key] = value
    return parsed_dict

def calculate_roche_lobe_radius_output(output_data):
    if 'm0' in output_data and 'm1' in output_data and 'a' in output_data:
        m0 = output_data['m0']
        m1 = output_data['m1']
        a = output_data['a']
        q0 = m0 / m1
        q1 = m1 / m0
        roche_lobe_radius_out0 = (0.49 * q0**(2/3)) / (0.6 * q0**(2/3) + np.log(1 + q0**(1/3)))
        roche_lobe_radius_out1 = (0.49 * q1**(2/3)) / (0.6 * q1**(2/3) + np.log(1 + q1**(1/3)))
        output_data['radrol0'] = roche_lobe_radius_out0
        output_data['radrol1'] = roche_lobe_radius_out1
    

def calculate_roche_lobe_radius_input(input_data):
    if 'm0' in input_data and 'm1' in input_data and 'a' in input_data:
        m0 = input_data['m0']
        m1 = input_data['m1']
        a = input_data['a']
        q0 = m0 / m1
        q1 = m1 / m0
        roche_lobe_radius_in0 = (0.49 * q0**(2/3)) / (0.6 * q0**(2/3) + np.log(1 + q0**(1/3)))
        roche_lobe_radius_in1 = (0.49 * q1**(2/3)) / (0.6 * q1**(2/3) + np.log(1 + q1**(1/3)))
        input_data['radrol0'] = roche_lobe_radius_in0
        input_data['radrol1'] = roche_lobe_radius_in1


def search_sections_by_id(parsed_log, search_id):
    matching_sections = []
    id_pattern = re.compile(rf'(=|:|^|\b){re.escape(search_id)}(:|$|\b|.0)')

    for section in parsed_log:
        found = False
        for output in section['outputs']:
            for key, value in output.items():
                if 'id' in key and id_pattern.search(str(value)):
                    found = True
                    break
            if found:
                break
        if found:
            matching_sections.append(section)
    
    return matching_sections

def format_section(section):
    formatted_section = []
    formatted_section.append(f"type={section['type']} t={section['time']}")
    
    for input_item in section['inputs']:
        input_line = "input: " + " ".join(f"{k}={v}" for k, v in input_item.items())
        formatted_section.append(input_line)
    
    for output_item in section['outputs']:
        output_line = "output: " + " ".join(f"{k}={v}" for k, v in output_item.items())
        formatted_section.append(output_line)
    
    return "\n".join(formatted_section)

def write_matching_sections_to_file(matching_sections, output_file_path):
    with open(output_file_path, 'w') as file:
        for i, section in enumerate(matching_sections):
            if i > 0:
                file.write("********************************************************************************\n")
            formatted_section = format_section(section)
            file.write(formatted_section + "\n")

def sort_sections_by_time(parsed_log):
    return sorted(parsed_log, key=lambda x: x['time'])



In [20]:
# Example CSV file path
csv_file = 'processed_data/updated_WDBH_filtered.csv'

# Initialize an empty list to store rows
rows = []

# Open the CSV file for reading
with open(csv_file, newline='') as csvfile:
    # Create a CSV reader object
    csv_reader = csv.reader(csvfile)
    next(csv_reader)
    # Iterate over each row in the CSV
    for row in csv_reader:
        # Append each row (which is already a list) to the rows list
        rows.append(row)

ids = list(dict.fromkeys(np.concatenate((np.array(rows).T[9],np.array(rows).T[10]))))

ids = [ int(float(id)) for id in ids ]

print(ids)

[1863772, 780000, 118226, 946969, 89672, 1997988, 2717065, 122895, 1477374, 942447]


In [21]:
def convert_first_element(lst):
    if lst[0] == '8':
        lst[0] = 8.0
    if lst[0] == '16.0':
        lst[0] = 16
    if lst[0] == '32.0':
        lst[0] = 32
    return lst

def convert_third_element(lst):
    if lst[2] == '8':
        lst[2] = 8.0
    return lst


numbers = np.array(rows)[::, :4]

str_numbers = []

for number in numbers:
    number = convert_first_element(number)
    number = convert_third_element(number)
    # Format each number as required
    formatted_numbers = [f'{num:.1f}' if isinstance(num, float) else f'{num}' for num in number]
    
    # Join them into the desired string format
    output_string = f'N{formatted_numbers[0]}_rv{formatted_numbers[1]}_rg{formatted_numbers[2]}_z{formatted_numbers[3]}'
    str_numbers.append(output_string)

str_numbers = list(dict.fromkeys(str_numbers))
str_numbers += str_numbers
print(str_numbers)

['N16_rv0.5_rg8.0_z0.02', 'N16_rv1.0_rg8.0_z0.02', 'N16_rv2.0_rg20.0_z0.02', 'N8.0_rv0.5_rg8.0_z0.0002', 'N8.0_rv1.0_rg8.0_z0.02', 'N16_rv0.5_rg8.0_z0.02', 'N16_rv1.0_rg8.0_z0.02', 'N16_rv2.0_rg20.0_z0.02', 'N8.0_rv0.5_rg8.0_z0.0002', 'N8.0_rv1.0_rg8.0_z0.02']


In [22]:
for idno,str_number in zip(ids,str_numbers):
    # str_number = str_numbers[0]
    # idno = ids[0]
    log_path = f'data/updated_simulation_data/{str_number}/info/initial.binint.log'
    conv_path = f'data/updated_simulation_data/{str_number}/info/initial.conv.sh'
    conv = find_conv(conv_path)
    print(conv)
    parsed_log = parse_log_file(log_path, time_conv=conv['timeunitsmyr'])

    #print(len(parsed_log))
    search_id = str(idno)
    matching_sections = search_sections_by_id(parsed_log, search_id)

        # for section in matching_sections:
        #     print(section)
    sorted_sections = sort_sections_by_time(matching_sections)
    output_file_path = f'processed_data/binint_outputs/{str_number}_{idno}.txt'
    write_matching_sections_to_file(sorted_sections, output_file_path)


{'massunitcgs': 1.93009e+39, 'massunitmsun': 970382.0, 'mstarunitcgs': 1.20631e+33, 'mstarunitmsun': 0.606489, 'lengthunitcgs': 1.54285e+18, 'lengthunitparsec': 0.5, 'timeunitcgs': 2.79112e+16, 'timeunitsmyr': 884.472, 'nbtimeunitcgs': 168869000000.0, 'nbtimeunitsmyr': 0.00535125}
{'massunitcgs': 1.93009e+39, 'massunitmsun': 970382.0, 'mstarunitcgs': 1.20631e+33, 'mstarunitmsun': 0.606489, 'lengthunitcgs': 3.0857e+18, 'lengthunitparsec': 1.0, 'timeunitcgs': 7.89449e+16, 'timeunitsmyr': 2501.66, 'nbtimeunitcgs': 477633000000.0, 'nbtimeunitsmyr': 0.0151356}
{'massunitcgs': 1.93009e+39, 'massunitmsun': 970382.0, 'mstarunitcgs': 1.20631e+33, 'mstarunitmsun': 0.606489, 'lengthunitcgs': 6.1714e+18, 'lengthunitparsec': 2.0, 'timeunitcgs': 2.2329e+17, 'timeunitsmyr': 7075.78, 'nbtimeunitcgs': 1350950000000.0, 'nbtimeunitsmyr': 0.04281}
{'massunitcgs': 9.64354e+38, 'massunitmsun': 484844.0, 'mstarunitcgs': 1.20544e+33, 'mstarunitmsun': 0.606055, 'lengthunitcgs': 1.54285e+18, 'lengthunitparsec':

In [23]:
print((matching_sections))

[{'type': 'BB', 'time': 9649.042448819799, 'inputs': [], 'outputs': [{'type': 'binary', 'm0': 7.62734, 'm1': 0.642459, 'R0': 0.0001617, 'R1': 0.0122182, 'Eint1': 0.0, 'Eint2': 0.0, 'id0': 89672.0, 'id1': 942447.0, 'a': 0.0651425, 'e': 0.0, 'ktype1': 14.0, 'ktype2': 11.0, 'radrol0': 0.5915571286251049, 'radrol1': 0.19665202727433867}, {'type': 'binary', 'm0': 1.01977, 'm1': 0.156212, 'R0': 1.48581, 'R1': 0.193077, 'Eint1': 0.0, 'Eint2': 0.0, 'id0': 405384.0, 'id1': 837014.0, 'a': 9.53658, 'e': 0.666443, 'ktype1': 1.0, 'ktype2': 0.0, 'radrol0': 0.5433970008441394, 'radrol1': 0.23366998735078193}, {'type': 'triple', 'min0': 7.62734, 'min1': 0.642459, 'mout': 0.156212, 'Rin0': 0.0001617, 'Rin1': 0.0122182, 'Rout': 0.193077, 'Eintin0': 0.0, 'Eintin1': 0.0, 'Eintout': 0.0, 'idin1': 89672.0, 'idin2': 942447.0, 'idout': 837014.0, 'ain': 0.0651425, 'aout': 47.8579, 'ein': 6.71838e-05, 'eout': 0.808993, 'ktypein1': 11.0, 'ktypeout': 0.0}, {'type': 'single', 'm': 1.01977, 'R': 1.48581, 'Eint': 0.


DONE print out the radius of the stars too. calculate size of roche lobe (formula), check if wd fills roche lobe. cut the ones that aren't. 

DONE cut interactions ealier than 9gyr. check for repeats, only keep first and last interaction. 


DONE check escaped binaries. i.e. record last binary interaction time, record escape time. if not escaped, check binint to see how the binary broke apart. 

DONE to check binary formation, check binint to see when first time binary appears in output (making sure same binary is not in the input). binint produce list of systems, and then check through with data file again if there's any missing in either one. 


DONE use radrol number to create list again.

DONE use binint to find the binary, check ids are the same, 

calculate roche lobe radius. find latest time the binary appears in binint file with $(t_{enc}<t_{snap})\,\&\,(R_{WD}<R_{RL})$. 

use binint to get story for the formation of each system (just do manually)

check BH giant collisions from new file


giant stars. forms giant white dwarf binary, probably not going to be disrupted, probably going to act as a single object. e.g. 15M BH + 0.05M white dwarf basically equal to 15M BH. 

will also eventually have to figure out the core mass of the giant, which is what becomes the white dwarf. 

find what is most common type (mass, age of giant). make a plot of black hole mass and giant mass, do different colours for type of giant. have a table for total number of different types. age of cluster/giant when collision occurs. 

collisions file is essentially upper limit assumption that all mergers give us the case where tinsp<tenc

we will use a stellar structure model from MESA, do a simulation of black hole colliding with it, treating cor eof giant as point particle with gas that is envelope of giant. model hydrodynamics of blackhole and envelope to figure out orbital properties after envelope gets ejected.  

we will take those models and apply them to observed clusters. weight each model depending on how well they match. 