In [2]:
##############################################
# created 05-03-2024
#
# Python script meant to run COMPAS using DisBatch
# I run 10^6 systems at a bunch of discrete metallicities (each divided into batches)
# The end of this script combines the output using h5copy
# 
##############################################
import numpy as np
import os
from subprocess import Popen, PIPE
import subprocess
import sys
import shutil
import h5py as h5

from definitions import sim_flags_dict

## Step 0: set variables

In [21]:
#################################################################
##
##    User settings: adjust these for every new simulation
##
#################################################################
# NOTE: sim_name determines which COMPAS flags are used for the run
# (it is the key looked up in sim_flags_dict). Previously used: "OldWinds_RemFryer2012"
sim_name = "OldWinds_RemFryer2012_noBHkick"

# Root directory for everything this notebook writes (tasks, logs, batches)
root_out_dir = "/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/" + sim_name

file_name = 'COMPAS_Output_wWeights.h5'

# Address passed to the slurm job scripts (--mail-user)
user_email = "aac.van.son@gmail.com"

# Grid file with the binaries to evolve; handed to COMPAS via --grid
gid_filename = "BSE_grid_mass_sep_kick.txt"

### Different options for the metallicities:
# 0.0001, 0.0003, 0.001, 0.004, 0.01, 0.02, 0.03 # Hurley Z's
# [0.0001, 0.00017321, 0.0003, 0.00054772, 0.001, 0.002, 0.004, 0.00632456, 0.01, 0.01414214, 0.02, 0.03] # Hurley with extra steps
# np.logspace(-4, np.log10(0.03), 17)  # flat in log
# Currently selected: Hurley with extra steps
metallicities = [
    0.0001, 0.00017321, 0.0003, 0.00054772,
    0.001, 0.002, 0.004, 0.00632456,
    0.01, 0.01414214, 0.02, 0.03,
]

### What COMPAS flags to run with? 

Based on your sim_name, we will now construct a combination of COMPAS flags that set the 'physics' we would like to run with.


#### Main flags for the run

There are also a bunch of flags that are the same for every run:

--add-options-to-sysparms: 'NEVER' <br>
--grid: f'{root_out_dir}/{gid_filename}' <br>
--logfile-definitions: f'{root_out_dir}/COMPAS_Output_Definitions.txt' <br>
--grid-start-line: f"{Njob*batch_size}" <br>
--grid-lines-to-process: f"{batch_size}" <br>
--output-path:  f"{run_dir}" <br>
--metallicity z


### 'old' winds (i.e. fiducial in v02.35.02) 
Should be retrieved with `--mass-loss-prescription BELCZYNSKI2010`, but explicitly:  <br>

--OB-mass-loss VINK2001 <br> 
--VMS-mass-loss NONE <br> 
--VERY_MASSIVE_MINIMUM_MASS 200 (i.e., not applied since Mmax =150)  <br> 
--RSG-mass-loss NJ90 <br> 
--WR-mass-loss BELCZYNSKI2010 <br>


### 'new' winds (fiducial in v02_46_01) 
`--wolf-rayet-multiplier 1 --mass-loss-prescription BELCZYNSKI2010` <br>
Note: COMPAS used `--wolf-rayet-multiplier 0.1` as a default, but I never adopted that. <br>

--OB-mass-loss VINK2021 <br> 
--VMS-mass-loss SABHAHIT2023 <br> 
--VERY_MASSIVE_MINIMUM_MASS 100 (i.e., applied to stars above 100 Msun, since Mmax = 150)  <br> 
--RSG-mass-loss DECIN2023  <br> 
--WR-mass-loss SANDERVINK2023 <br> 


### Remnant mass & kick variations

Old fiducial is Fryer, with reduced fallback
`--remnant-mass-prescription FRYER2012` , with  `--kick-magnitude-distribution MAXWELLIAN`  <br> 

`--remnant-mass-prescription MULLERMANDEL` with `--kick-magnitude-distribution MULLERMANDEL`  <br>
  


### variations

NO MS winds `--OB-mass-loss NONE` & no VMS winds `--VMS-mass-loss NONE` <br> 
NO WR winds `--wolf-rayet-multiplier 0`  (previously noWRWindN1e6 = 0, StrongWRWindN1e6 = 5.0) <br> 
NO winds at all `--overall-wind-mass-loss-multiplier 0` # ZEROWindsN1e6 <br>
NO BH kicks `--remnant-mass-prescription FRYER2012`  with `--black-hole-kicks ZERO`  <br>
NO kicks at all `--remnant-mass-prescription FRYER2012`  with `--kick-magnitude-distribution ZERO` <br>





<b> other older Wind Variations:</b>

--cool-wind-mass-loss-multiplier # noCoolWindN1e6 = 0, StrongCoolWindN1e6 = 10.0 <br>
--black-hole-kicks (kills all kicks) options: [FULL, REDUCED, ZERO, FALLBACK], default = FALLBACK  # noBHkickN1e6 <br>
--luminous-blue-variable-prescription # noLBVN1e6 (options: [NONE, HURLEY_ADD, HURLEY, BELCZYNSKI], default = HURLEY_ADD) <br>


In [22]:
# Look up the COMPAS flags that belong to this sim_name.
# Fail loudly on an unknown name: only printing a message would leave
# sim_variation_flags undefined (or stale from an earlier run of this cell),
# and the task-writing cell further down depends on it.
if sim_name not in sim_flags_dict:
    raise KeyError(f"Unknown sim_name: {sim_name}. Known names: {list(sim_flags_dict)}")

sim_variation_flags = sim_flags_dict[sim_name]
print(sim_variation_flags)

--mass-loss-prescription BELCZYNSKI2010 --OB-mass-loss VINK2001 --VMS-mass-loss VINK2011 --RSG-mass-loss NJ90 --WR-mass-loss BELCZYNSKI2010  --remnant-mass-prescription FRYER2012 --kick-magnitude-distribution MAXWELLIAN  --black-hole-kicks ZERO 


### Make root out dir and Copy your BSE_grid 
I am interested in rerunning the exact same ~1e6 binaries at different metallicities

I am using masterfolder/BSE_grid_mass_sep_kick.txt.

In [5]:
###############################################
# Make the output directory if it doesn't exist
print('root_out_dir =  ', root_out_dir)
os.makedirs(root_out_dir, exist_ok=True)

# Keep a reference copy of this notebook, the grid and the logfile
# definitions next to the output.
# Each file is checked on its own, so re-running this cell repairs a
# directory that exists but is incomplete (e.g. after an interrupted run),
# while copies that are already present are never overwritten.
for reference_file in ('Grid_Call.ipynb', gid_filename, 'COMPAS_Output_Definitions.txt'):
    destination = f'{root_out_dir}/{reference_file}'
    if os.path.isfile(destination):
        print(f'Nothing to do, {destination} already exists')
    else:
        shutil.copyfile(reference_file, destination)

###############################################
def divide_with_remainder(numerator, denominator):
    """Split `numerator` items into `denominator` equally sized batches.

    Parameters
    ----------
    numerator : int
        Total number of items (here: binaries) to distribute.
    denominator : int
        Requested number of equally sized batches.

    Returns
    -------
    batch_size : int
        Number of items in each full batch (numerator // denominator).
    n_jobs : int
        Number of full batches. This equals `denominator`, or 0 when there
        are fewer items than requested batches (batch_size == 0).
    remainder : int
        Items left over after the full batches; the caller runs these in
        one extra job. Always: n_jobs * batch_size + remainder == numerator.

    Raises
    ------
    ZeroDivisionError
        If `denominator` is 0.
    """
    batch_size, remainder = divmod(numerator, denominator)
    # The number of full batches is the requested chunk count. Deriving it as
    # numerator / batch_size overshoots whenever remainder >= batch_size
    # (those items would be run twice) and divides by zero for batch_size == 0.
    n_jobs = denominator if batch_size > 0 else 0
    return batch_size, int(n_jobs), remainder

# Details for your run: every line of the grid file is one binary
with open(f'{root_out_dir}/{gid_filename}', 'r') as grid_file:
    lines = list(grid_file)
num_lines = len(lines)
print('num_lines',num_lines)

# how many binaries to run in total (use e.g. int(1e3) for a quick test)
N_binaries = num_lines
# how many batches to run this in (if N_binaries/N_chunks is not an int,
# you will run the remainder in an extra last batch)
N_chunks = 40

# Determine how many batches to run
batch_size, n_jobs, remainder = divide_with_remainder(N_binaries, N_chunks)
last_batch_size = batch_size
extra_job = 1 if remainder != 0. else 0
if extra_job:
    print(f'N_binaries = {N_binaries} can not be divided properly into N_chunks={N_chunks}')
    print(f'You will run 1 extra job with {remainder} binaries')

print('n_jobs',n_jobs, 'of batch_size', batch_size)

root_out_dir =   /mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noNSBHkick
num_lines 1000000
n_jobs 40 of batch_size 25000


## Step 1: Make a list of tasks to submit to [disBatch](https://github.com/flatironinstitute/disBatch)

Now we are going to construct tasks. A task looks something like `( cd /path/to/workdir ; source SetupEnv ; myprog -a 0 -b 0 -c 0 ) &> task_0_0_0.log`

For my COMPAS batches, this consists of the following steps:
* cd {rundir}
* module load python gsl boost hdf5
* $COMPAS_ROOT_DIR/src/COMPAS -flags  > COMPAS_batch_i.log




In [6]:
# Write one disBatch task line per (metallicity, batch) combination
with open(f'{root_out_dir}/Tasks', 'w') as f:

    # Hurley metallicities + extra steps
    for metallicity in metallicities:
        print('metallicity', metallicity)

        # One directory per metallicity, named after log10(Z) rounded to 2 decimals
        base_run_dir = f'{root_out_dir}/logZ{np.round(np.log10(metallicity),2)}/'
        os.makedirs(base_run_dir, exist_ok=True)

        # Loop over every batch (plus the extra remainder batch, if any)
        for Njob in range(n_jobs + extra_job):
            # directory COMPAS is run from and writes its output to
            run_dir = f'{base_run_dir}/batch_{Njob}/'
            os.makedirs(run_dir, exist_ok=True)

            ############################################
            # Compile the flags you want for this task.
            # The job after the last full batch only processes the remainder.
            on_extra_job = (remainder != 0) and (Njob == n_jobs)
            if on_extra_job:
                print('you are on the extra job, use remainder as batch_size ')
            lines_to_process = remainder if on_extra_job else batch_size

            # NOTE!!  --allow-touching-at-birth = True, otherwise as I increase Z, some systems will fail!
            COMPAS_batch_flags = (
                f"--metallicity {metallicity} {sim_variation_flags} "
                "--allow-touching-at-birth True --add-options-to-sysparms 'NEVER' "
                f"--grid '{root_out_dir}/{gid_filename}' "
                f"--logfile-definitions '{root_out_dir}/COMPAS_Output_Definitions.txt' "
                f"--grid-start-line '{Njob*batch_size}' "
                f"--grid-lines-to-process '{lines_to_process}' "
                f"--output-path '{run_dir}' "
            )

            # cd into the batch dir, load the modules, run COMPAS and log stdout+stderr
            task_line = f"cd {run_dir} ; module load python gsl boost hdf5 ; $COMPAS_ROOT_DIR/src/COMPAS {COMPAS_batch_flags}  > COMPAS_batch_{Njob}.log 2>&1 "
            f.write(task_line + '\n')



metallicity 0.0001
metallicity 0.00017321
metallicity 0.0003
metallicity 0.00054772
metallicity 0.001
metallicity 0.002
metallicity 0.004
metallicity 0.00632456
metallicity 0.01
metallicity 0.01414214
metallicity 0.02
metallicity 0.03


# Step 2: Execute the Tasks with DisBatch

make sure to `module load disBatch` 

Go to your root_out_dir and just run: 
sbatch -n 50 disBatch Tasks




In [7]:
import re

# Job IDs of the disBatch submission; later cells use them as a dependency
disBatch_job_ids = []

# disBatch Command
command = f"module load disBatch && sbatch -p cca -n 100 disBatch {root_out_dir}/Tasks"
result = subprocess.run(command, shell=True, capture_output=True, text=True)

# Extract the job ID from the output
match = re.search(r"Submitted batch job (\d+)", result.stdout)
if match is None:
    # Without a job ID nothing was queued; stop here and show why instead of
    # continuing with an empty ID list.
    raise RuntimeError(
        f"sbatch did not report a job ID (exit code {result.returncode}).\n"
        f"stdout: {result.stdout}\nstderr: {result.stderr}"
    )

job_id = match.group(1)
disBatch_job_ids.append(job_id)

print(disBatch_job_ids)

['3519875']


# Step 3: Combine the hdf5 files in post processing


In [23]:
###############################################
# post process tasks
###############################################
print(10* "*" + ' You are Going to Run PostProcessing.py')

# One task per metallicity: combine the batch outputs below logZ*/ into a
# single COMPAS_Output.h5 using h5copy.py
with open(f'{root_out_dir}/PP_Tasks', 'w') as f:

    for metallicity in metallicities:
        base_run_dir = root_out_dir+f'/logZ{np.round(np.log10(metallicity),2)}/'

        print(base_run_dir)

        # copy h5copy.py into the metallicity directory the task runs from
        shutil.copyfile('h5copy.py', f'{base_run_dir}/h5copy.py')

        # task line
        task_line = f"cd {base_run_dir} ; module load python ; python h5copy.py {base_run_dir} -r 2 -o COMPAS_Output.h5  > COMPAS_PP.log 2>&1 "
        f.write(task_line + '\n')


############################################
# Submit the job!
PP_job_ids = []

# sbatch expects the dependency as afterok:ID[:ID...]. Interpolating the
# Python list directly would produce "afterok:['123']", which is not valid.
dependency_flag = ""
if disBatch_job_ids:
    dependency_flag = "--dependency=afterok:" + ":".join(str(dep_id) for dep_id in disBatch_job_ids) + " "

# disBatch Command
command = f"module load disBatch && sbatch {dependency_flag}-p cca -n 20 disBatch {root_out_dir}/PP_Tasks"
result = subprocess.run(command, shell=True, capture_output=True, text=True)

# Extract the job ID from the output
match = re.search(r"Submitted batch job (\d+)", result.stdout)
if match is None:
    # Nothing was queued: show sbatch's own message instead of an empty list
    raise RuntimeError(
        f"sbatch did not report a job ID (exit code {result.returncode}).\n"
        f"stdout: {result.stdout}\nstderr: {result.stderr}"
    )

job_id = match.group(1)
PP_job_ids.append(job_id)

print(PP_job_ids)


********** You are Going to Run PostProcessing.py
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-4.0/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-3.76/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-3.52/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-3.26/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-3.0/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-2.7/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-2.4/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-2.2/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-2.0/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/logZ-1.85/
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick

## Check on the outcome of your individual simulations

In [24]:
## First check the individual metallicity runs
root_data_dir = f'/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/{sim_name}/'

for metallicity in metallicities:
    h5_path = f'{root_data_dir}/logZ{np.round(np.log10(metallicity),2)}/COMPAS_Output.h5'

    # Open read-only inside a context manager so every file is closed again
    # before the next one is opened (the loop touches one file per metallicity).
    with h5.File(h5_path, 'r') as data:

        # Each combined file should contain exactly one metallicity value
        print('Z = ', np.unique(data['BSE_System_Parameters']['Metallicity@ZAMS(1)'][()]) )

        print(data.keys())
        # data['BSE_System_Parameters'].keys()
        # The dataset repr shows its shape, i.e. how many systems were combined
        print(data['BSE_System_Parameters']['SEED'] )




Z =  [0.0001]
<KeysViewHDF5 ['BSE_Common_Envelopes', 'BSE_Double_Compact_Objects', 'BSE_RLOF', 'BSE_Supernovae', 'BSE_System_Parameters', 'Run_Details']>
<HDF5 dataset "SEED": shape (1000000,), type "<u8">
Z =  [0.00017321]
<KeysViewHDF5 ['BSE_Common_Envelopes', 'BSE_Double_Compact_Objects', 'BSE_RLOF', 'BSE_Supernovae', 'BSE_System_Parameters', 'Run_Details']>
<HDF5 dataset "SEED": shape (1000000,), type "<u8">
Z =  [0.0003]
<KeysViewHDF5 ['BSE_Common_Envelopes', 'BSE_Double_Compact_Objects', 'BSE_RLOF', 'BSE_Supernovae', 'BSE_System_Parameters', 'Run_Details']>
<HDF5 dataset "SEED": shape (1000000,), type "<u8">
Z =  [0.00054772]
<KeysViewHDF5 ['BSE_Common_Envelopes', 'BSE_Double_Compact_Objects', 'BSE_RLOF', 'BSE_Supernovae', 'BSE_System_Parameters', 'Run_Details']>
<HDF5 dataset "SEED": shape (1000000,), type "<u8">
Z =  [0.001]
<KeysViewHDF5 ['BSE_Common_Envelopes', 'BSE_Double_Compact_Objects', 'BSE_RLOF', 'BSE_Supernovae', 'BSE_System_Parameters', 'Run_Details']>
<HDF5 dataset "

# Step 4 combine individual Z sim into a big hdf5 file

### Finally combine each individual metallicity simulation

In [27]:
# copy h5copy.py to the root out dir, where the combine task is run from
shutil.copyfile('h5copy.py', f'{root_out_dir}/h5copy.py')

# A single task: merge the per-metallicity COMPAS_Output.h5 files (one
# directory level down, -r 1) into COMPAS_Output_combinedZ.h5
with open(f'{root_out_dir}/combineZ_Tasks', 'w') as f:

    # task line
    task_line = f"cd {root_out_dir} ; module load python ; python h5copy.py {root_out_dir} -r 1 -o COMPAS_Output_combinedZ.h5  > COMPAS_PP.log 2>&1 "
    f.write(task_line + '\n')

# disBatch Command
command = f"module load disBatch && sbatch -p cca -n 20 disBatch {root_out_dir}/combineZ_Tasks" #--dependency=afterok:{PP_job_ids}
result = subprocess.run(command, shell=True, capture_output=True, text=True)

# Do not ignore the outcome: a rejected submission would otherwise go unnoticed
if result.returncode != 0:
    raise RuntimeError(
        f"sbatch failed (exit code {result.returncode}).\n"
        f"stdout: {result.stdout}\nstderr: {result.stderr}"
    )
print(result.stdout.strip())


In [25]:



###############################################
# Make and save a slurm script that combines all metallicities
h5Flags = ' ' + root_out_dir + ' -r 1 -o COMPAS_Output_combinedZ.h5'

PP_job_string = MakeSlurmBatch(
    run_dir=root_out_dir,
    job_line="python h5copy.py ",
    job_name="CombineMetallicities",
    number_of_cores=1,
    partition='cca',
    walltime="0-1:00:00",
    memory="1G",
    email=user_email,
    flags=h5Flags,
)

print(PP_job_string)

# h5copy.py has to sit in the directory the job is run from
shutil.copyfile('h5copy.py', f'{root_out_dir}/h5copy.py')


############################################
# Submit the job to sbatch!
# The leading "/" in job_name separates it from run_dir, which has no trailing slash.
# To wait for the post-processing jobs, add: dependency = True, dependent_IDs = PP_job_ids
PPjob_id = RunSlurmBatch(run_dir=root_out_dir, job_name="/CombineMetallicities")
    

writing  /mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/CombineMetallicities.sbatch
#!/bin/bash
#SBATCH --job-name=CombineMetallicities               #job name
#SBATCH --ntasks=1                 # Number of cores
#SBATCH --output=/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/CombineMetallicities.out                 # output storage file
#SBATCH --error=/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick/CombineMetallicities.err                  # error storage file
#SBATCH --time=0-1:00:00                   # Runtime in minutes
#SBATCH --mem=1G                    # Memory per cpu in MB (see also --mem-per-cpu)
#SBATCH -p cca
#SBATCH --mail-user=aac.van.son@gmail.com              # Send email to user
#SBATCH --mail-type=FAIL            #
#
#Load modules
module load python gsl boost hdf5
# 
#CD to output directory
cd /mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemFryer2012_noBHkick
#
# Run yo

In [None]:
##################################################################
# This is the slurm script you're using
##################################################################
def MakeSlurmBatch(run_dir = None, job_line = "python runSubmit.py", job_name = "runCOMPAS",
                   number_of_cores = 1, partition='cca,gen', flags=" ", walltime = '01:00:00' ,memory = '1000', email = None):
    """Write a slurm batch script to `<run_dir>/<job_name>.sbatch` and return its text.

    Parameters
    ----------
    run_dir : str
        Directory the script is written to; the job also cd's into it and
        slurm's .out/.err files are placed there.
    job_line : str
        Command to run, without its flags.
    job_name : str
        Slurm job name; also the stem of the .sbatch/.out/.err/.log files.
    number_of_cores : int
        Value for `--ntasks`.
    partition : str
        Value for `-p`.
    flags : str
        Appended verbatim to `job_line`.
    walltime : str
        Value for `--time`.
    memory : str
        Value for `--mem`.
    email : str or None
        Address for `--mail-user`. When None, a module-level `user_email` is
        used if one exists (this is what the function always used before);
        if neither is available the mail directives are left out.

    Returns
    -------
    str
        The content of the script that was written.
    """
    # Honour the `email` argument; only fall back to the global when it is not given
    if email is None:
        email = globals().get("user_email")

    outfile = f"{run_dir}/{job_name}.out"
    errfile = f"{run_dir}/{job_name}.err"

    # stdout of the command itself goes to <job_name>.log inside run_dir
    job = job_line + flags + " > "+job_name+".log"

    # Only ask slurm to send mail when there is an address to send it to
    mail_directives = ""
    if email:
        mail_directives = (
            f"#SBATCH --mail-user={email}              # Send email to user\n"
            "#SBATCH --mail-type=FAIL            #\n"
        )

    # Make slurm script string
    SlurmJobString=f"""#!/bin/bash
#SBATCH --job-name={job_name}               #job name
#SBATCH --ntasks={number_of_cores}                 # Number of cores
#SBATCH --output={outfile}                 # output storage file
#SBATCH --error={errfile}                  # error storage file
#SBATCH --time={walltime}                   # Runtime in minutes
#SBATCH --mem={memory}                    # Memory per cpu in MB (see also --mem-per-cpu)
#SBATCH -p {partition}
{mail_directives}#
#Load modules
module load python gsl boost hdf5
# 
#CD to output directory
cd {run_dir}
#
# Run your job
{job}
"""

    sbatch_path = f'{run_dir}/{job_name}.sbatch'
    print('writing ',  sbatch_path)
    # The context manager closes the file even if the write fails
    with open(sbatch_path, 'w') as sbatchFile:
        sbatchFile.write(SlurmJobString)

    return SlurmJobString


###############################################
###
###############################################
def RunSlurmBatch(run_dir = None, job_name = "runCOMPAS", dependency = False, dependent_IDs = None):
    """Submit `<run_dir>/<job_name>.sbatch` with sbatch and return the job ID.

    Parameters
    ----------
    run_dir : str
        Directory that contains the .sbatch script (with or without a
        trailing slash).
    job_name : str
        Stem of the .sbatch file. A leading "/" is accepted and ignored, so
        callers that relied on plain string concatenation keep working.
    dependency : bool
        When True, submit with `--dependency=afterok:<ids>`.
    dependent_IDs : iterable of str/bytes/int, or a single ID
        Job IDs to wait for; only used when `dependency` is True.

    Returns
    -------
    str or None
        The job ID (last token printed by sbatch) as text, so it can be fed
        straight back in as a dependency; None if the submission failed.

    Raises
    ------
    ValueError
        If `dependency` is True but no usable job IDs were given.
    """
    # Build the path with a real join; the leading "/" of job_name is dropped
    # first because os.path.join would otherwise discard run_dir.
    sbatch_script = os.path.join(run_dir, job_name.lstrip('/') + '.sbatch')

    sbatch_args = ['sbatch']
    if dependency:
        # Accept a single ID as well as a collection of IDs
        if isinstance(dependent_IDs, (str, bytes, int)):
            dependent_IDs = [dependent_IDs]
        if not dependent_IDs or any(dep is None for dep in dependent_IDs):
            raise ValueError(f"dependency requested but dependent_IDs is not usable: {dependent_IDs!r}")
        # Decode bytes IDs: str(b'123') would yield "b'123'" in the command
        id_strings = [dep.decode('utf-8') if isinstance(dep, bytes) else str(dep) for dep in dependent_IDs]
        # Join the dependent IDs with colons
        sbatch_args.append('--dependency=afterok:' + ":".join(id_strings))
    sbatch_args.append(sbatch_script)

    sbatchArrayCommand = ' '.join(sbatch_args)
    print('sbatchArrayCommand:', sbatchArrayCommand)

    # The script is passed as a file argument, so nothing has to be sent on
    # stdin; the argument list also avoids shell quoting problems with paths.
    try:
        result = subprocess.run(sbatch_args, capture_output=True, text=True)
    except OSError as error:
        print("Error: could not execute sbatch")
        print("err = ", error)
        return None

    out, err = result.stdout, result.stderr
    print("out = ", out)
    if result.returncode == 0 and out.strip():
        # sbatch prints "Submitted batch job <id>"; the ID is the last token
        job_id = out.split()[-1]
        print("job_id", job_id)
        return job_id
    else:
        print("Error: sbatch returned no output")
        print("err = ", err)
        return None


In [None]:
###############################################
# Make Post Processing batch and submit it
###############################################

# Job IDs (or None for a failed submission) of the post-processing jobs, one per metallicity
PP_job_ids = []
for metallicity in metallicities:
    base_run_dir = f'{root_out_dir}/logZ{np.round(np.log10(metallicity),2)}/'

    print('*' * 10 + ' You are Going to Run PostProcessing.py')
    print(base_run_dir)

    ###############################################
    # Make and save a slurm script for this metallicity
    h5Flags = ' ' + base_run_dir + ' -r 2 -o COMPAS_Output.h5'

    # h5copy.py has to sit in the directory the job is run from
    shutil.copyfile('h5copy.py', f'{base_run_dir}/h5copy.py')

    PP_job_string = MakeSlurmBatch(
        run_dir=base_run_dir,
        job_line="python h5copy.py ",
        job_name="COMPAS_PP",
        number_of_cores=1,
        partition='gen',
        walltime="0-1:00:00",
        memory="1G",
        email=user_email,
        flags=h5Flags,
    )

    print(PP_job_string)

    ############################################
    # Submit the job to sbatch!
    # NOTE(review): dependency is False, so dependent_IDs is passed but not used
    # and the job does not wait for the disBatch run - confirm this is intended.
    PPjob_id = RunSlurmBatch(run_dir=base_run_dir, job_name="COMPAS_PP",
                             dependency=False, dependent_IDs=disBatch_job_ids)
    PP_job_ids.append(PPjob_id)

********** You are Going to Run PostProcessing.py
/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemMullerMandel/logZ-4.0/
writing  /mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemMullerMandel/logZ-4.0//COMPAS_PP.sbatch
#!/bin/bash
#SBATCH --job-name=COMPAS_PP               #job name
#SBATCH --ntasks=1                 # Number of cores
#SBATCH --output=/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemMullerMandel/logZ-4.0//COMPAS_PP.out                 # output storage file
#SBATCH --error=/mnt/home/lvanson/ceph/CompasOutput/v02.46.01/OldWinds_RemMullerMandel/logZ-4.0//COMPAS_PP.err                  # error storage file
#SBATCH --time=0-1:00:00                   # Runtime in minutes
#SBATCH --mem=1G                    # Memory per cpu in MB (see also --mem-per-cpu)
#SBATCH -p gen
#SBATCH --mail-user=aac.van.son@gmail.com              # Send email to user
#SBATCH --mail-type=FAIL            #
#
#Load modules
module load python gsl boost hdf5
# 
#CD to output d