# Python implementation of L-Galaxies

This is a playground to test out the possibility of using `python` as an interface into L-Galaxies.

Abbreviations used:
* desc   – descendant
* gal(s) – galax(y|ies)
* prog   – progenitor
* sub(s) – subhalo(s)

List index identifiers:
* _gid – relative to the graph
* _sid – relative to the snapshot

– galaxies/orphans do not need an index identifier as they are numpy arrays defined per snap

In [None]:
# Imports of generic python routines

%load_ext autoreload
%autoreload 2

import astropy.constants as c
import astropy.units as u
import gc
import h5py
h5py.enable_ipython_completer()
import numpy as np
import sys

In [None]:
# Settings that steer development of the python driver itself.
# Anything that configures the SAM proper is set in the input yaml file.

# Directories holding the C and the python sources
C_DIR = 'code-C'
PYTHON_DIR = 'code-python'

# Development limiter: upper bound on the number of graphs to process.
# Swap in the commented line to remove the bound.
# n_GRAPH = np.inf
n_GRAPH = 2

# Verbosity of diagnostic output:
#   0     - major program steps only
#   1/2   - major/minor counters
#   3/4/5 - debugging diagnostics
VERBOSITY = 4

# Input files
FILE_OPTIONS_LIST = 'input/available_options.yml'  # list of all available options
FILE_PARAMETERS = 'input/input.yml'  # list of runtime parameters

In [None]:
# Imports of py-galaxies python routines
sys.path.insert(1,PYTHON_DIR)

# The parameter class, used to store run-time parameters
from parameters import C_parameters

# The graph class, used to store graphs for processing
from graphs import C_graph

# The halo class, used to store halo properties
from halos import C_halo

# The halo_output class and methods used to output halos
from halos import C_halo_output

# The subhalo class, used to store subhalo properties
from subs import C_sub

# The subhalo_output class, used to output subhalos
from subs import C_sub_output

# The galaxy dtype and template
from gals import D_gal, F_gal_template

# The galaxy_output class, used to output galaxies
from gals import C_gal_output

## Functions

Most of the work is to be done in external routines, which will probably be coded in C for efficiency.

Here we just include the high-level driver routines.

In [None]:
# These routines will eventually be moved to halos.py.
# For now they are here to help with code development.

def F_process_halos(halos,subs,gals,graph,parameters):
    """
    This is the controlling routine for halo processing.

    Parameters
    ----------
    halos : iterable of halo instances
        Each halo has its baryon fraction set (and, if parameters.b_HOD is set,
        its stellar fraction) and is then flagged as done.
    subs : iterable of subhalo instances, or None
        Subhalos holding more than one gal have those gals merged;
        each subhalo is then flagged as done.
    gals : numpy structured array of gals, or None
        Passed on to the subhalo merging and star formation routines.
    graph : graph instance
        Passed through to F_set_stellar_fraction.
    parameters : parameter instance
        Supplies verbosity and b_HOD, and is passed on to the physics routines.

    Raises
    ------
    RuntimeError
        If a halo or subhalo has already been flagged as done.

    Need to think where best to do loop over mini-steps: here or in calling routine (probably the latter)
    Note that all halo, subhalo and gal processing can be done in parallel: not sure how to tell the compiler that.
    """
    for halo in halos:
        if parameters.verbosity>=4: print('Processing halo ',halo.halo_ID)
        if halo.b_done==True:
            raise RuntimeError('halo '+str(halo.halo_ID)+' in graph '+str(halo.graph_ID)+' already processed.')
        F_set_baryon_fraction(halo,parameters)
        #F_cool_onto_sub(halo,subs_this_snap[halo.sub_central_sid],parameters)
        if parameters.b_HOD==True:
            F_set_stellar_fraction(halo,graph,parameters)
        halo.b_done=True
    if subs is not None:
        for sub in subs:
            if sub.b_done==True:
                raise RuntimeError('subhalo '+str(sub.sub_ID)+' in graph '+str(sub.graph_ID)+' already processed.')
            if sub.n_gal>1:
                # Assume instantaneous merging of galaxies in subhalos
                F_sub_merge_gals(sub,gals,parameters)
            #F_recincorporate_gas()
            #F_cool_onto_gal()
            sub.b_done=True
    # Identity test is essential here: gals is a numpy array, for which `!= None`
    # is an element-wise comparison whose result has no single truth value.
    if gals is not None:
        F_gal_form_stars(gals,subs,parameters)
        #F_gal_AGN_feedback(gals_this_snap,subs_this_snap,parameters)
    
def _F_push_gals(gals_last_snap,gals_this_snap,gal_last_start,gal_this_start,n_gal_push,
                 desc_halo,halo_offset,sub_gid,sub_sid,parameters):
    """
    Copy a contiguous run of gals from the last snap into this snap and reset
    the tracking pointers of the copies.

    gals_last_snap[gal_last_start:gal_last_start+n_gal_push] is copied into
    gals_this_snap[gal_this_start:gal_this_start+n_gal_push]; the copies are then
    pointed at desc_halo and given the supplied sub_gid / sub_sid.
    """
    gal_last_end=gal_last_start+n_gal_push
    gal_this_end=gal_this_start+n_gal_push
    # Copy over all properties
    gals_this_snap[gal_this_start:gal_this_end]=gals_last_snap[gal_last_start:gal_last_end]
    # Update the tracking pointers.
    # A basic slice is a view, so these writes land in gals_this_snap itself.
    pushed=gals_this_snap[gal_this_start:gal_this_end]
    pushed['halo_gid']=desc_halo.halo_ID
    pushed['halo_sid']=desc_halo.halo_ID-halo_offset
    pushed['sub_gid']=sub_gid
    pushed['sub_sid']=sub_sid
    # NOTE(review): this stores the location in *this* snap; if first_prog_sid is meant to
    # index the progenitor in the last snap then gal_last_start/gal_last_end are wanted -- confirm.
    pushed['first_prog_sid']=np.arange(gal_this_start,gal_this_end)
    pushed['next_prog_sid']=parameters.NO_DATA_INT

def F_update_halos(halos_last_snap,halos_this_snap,subs_last_snap,subs_this_snap,
                   gals_last_snap,graph,parameters):
    """
    Propagate properties from progenitor halos to descendants.
    Done as a push rather than a pull because sharing determined by progenitor.
    First loop to push halo / subhalo properties;
    then structured gal array needs to be generated;
    then second push of gal properties.

    Parameters
    ----------
    halos_last_snap, subs_last_snap : lists of halo / subhalo instances, or None
        The progenitors.  Their desc_main_sid / desc_host_sid attributes are set here.
    halos_this_snap : list of halo instances
        The descendants; must not be empty.  Receive mass and orphan counts.
    subs_this_snap : list of subhalo instances, or None
        Receive gal counts and gas masses from their progenitors.
    gals_last_snap : numpy structured array of gals, or None
    graph : graph instance
        Supplies the descendant IDs and contributions, and the graph_ID.
    parameters : parameter instance
        Supplies verbosity, b_HOD, b_debug and NO_DATA_INT.

    Returns
    -------
    numpy structured array of dtype D_gal holding the gals of this snap,
    or None if this snap holds no gals.

    Relies on the module-level names D_gal and gal_template.
    """
    # These offsets give the first (sub)halo in this snapshot
    halo_offset=halos_this_snap[0].halo_ID
    if subs_this_snap is not None: sub_offset=subs_this_snap[0].sub_ID
    if halos_last_snap is not None: halo_offset_last=halos_last_snap[0].halo_ID
    if halos_last_snap is not None:
        for halo in halos_last_snap:
            # First determine what fraction to give to each descendant
            desc_start_gid=halo.desc_start_gid
            desc_end_gid=halo.desc_end_gid
            if (halo.n_desc==0):
                print('No descendants for halo:',halo,flush=True)
                # For now just skip this halo; might want in future to log these occurrences
                # NOTE(review): desc_main_sid is not set here for such a halo, yet it is read
                # below for hosts of subhalos and for halos holding orphans -- confirm that is safe.
                continue
            fractions=graph.desc_contribution[desc_start_gid:desc_end_gid]/ \
                np.sum(graph.desc_contribution[desc_start_gid:desc_end_gid])
            desc_main_sid=graph.desc_IDs[desc_start_gid+np.argmax(fractions)]-halo_offset
            halo.desc_main_sid=desc_main_sid
            halos_this_snap[desc_main_sid].n_orphan+=halo.n_orphan # All orphans gals go to main descendant
            # Now loop over descendants transferring properties to them:
            for i_desc in range(desc_start_gid,desc_end_gid):
                desc_halo_ID=graph.desc_IDs[i_desc]
                desc_halo=halos_this_snap[desc_halo_ID-halo_offset]
                assert desc_halo_ID == desc_halo.halo_ID
                if parameters.verbosity>=5: print('Processing descendant',desc_halo_ID)
                i_frac=i_desc-desc_start_gid # fraction index corresponding to descendant index i_desc
                desc_halo.mass_from_progenitors+=fractions[i_frac]*halo.mass
                if parameters.b_HOD==True:
                    desc_halo.mass_stars_from_progenitors+=fractions[i_frac]*halo.mass_stars
                    desc_halo.mass_stars+=fractions[i_frac]*halo.mass_stars # Could be set later
    # Now loop over the subhalos
    if subs_last_snap is not None:
        for sub in subs_last_snap:
            sub_desc_start_gid=sub.desc_start_gid
            sub_desc_end_gid=sub.desc_end_gid
            host_sid=sub.host-halo_offset_last
            desc_main_sid=halos_last_snap[host_sid].desc_main_sid
            sub.desc_host_sid=desc_main_sid
            if sub_desc_end_gid==sub_desc_start_gid:
                # If no descendant subhalo components get given to the (main descendant of) the host halo
                # and gals become orphans of that halo.
                halos_this_snap[desc_main_sid].n_orphan+=sub.n_gal
            else:
                # Otherwise the main subhalo descendant gets all the gals
                fractions=graph.sub_desc_contribution[sub_desc_start_gid:sub_desc_end_gid]/ \
                    np.sum(graph.sub_desc_contribution[sub_desc_start_gid:sub_desc_end_gid])
                sub_desc_main_sid=graph.sub_desc_IDs[sub_desc_start_gid+np.argmax(fractions)]-sub_offset
                sub.desc_main_sid=sub_desc_main_sid
                subs_this_snap[sub_desc_main_sid].n_gal+=sub.n_gal
                subs_this_snap[sub_desc_main_sid].ICM_mass+=sub.ICM_mass
                subs_this_snap[sub_desc_main_sid].hot_gas_mass+=sub.hot_gas_mass
                # Now loop over descendants transferring properties to them
                # Only required if we decide that subhalos can split
                # for i_desc in range(sub_desc_start_gid,sub_desc_end_gid):
                #     desc_sub_ID=graph.sub_desc_IDs[i_desc]
                #     desc_sub=subs_this_snap[desc_sub_ID-sub_ID_offset]
                #     assert desc_sub_ID == desc_sub.sub_ID
                #     sub_desc_halo.<quantity>+=fractions[i_desc-desc_start]*sub.<quantity>
    # Associate subhalos with halos
    # This may already be done: there is a line self.host = graph.sub_host[sub_ID] in the subhalo class initialisation.

    # Now count the total number of gals and generate the gal array.
    # Note that done as a loop over subhalos within halos so as to keep all gals in
    # a halo closely associated in the array.
    n_gal=0
    for halo in halos_this_snap:
        n_gal_start=n_gal
        if halo.n_sub>0:
            for sub in subs_this_snap[halo.sub_start_sid:halo.sub_end_sid]:
                # Record the location of this subhalo's gals in the gal lookup table.
                n_gal=sub.gal_loc(n_gal)
        # Record the locations of all gals and of orphans in the gal lookup table.
        n_gal=halo.gal_loc(n_gal_start,n_gal)
    if n_gal==0: return None
    # Create new gal array and initially set all entries to the template
    gals_this_snap=np.empty(n_gal,dtype=D_gal)
    gals_this_snap[:]=gal_template
    # Second loop to pass on gal properties.
    # Identity test is essential here: gals_last_snap is a numpy array, for which `!= None`
    # is an element-wise comparison whose result has no single truth value.
    if gals_last_snap is not None:
        if parameters.b_debug:
            print('Pushing gals',flush=True)
        for halo in halos_last_snap:
            n_orphan=halo.n_orphan
            if n_orphan > 0:
                # match up orphans
                desc_halo=halos_this_snap[halo.desc_main_sid]
                gal_last_start=halo.orphan_start
                gal_this_start=desc_halo.gal_count(n_orphan)
                _F_push_gals(gals_last_snap,gals_this_snap,gal_last_start,gal_this_start,n_orphan,
                             desc_halo,halo_offset,parameters.NO_DATA_INT,parameters.NO_DATA_INT,parameters)
        if subs_last_snap is not None:
            for sub in subs_last_snap:
                n_sub_gal=sub.n_gal
                gal_last_start=sub.gal_start
                desc_halo=halos_this_snap[sub.desc_host_sid]
                if sub.n_desc==0:
                    # If no descendant gals become orphans of (the main descendant of) the host halo
                    gal_this_start=desc_halo.gal_count(n_sub_gal)
                    sub_gid=parameters.NO_DATA_INT
                    sub_sid=parameters.NO_DATA_INT
                else:
                    # Otherwise the main subhalo descendant gets all the gals
                    desc_sub=subs_this_snap[sub.desc_main_sid]
                    gal_this_start=desc_sub.gal_count(n_sub_gal)
                    sub_gid=desc_sub.sub_ID
                    sub_sid=desc_sub.sub_ID-sub_offset
                _F_push_gals(gals_last_snap,gals_this_snap,gal_last_start,gal_this_start,n_sub_gal,
                             desc_halo,halo_offset,sub_gid,sub_sid,parameters)
        # Graph-relative gal IDs continue on from the last gal of the previous snap;
        # the per-gal counter is added below.
        gals_this_snap['gal_gid']=gals_last_snap['gal_gid'][-1]+1
    else:
        gals_this_snap['gal_gid']=0
    gals_this_snap['graph_ID']=graph.graph_ID
    gals_this_snap['snap_ID']=halos_this_snap[0].snap_ID
    gals_this_snap['gal_gid']+=np.arange(n_gal)
    return gals_this_snap

def F_update_parameters(graph_file,parameters):
    """
    Update the run-time parameters with information held in the graph input file.

    Every attribute of graph_file['Header'] is stored on parameters under the same name,
    n_graph is set to the number of entries in graph_file['graph_lengths'],
    and the snapshot table is read from parameters.snap_input_file.

    Parameters
    ----------
    graph_file : open graph input file
        Must provide ['Header'].attrs and ['graph_lengths'].
    parameters : parameter instance
        Modified in place; b_display_parameters and snap_input_file are read from it.
    """
    for key, value in graph_file['Header'].attrs.items():
        if parameters.b_display_parameters: print(key,value)
        # setattr rather than exec: the name comes from the input file,
        # so it must be stored as data and never run as code.
        setattr(parameters,key,value)
    parameters.n_graph=len(graph_file['graph_lengths'])
    # Put code in here to either copy table of snapshot redshifts/times from graph_file,
    # Or calculate them if that does not exist.
    # Currently read from disk:
    parameters.snap_table=np.loadtxt(parameters.snap_input_file,usecols=[0,2,4],
        dtype=[('snap_ID',np.int32),('redshift',np.float32),('time_in_years',np.float32)])

# These routines will be replaced by interfaces to existing L-Galaxies routines, 
# written in C and located in code-C/

def F_set_baryon_fraction(halo,parameters):
    """
    Set halo.mass_baryon to parameters.baryon_fraction times the larger of
    the halo mass and the mass inherited from its progenitors.
    """
    own_mass = halo.mass
    inherited_mass = halo.mass_from_progenitors
    # Prefer the inherited mass only when it strictly exceeds the halo's own mass
    reference_mass = inherited_mass if inherited_mass > own_mass else own_mass
    halo.mass_baryon = parameters.baryon_fraction * reference_mass
    
# Development-testing fudge only.
# The magic numbers are deliberately hard-coded rather than put in the parameter file.
def F_set_stellar_fraction(halo,graph,parameters):
    """
    Set halo.star_formation_rate and add the stars formed since the previous snapshot
    to halo.mass_stars.

    Halos with mass above 1e12 form no stars.  Otherwise the rate is the shortfall of
    halo.mass_stars below 0.1*halo.mass_baryon, divided by 3e9 and floored at zero.
    The graph argument is not used.
    """
    if halo.mass > 1e12:
        rate = 0.
    else:
        stellar_mass_cap = 0.1 * halo.mass_baryon
        rate = max(0., (stellar_mass_cap - halo.mass_stars) / 3e9)
    halo.star_formation_rate = rate
    # Time elapsed since the previous snapshot.
    # NOTE(review): for snap_ID 0 the index snap_ID-1 wraps to the last table entry -- confirm
    # that halos are never processed at snap 0.
    times = parameters.snap_table['time_in_years']
    dt = times[halo.snap_ID] - times[halo.snap_ID - 1]
    halo.mass_stars += halo.star_formation_rate * dt
    
# Merge gals in subhalos
def F_sub_merge_gals(sub,gals,parameters):
    """
    Merge all the gals of a subhalo onto its first gal.

    The stellar and cold gas masses of gals[sub.gal_start:sub.gal_end] are summed onto the
    first of those gals; the others have those masses zeroed and are flagged as no longer
    existing.  The subhalo is left holding a single gal.

    In initial testing the components are simply added; the main routine may also need to
    add angular momenta, check for starbursts, etc.
    """
    first = sub.gal_start
    stop = sub.gal_end
    # Accumulate each mass component onto the first gal, then empty the rest
    for component in ('stellar_mass', 'cold_gas_mass'):
        gals[component][first] = np.sum(gals[component][first:stop])
        gals[component][first + 1:stop] = 0.
    gals['b_exists'][first + 1:stop] = False
    sub.n_gal = 1
    sub.gal_end = sub.gal_start + 1
                                                            
# Form stars.
def F_gal_form_stars(gals,subs,parameters):
    """
    Add one unit of stellar mass to every gal that is flagged as existing.

    Placeholder used in initial testing; subs and parameters are not used.
    """
    current_mass = gals['stellar_mass']
    grown_mass = current_mass + 1
    # Gals that no longer exist keep their current stellar mass
    gals['stellar_mass'] = np.where(gals['b_exists'], grown_mass, current_mass)
    return None

## Main routine

### Initialisation

In [None]:
# Gather every parameter of the run, from the yaml input files and from the graph input file.

# Parameters held in the yaml input files
parameters = C_parameters(FILE_PARAMETERS, FILE_OPTIONS_LIST)

# The graph input file, opened read-only
graph_file = h5py.File(parameters.graph_input_file, 'r')

# Add to the parameters the information held in the graph file
F_update_parameters(graph_file, parameters)

# Template from which new galaxies are initialised
gal_template = F_gal_template(parameters)

# Output buffers for halos, subhalos and galaxies
halo_output = C_halo_output(parameters)
sub_output = C_sub_output(parameters)
gal_output = C_gal_output(parameters)

### Loop over graphs, snapshots, halos, implementing the SAM

In [None]:
# Loop over graphs.
# For each graph the snapshots are processed in order: (sub)halo instances are created,
# properties are pushed to them from the previous snapshot, the SAM is applied,
# and the results are appended to the output buffers.
for i_graph in range(min(parameters.n_graph,n_GRAPH)):
    if VERBOSITY >= 2: print('Processing graph',i_graph,flush=True)
    graph = C_graph(i_graph,graph_file,parameters)

    # Loop over snapshots
    halos_last_snap = None
    subs_last_snap = None
    gals_last_snap = None
    for i_snap in graph.snap_ID:
        if i_snap == parameters.NO_DATA_INT:
            # Snapshots without data may only come before the first populated snapshot
            assert halos_last_snap is None
            continue
        if VERBOSITY >= 3: print('Processing snapshot',i_snap,flush=True)

        # Initialise halo and subhalo properties.
        # This returns a list of halo and subhalo instances
        # This may be slow: an alternative would be to use np arrays.
        # Adding a range to the start index relies on the latter being a numpy integer,
        # so that the sum is an array of graph-relative indices.
        halos_this_snap = [C_halo(i_graph,i_snap,i_halo,graph,parameters) for i_halo in
                         graph.halo_start_gid[i_snap]+range(graph.n_halo_snap[i_snap])]
        subs_this_snap = None
        if graph.n_sub > 0:
            if graph.n_sub_snap[i_snap] > 0:
                subs_this_snap = [C_sub(i_graph,i_snap,i_sub,graph,parameters)
                                     for i_sub in graph.sub_start_gid[i_snap]+range(graph.n_sub_snap[i_snap])]

        # Propagate information from progenitors to this generation
        # Done as a push rather than a pull because sharing determined by progenitor
        # Have to do this even if no progenitors in order to initialise galaxy array
        gals_this_snap=F_update_halos(halos_last_snap,halos_this_snap,subs_last_snap,
                                          subs_this_snap,gals_last_snap,graph,parameters)
        del halos_last_snap
        del subs_last_snap
        del gals_last_snap
        #gc.collect() # garbage collection -- safe but very slow.

        # Process the halos
        # Probably insert the loop over mini timesteps here.
        F_process_halos(halos_this_snap,subs_this_snap,gals_this_snap,graph,parameters)

        # Once all halos have been done, output results
        # This could instead be done on a halo-by-halo basis in F_process_halos
        halo_output.append(halos_this_snap,parameters)
        if subs_this_snap is not None: sub_output.append(subs_this_snap,parameters)
        # Identity test is essential: gals_this_snap is a numpy array, for which `!= None`
        # is an element-wise comparison whose result has no single truth value.
        if gals_this_snap is not None: gal_output.append(gals_this_snap,parameters)

        # Rename this_snap data structures to last_snap
        halos_last_snap=halos_this_snap
        subs_last_snap=subs_this_snap
        gals_last_snap=gals_this_snap

        # Delete old references (so that create new objects on next snapshot)
        del halos_this_snap
        del subs_this_snap
        del gals_this_snap

    # Tidy up
    del halos_last_snap
    del subs_last_snap
    del gals_last_snap

###  Tidy up and exit

In [None]:
# Close the graph input file, then flush and close each output buffer in turn.
# Note: this could probably be made automatic by renaming close() to __del__()
for handle in (graph_file, halo_output, sub_output, gal_output):
    handle.close()