# Python implementation of L-Galaxies

This notebook is a playground for testing whether Python can be used as an interface to L-Galaxies.

In [None]:
# Imports of generic python routines
import astropy.constants as c
import astropy.units as u
import gc
import h5py
h5py.enable_ipython_completer()
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_context('notebook')
sns.set_style('whitegrid')
import sys

In [None]:
# Settings that steer the development of the python code itself.
# Settings of the SAM are read from the input yaml file instead.

# Where the code lives
C_DIR = 'code-C'
PYTHON_DIR = 'code-python'

# Development limiter (the unlimited alternative is kept commented out)
#n_GRAPH=np.inf
n_GRAPH = 5

# Should halo instances be packed with progenitor/descendant information?
b_HALO_FULL = True

# Switch for debugging/testing
b_DEBUG = True

# Verbosity level:
#   0 - major program steps only; 1/2 - major/minor counters; 3 - debugging diagnostics.
VERBOSITY = 2

# Parameters of this script
FILE_PARAMETERS = 'input/input.yml'
b_DISPLAY_PARAMETERS = True

In [None]:
# Imports of py-galaxies python routines
sys.path.insert(1,PYTHON_DIR)

# The parameter class, used to store run-time parameters
from parameters import C_parameters

# The graph class, used to store graphs for processing
from graphs import C_graph

In [None]:
# Read in all the parameters of the run from the yaml and graph input files.

parameters = C_parameters(FILE_PARAMETERS, VERBOSITY, b_DEBUG)
if b_DISPLAY_PARAMETERS:
    print(parameters)

# Copy every attribute of the graph file's 'Header' group onto the parameters object.
# (Original note: this code was supposed to read the particle mass, but that has disappeared!)
# The file handle is kept open on purpose: the graph loop further down reads from it.
graph_file = h5py.File(parameters.graph_input_file, 'r')
for key, value in graph_file['Header'].attrs.items():
    if b_DISPLAY_PARAMETERS:
        print(key, value)
    # setattr instead of exec: no code is built from text stored in the file,
    # and attribute names that are not valid python identifiers cannot break the cell.
    setattr(parameters, key, value)

# Number of graphs = number of entries in the 'graph_lengths' dataset
n_graph = len(graph_file['graph_lengths'])

In [None]:
# Build a C_graph instance for each graph in the input file, one after the other.
# Every new instance replaces the previous one held in `graph`.
# NOTE(review): the development limiter n_GRAPH is not applied to this loop - confirm whether it should be.
for i_graph in range(n_graph):
    if VERBOSITY >= 2:
        print('Processing graph', i_graph)
    graph = C_graph(i_graph, graph_file, parameters)

## Functions

Most of the work is to be done in external routines, probably coded in C for efficiency.

Here we just include the high-level driver routines.

### I/O routines

In [None]:
def openGraphInput():
    """Open the graph data file read-only and return the h5py file handle.

    The file name is looked up in parameters['inputFiles']['graphFile'].
    """
    graphFileName = parameters['inputFiles']['graphFile']
    return h5py.File(graphFileName, 'r')

def closeGraphInput(graphInputFile):
    """Close the graph data file.

    graphInputFile: the open file handle; only its close() method is used.
    """
    graphInputFile.close()

def openGalaxyOutput(galaxyOutputFile=None):
    """Create the galaxy output file for writing and return its h5py handle.

    The file name is looked up in parameters['outputFiles']['galaxyFile'].

    galaxyOutputFile: accepted for backward compatibility only and ignored.
        Rebinding a parameter inside the function is invisible to the caller,
        so the handle is returned instead; callers should write
        ``galaxyOutputFile = openGalaxyOutput()``.
    """
    return h5py.File(parameters['outputFiles']['galaxyFile'], 'w')

def closeGalaxyOutput(outputFile=None):
    """Close the galaxy output file.

    outputFile: the open galaxy output file handle.  When omitted, the
        module-level name galaxyOutputFile is used, which keeps the original
        zero-argument call working (it raises NameError if that global does
        not exist, exactly as before).
    """
    if outputFile is None:
        # Backward-compatible path: fall back to the global handle.
        outputFile = galaxyOutputFile
    outputFile.close()

def openHaloOutput():
    """Create the halo output file together with its write buffer and dataset.

    Returns a 4-tuple:
        the h5py file opened for writing (name from parameters['outputFiles']['haloFile']),
        an uninitialised numpy buffer of io_nRec records of dtype_halo,
        the empty, extendable, gzip-compressed dataset 'Halos',
        the buffer cursor (number of records currently buffered), which starts at 0.
    """
    outputFile = h5py.File(parameters['outputFiles']['haloFile'], 'w')
    ioBuffer = np.empty(io_nRec, dtype=dtype_halo)
    dataset = outputFile.create_dataset(
        'Halos', (0,), maxshape=(None,), dtype=dtype_halo, compression='gzip')
    return outputFile, ioBuffer, dataset, 0
    
def closeHaloOutput(haloOutputFile,haloOutputData,haloOutputDataset,haloOutput_iRec):
    """Write out any halos still held in the output buffer, then close the halo output file.

    haloOutputFile:    open halo output file handle
    haloOutputData:    output buffer holding the pending records
    haloOutputDataset: dataset the pending records are appended to
    haloOutput_iRec:   number of records currently pending in the buffer

    Returns the buffer cursor after closing: the value handed back by
    flushOutput when something was pending, otherwise haloOutput_iRec unchanged.
    """
    pending = haloOutput_iRec
    if pending > 0:
        pending = flushOutput(pending, haloOutputData, haloOutputDataset)
    haloOutputFile.close()
    return pending

# Layout of one halo record in the output.
# For now all halos are dumped into a single dataset; whether a different format is wanted can be decided later.
# The record is a numpy structured dtype.  Most fields are 32 bit to save space; catalogID is a 64-bit integer.
dtype_halo = np.dtype({
    'names': ['graphID', 'snapID', 'haloID', 'catalogID',
              'mass', 'massBaryon', 'mass_fromProgenitors'],
    'formats': [np.int32, np.int32, np.int32, np.int64,
                np.float32, np.float32, np.float32],
})
def outputHalos(halos,haloOutputData,haloOutputDataset,haloOutput_iRec):
    """Copy the output properties of each halo into the buffer, flushing whenever it fills up.

    halos:             iterable of halo objects providing the attributes listed in `fields` below
    haloOutputData:    structured numpy buffer that receives the records
    haloOutputDataset: dataset the buffer is flushed to (via flushOutput) when full
    haloOutput_iRec:   position in the buffer of the next free record

    Returns the updated position of the next free record in the buffer.
    """
    # Attribute names on the halo double as field names in the buffer.
    fields = ('graphID', 'snapID', 'haloID', 'catalogID',
              'mass', 'massBaryon', 'mass_fromProgenitors')
    # The buffer is full when the cursor reaches its own length.  Using the length of
    # the buffer actually passed in (rather than a global record count) keeps the
    # flush threshold correct for whatever buffer the caller supplies.
    bufferSize = len(haloOutputData)
    for halo in halos:
        record = haloOutputData[haloOutput_iRec]  # view into the buffer: writes go through
        for field in fields:
            record[field] = getattr(halo, field)
        haloOutput_iRec += 1
        if haloOutput_iRec == bufferSize:
            haloOutput_iRec = flushOutput(haloOutput_iRec, haloOutputData, haloOutputDataset)
    return haloOutput_iRec
    
def flushOutput(nRec,outputData,outputDataset):
    """Append the first nRec records of the numpy buffer to the HDF5 dataset.

    nRec:          number of valid records at the start of outputData
    outputData:    numpy buffer holding the records
    outputDataset: one-dimensional, resizable dataset that is grown by nRec entries

    Returns 0, the new (empty) buffer cursor.
    """
    if nRec == 0:
        # Nothing buffered.  Without this guard a negative-index slice [-0:] would
        # address the whole dataset and the assignment of an empty array would fail.
        return 0
    start = outputDataset.shape[0]
    outputDataset.resize((start + nRec,))
    # Address the new tail by its start index rather than by a negative offset.
    outputDataset[start:] = outputData[:nRec]
    return 0

### Halo processing routines

In [None]:
def processHalo(halo):
    """Run all processing steps on a single halo and mark it as done.

    Steps, in order: read the halo properties from the graph file, split the
    mass between descendants, gather the contributions of the progenitors
    (skipped while halos_lastSnap is None, i.e. for the first generation of
    halos), and set the baryon content.

    Raises AssertionError if the halo has already been processed.
    """
    if verbosity>=3: print('Processing halo ',halo.haloID)
    if halo.done:
        print('Warning: processHalo: halo ',str(halo),' already processed.')
        # Explicit raise instead of `assert False`, which is stripped when python
        # runs with -O; the exception type seen by callers is unchanged.
        raise AssertionError('processHalo: halo already processed')
    readProperties(halo)
    calcMassToDesc(halo)
    # Omit gatherProgenitors for first generation of halos!
    # Identity test against None: the idiomatic check, independent of how the
    # snapshot container defines equality.
    if halos_lastSnap is not None:
        gatherProgenitors(halo)
    setBaryonFraction(halo)
#     fixStellarFraction(halo) # Dummy routine.
    halo.done=True
    
def readProperties(halo):
    """Fill in halo.catalogID and halo.mass from the input graph file.

    The values are the 'catalogID' and 'halo_mass' attributes of the entry
    graphInputFile[halo.graphID][halo.snapID][halo.haloID]; an attribute that
    is missing yields None.
    """
    attributes = graphInputFile[halo.graphID][halo.snapID][halo.haloID].attrs
    halo.catalogID = attributes.get('catalogID')
    halo.mass = attributes.get('halo_mass')

def calcMassToDesc(halo):
    """Share halo.mass between the descendants of the halo.

    Each descendant receives a part of halo.mass in proportion to its entry in
    'desc_mass_contribution'.  The result is stored in halo.desc_mass, a dict
    keyed by the entries of 'desc_haloIDs' with float64 masses as values.
    """
    node = graphInputFile[halo.graphID][halo.snapID][halo.haloID]
    contribution = node['desc_mass_contribution']
    # Normalise the contributions to fractions, then scale by the halo mass
    fraction = contribution / np.sum(contribution)
    massToDesc = np.array(fraction * halo.mass, dtype=np.float64)
    halo.desc_mass = dict(zip(node['desc_haloIDs'], massToDesc))

def gatherProgenitors(halo):
    """Add up the mass that the progenitors of the halo hand down to it.

    For every entry of 'prog_haloIDs' the matching halo of the previous snapshot
    (halos_lastSnap) is located and the mass it assigned to this halo
    (its desc_mass entry for halo.haloID) is added to halo.mass_fromProgenitors.
    """
    # haloID of the first halo of the previous snapshot: the offset that turns
    # a progenitor haloID into a position in halos_lastSnap.
    firstID = int(halos_lastSnap[0].haloID)
    progIDs = graphInputFile[halo.graphID][halo.snapID][halo.haloID]['prog_haloIDs']
    for progID in progIDs:
        if debugFlag and verbosity >= 3:
            print('prog_haloID =', progID)
        index = int(progID) - firstID
        if debugFlag and verbosity >= 3:
            print('prog_index_lastSnap =', index)
        progenitor = halos_lastSnap[index]
        # The offset lookup only works while the halos are stored in ascending,
        # gap-free order.  HDF5 may store them in a different order; if that
        # happens, code is needed to search all halos of the previous snapshot
        # or to rearrange them into ascending order.
        if debugFlag:
            assert int(progenitor.haloID) == int(progID)
        # Take over what this progenitor assigned to the current halo
        halo.mass_fromProgenitors += progenitor.desc_mass[int(halo.haloID)]

def setBaryonFraction(halo):
    """Set halo.massBaryon to fBaryon times the larger of halo.mass and halo.mass_fromProgenitors."""
    largerMass = max(halo.mass, halo.mass_fromProgenitors)
    halo.massBaryon = fBaryon * largerMass

## Main routine

In [None]:
# Main driver: walk through every graph and snapshot of the input file, process each halo
# and write the halo records to the output file.
# NOTE(review): verbosity, debugFlag, nHaloMax and haloClass are used below but are not
# defined in the visible part of this notebook - confirm where they come from.

# Open graph input file
# Note: can't pass undefined argument into function
graphInputFile=openGraphInput()

# Open output files
# Note: can't pass undefined argument into function
haloOutputFile,haloOutputData,haloOutputDataset,haloOutput_iRec=openHaloOutput()
# Note: no attempt here to include sub-halos (galaxies).  Let's get halos right first!
#openGalaxyOutput(galaxyOutputFile)

# Iteratively loop over halos, doing whatever processing is required.
# This assumes that halos properties depend only upon those halos in their immediate past in the merger graph.

# Loop over MergerGraphs.
# nHalo counts the halos processed so far, over all graphs.
nHalo=0
#nHaloGraph=0
#nHaloSnap=0
for graphID in graphInputFile['/']:
    if verbosity>=2: print('Processing graph',graphID)
    graph=graphInputFile[graphID]
    # Loop over snapshots from first to last.
    # halos_lastSnap is None for the first snapshot of each graph, which makes
    # processHalo skip the progenitor step.
    halos_lastSnap = None
    snapID_old=-1
    for snapID in graph:  
        # Not sure that these are guaranteed to be in increasing order, so put in this check
        # (every snapshot ID after the first must be exactly one larger than the previous one).
        if snapID_old!=-1: assert int(snapID)==snapID_old+1
        snapID_old=int(snapID)
        if verbosity>=2: print('        snapshot',snapID)
        snap=graph[snapID]
        # Initialise halo properties
        halos_thisSnap=[haloClass(graphID,snapID,haloID) for haloID in snap]
        # Loop over halos in snapshot.
        for halo in halos_thisSnap: 
            processHalo(halo)
            nHalo +=1
            if verbosity>=1 and nHalo%1000==0: print('Processed {:d} halos'.format(nHalo))
        # Once all halos have been done, output results, update reference to lastSnap (and hence free memory)
        haloOutput_iRec=outputHalos(halos_thisSnap,haloOutputData,haloOutputDataset,haloOutput_iRec)
        del halos_lastSnap
        #gc.collect() # garbage collection -- safe but very slow.
        halos_lastSnap=halos_thisSnap
        # Delete reference to this memory to free variable for new use.
        del halos_thisSnap
        # Temporary halt to limit to finite time
        # The halo output is flushed and closed before the deliberate failure;
        # the graph input file is not closed on this path.
        if nHalo>=nHaloMax:
            closeHaloOutput(haloOutputFile,haloOutputData,haloOutputDataset,haloOutput_iRec)
            assert False
# In debug mode the halos of the last snapshot stay available for inspection.
if not debugFlag: del halos_lastSnap

# Close input file
closeGraphInput(graphInputFile)

# Close output files
# The return value (the reset buffer cursor) is discarded here, so haloOutput_iRec
# keeps the value it had before this call.
closeHaloOutput(haloOutputFile,haloOutputData,haloOutputDataset,haloOutput_iRec)
#closeGalaxyOutput(galaxyOutputFile)

In [None]:
# Scratch cell: show the halo output file handle, then close it directly.
# This bypasses closeHaloOutput, so nothing held in the output buffer is flushed here.
print(haloOutputFile)
haloOutputFile.close()

In [None]:
# Scratch cell: flush any buffered halo records and close the halo output file by hand.
# NOTE(review): presumably meant for runs that stopped before the main routine closed the file - confirm it is still needed.
closeHaloOutput(haloOutputFile,haloOutputData,haloOutputDataset,haloOutput_iRec)

In [None]:
# Scratch cell: pack the output properties of the current `halo` into one dtype_halo record.
# The values are passed as a tuple because a structured dtype reads one tuple as one
# record, whereas the entries of a list are read as separate records.
np.array(
    (
        halo.graphID,
        halo.snapID,
        halo.haloID,
        halo.catalogID,
        halo.mass,
        halo.massBaryon,
        halo.mass_fromProgenitors,
    ),
    dtype=dtype_halo)

In [None]:
# Scratch cell: same manual flush-and-close call as in the earlier scratch cell.
# NOTE(review): looks like a leftover duplicate - confirm whether it can be removed.
closeHaloOutput(haloOutputFile,haloOutputData,haloOutputDataset,haloOutput_iRec)