# Python implementation of L-Galaxies

This is a playground to test out the possibility of using `python` as an interface into L-Galaxies.

In [None]:
# Imports
import astropy.constants as c
import astropy.units as u
import gc
import h5py
h5py.enable_ipython_completer()
import numpy as np
import yaml
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_context('notebook')
sns.set_style('whitegrid')

In [None]:
# Script parameters
# Path of the YAML file from which the run-time parameters are loaded.
file_parameters = 'input/input.yml'
# When True, the loaded parameters are echoed to the screen.
displayParameters = True

In [None]:
# Read in parameters, graph file, etc.

# Open the parameter file in a context manager so that the file handle is
# closed as soon as the YAML has been parsed.
# NOTE(review): yaml.safe_load is the more conservative choice if the
# parameter file could ever come from an untrusted source.
with open(file_parameters) as parameterFile:
    parameters = yaml.load(parameterFile, Loader=yaml.FullLoader)
if displayParameters:
    # One line per top-level entry: name padded to 20 characters, then value.
    for item, value in parameters.items():
        print("{:20s}: {}".format(item, value))

# The graph file is opened read-only and deliberately left open, because
# graphData is read again further down the notebook.
graphFile = parameters['inputFiles']['graphFile']
graphData = h5py.File(graphFile, 'r')

## Data structure for halos

This is an interesting problem.  We have many requirements:
- Must be fast – does this remove the possibility of using objects?
- Must be flexible enough to respond to parameter choices (ideally at run time).
- Must allow for variable-length arrays – I think that each halo will individually need to track what fraction of material it inherits from each progenitor.

I think that the object-oriented way of doing it, as below, can easily adapt to run-time choices because it does not use long arrays.

I have initially coded it using lists of haloProperties.  There is a numpy `object` dtype that would allow one to turn those lists into numpy arrays, but I don't know if it offers any performance advantages or disadvantages.  The numpy objects can have arbitrary data added to them, but again I don't know if this flexibility means that they will be very slow (due to having to continually shift things around in memory).

This is all very far from the current method that we have in L-Galaxies of defining a galaxy structure at compile time.

In [None]:
# haloProperties is a plain container holding everything known about one halo.
# It is deliberately free of sophisticated methods: the constructor stores only
# the labels that identify the halo, and any further properties are attached
# to the instance afterwards as ordinary attributes.
# Should this dynamic attribute creation prove too slow, the full set of
# variables could presumably be declared at construction time instead.
class haloProperties:
    """Container for the properties of a single halo."""

    def __init__(self, graph, snap, haloID):
        """Store the graph, snapshot and halo ID that label this halo."""
        self.graph, self.snap, self.haloID = graph, snap, haloID

## Functions

Most of the work is to be done in external routines, probably to be coded in C for efficiency.

Here we just include the high-level driver routines.

In [None]:
# Processing halos
def processHalo(halo):
    """Process a single halo and flag it as done.

    Reads and updates the module-level ``doneHalo`` flag container, indexed
    by ``halo``.  If the halo is already flagged, a warning is printed and
    nothing else happens.  The actual processing steps are still only a
    commented-out plan, so for now the sole effect is setting the flag.

    Parameters
    ----------
    halo : index into ``doneHalo`` of the halo to process.
    """
    if doneHalo[halo]:
        print('Warning: processHalo: halo ',str(halo),' already processed.')
        return
    # Planned processing steps (not yet enabled):
    #     readProperties(halo)
    #     # Check that progenitors have been processed
    #     for progHalo in haloProperties_thisSnap[halo].prog:
    #         # This is the iterative way to do it.  But could lead to excessive storage.
    #         # if not doneHalo[progHalo]: processHalo(progHalo)
    #         # Better simply to process in snapshot order.  In which case:
    #         assert(doneHalo[progHalo])
    #     gatherProgenitors(halo)
    #     fixBaryonFraction(halo)
    #     fixStellarFraction(halo) # Dummy routine.
    #     outputHalo(halo)         # Is this the right place to do this?
    # This line must sit at function-body indentation; a mismatched indent
    # here is an IndentationError that stops the whole cell from running.
    doneHalo[halo] = True
    
def readProperties(halo):
    """Placeholder for reading halo properties from the input graph file.

    Not yet implemented: the argument is ignored and None is returned.
    """
    return None

def gatherProgenitors(halo):
    """Placeholder for collecting information about material inherited from progenitors.

    Not yet implemented: the argument is ignored and None is returned.
    """
    return None

## Main routine

In [None]:
# Some dummy input just to test structure
from copy import deepcopy

# Template halo list that is copied into every snapshot of every graph.
dummyHaloDict = {'Halos': np.array([0, 1, 2, 3, 4])}
nHaloDict = len(dummyHaloDict['Halos'])
snaps = (3, 4, 5, 6, 7, 8)
nSnap = len(snaps)
nGraph = 3
# Each snapshot needs its own deep copy of the template.  Without it every
# entry would share one 'Halos' array, and the in-place offsets applied below
# would all accumulate on that single array.
graphs = [{snap: deepcopy(dummyHaloDict) for snap in snaps} for _ in range(nGraph)]
nHaloInGraph = nSnap * nHaloDict
nHalo = nGraph * nHaloInGraph
# Shift the halo IDs so that they are unique across all graphs and snapshots:
# graph iGraph, snapshot position iSnap holds the nHaloDict consecutive IDs
# starting at iGraph*nHaloInGraph + iSnap*nHaloDict.
for iGraph, graph in enumerate(graphs):
    for iSnap, snap in enumerate(snaps):
        graph[snap]['Halos'] += iGraph * nHaloInGraph + iSnap * nHaloDict

In [None]:
# Iteratively loop over halos, doing whatever processing is required.
# This assumes that halo properties depend only upon those halos in
# their immediate past in the merger graph.

# Note: no attempt here to include sub-halos.  Let's get halos right
# first!

# Note: this is currently written in pseudo-code.
# Convert to real code once we have the structure right.

# Note that we need to know the number of halos and these
# need to be identified some way with unique IDs

# Initialise halos as not done
# nHalo = graphData['nHalo']
# Use the builtin bool as the dtype: the np.bool alias was deprecated in
# NumPy 1.20 and is missing from some later releases.
doneHalo = np.full(nHalo, False, dtype=bool)

# Loop over MergerGraphs.
for graph in graphs:
    # Loop over snapshots from first to last.
    haloProperties_lastSnap = None
    for snap in snaps:
        #nHaloSnap = graph[snap]['nHalo']
        # initialise the halos in this snapshot
        # How do we define an array of classes?  For now, use a list
        haloProperties_thisSnap = [haloProperties(graph, snap, iHalo) for iHalo in range(nHaloDict)]
        # Loop over halos in snapshot.
        for halo in graph[snap]['Halos']:
            processHalo(halo)
        # Once all halos have been done, update reference to lastSnap (and hence free memory)
        # To be on the safe side, first del and call the garbage collector (probably not necessary)
        del haloProperties_lastSnap
        gc.collect()
        haloProperties_lastSnap = haloProperties_thisSnap
        # Delete reference to this memory to free variable for new use.
        del haloProperties_thisSnap

In [None]:
# haloProperties_thisSnap is deleted at the end of every snapshot iteration,
# so only haloProperties_lastSnap remains available for inspection here.
# Show the haloID stored on each of the first nHaloDict entries of the last
# snapshot processed ...
print([haloProperties_lastSnap[i].haloID for i in range(nHaloDict)])
# ... followed by the indices of any halos whose done flag is still False.
print(np.where(np.logical_not(doneHalo)))

In [None]:
# Print the full contents of the '009' entry inside the '0' entry of the graph file.
# NOTE(review): presumably '0' is a graph ID and '009' a snapshot label - confirm
# against the layout of the input graph file.
print(graphData['0']['009'][:])