In [1]:
import re

import numpy as np

from IPython.display import display, Markdown
from pathlib import Path
from collections import defaultdict
from itertools import chain

In [2]:
# Zero-width pattern: matches at every position that is followed by an
# uppercase ASCII letter, except at the very start of the string.
# NOTE(review): presumably used to split CamelCase ("CC") identifiers into
# words; no use is visible in this part of the notebook -- confirm.
CC_PATTERN = re.compile(r"(?<!^)(?=[A-Z])")

In [3]:
# Patterns that pull individual figures out of a simulation log. Each multi-line
# pattern is written one log line per literal; adjacent raw strings are
# concatenated by the parser, so the compiled pattern is a single string.

# Group 1: number of unique species reported by the simulation.
BIODIVERSITY_PATTERN = re.compile(
    r"The simulation resulted in a biodiversity of (\d+) unique species\."
)

# Groups 1-3: the initialisation, execution and cleanup durations, as raw text.
EXECUTION_PATTERN = re.compile(
    r"The simulation took:\n"
    r" - initialisation: ([^\n]+)\n"
    r" - execution: ([^\n]+)\n"
    r" - cleanup: ([^\n]+)\n"
)

# Group 1: the "raw" total event count from the event summary.
EVENT_PATTERN = re.compile(
    r"Event Summary:\n"
    r" - Total #individuals:\n"
    r"   \d+\n"
    r" - Total #events:\n"
    r"   - raw:\n"
    r"     (\d+)"
)

In [4]:
# Splits a duration such as "1.5ms" into a decimal magnitude (group 1) and the
# run of non-digit characters that follows it (group 2, the unit suffix).
TIME_PATTERN = re.compile(r"(\d+(?:\.\d+)?)([^\d]+)")
# Conversion factor from each supported unit suffix to seconds.
TIME_UNITS = {
    "ns": 0.000000001,
    "µs": 0.000001,
    "ms": 0.001,
    "s": 1.0,
}
# Microseconds are spelled several ways in practice: plain ASCII "us", the
# MICRO SIGN (U+00B5) and the visually identical GREEK SMALL LETTER MU (U+03BC).
# Register all of them so a look-alike character does not cause a lookup failure.
TIME_UNITS.update(dict.fromkeys(("us", "\u00b5s", "\u03bcs"), 0.000001))


def parse_time(time_str):
    """Convert a textual duration such as ``"1.5ms"`` into seconds.

    Parameters:
        time_str: A string starting with a decimal number that is followed by
            a unit suffix listed in ``TIME_UNITS``. Whitespace around the
            suffix (e.g. ``"3 s"``) is ignored.

    Returns:
        The duration in seconds as a ``float``, or ``None`` when ``time_str``
        does not start with ``<number><unit>``.

    Raises:
        KeyError: If the unit suffix is not a key of ``TIME_UNITS``.
    """
    match = TIME_PATTERN.match(time_str)

    if match is None:
        return None

    # The unit group is "everything that is not a digit", so it also swallows
    # any whitespace between or after the number and the unit; drop it before
    # the table lookup.
    unit = match.group(2).strip()

    try:
        factor = TIME_UNITS[unit]
    except KeyError:
        # Same exception type as a plain dict lookup, but say what failed.
        raise KeyError(f"unknown time unit {unit!r} in {time_str!r}") from None

    return float(match.group(1)) * factor

In [5]:
# Capturing group for an optionally signed decimal number with an optional
# fraction and exponent (e.g. "1e9", "71000000000", "-0.5E+3"). It is the
# leading component of every path matched below and is always group 1.
_DOMAIN_NUMBER = r"([+\-]?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?)"

# A path that starts with such a number.
DOMAIN_DIR_PATTERN = re.compile(_DOMAIN_NUMBER)
# "<number>/limit.isolated.o<digits>.<digits>"
DOMAIN_FILE_PATTERN = re.compile(_DOMAIN_NUMBER + r"/limit\.isolated\.o\d+\.\d+")
# "<number>/limit.<digits>.pbs.o<digits>", with an optional literal "[]" after the first digits.
DOMAIN_REPLAY_PATTERN = re.compile(_DOMAIN_NUMBER + r"/limit\.\d+(?:\[\])?\.pbs\.o\d+")

In [6]:
display(Markdown("# Scalability of the IsolatedLandscape Independent Algorithm variant:"))

# domain size -> list of (initialisation, execution, cleanup) durations in
# seconds, one tuple per log matched by DOMAIN_FILE_PATTERN.
throughput_results = defaultdict(list)
# domain size -> list of (biodiversity, raw event count, events per species),
# one tuple per log matched by DOMAIN_REPLAY_PATTERN.
redundancy_results = defaultdict(list)


def _read_log(log_path):
    """Return the full text of the log file at ``log_path``."""
    # Decode explicitly: the timings may use the non-ASCII "µs" unit, which a
    # platform-default codec could garble.
    # NOTE(review): assumes the logs are UTF-8 encoded -- confirm.
    with open(log_path, encoding="utf-8") as file:
        return file.read()


def _summarise(series):
    """Return ``(median, mean, standard deviation, count)`` of ``series``."""
    return (np.median(series), np.mean(series), np.std(series), len(series))


# Sorted iteration keeps the printed output in the same order on every run.
for domain_dir in sorted(Path(".").iterdir()):
    if DOMAIN_DIR_PATTERN.match(str(domain_dir)) is None:
        continue

    # The pattern only anchors the start of the name, so a regular file whose
    # name begins with a digit matches as well; iterating it would raise.
    if not domain_dir.is_dir():
        continue

    for log_path in sorted(domain_dir.iterdir()):
        # The file patterns hard-code "/" as the separator.
        log_name = log_path.as_posix()

        match = DOMAIN_FILE_PATTERN.match(log_name)

        if match is not None:
            sample = float(match.group(1))

            match = EXECUTION_PATTERN.search(_read_log(log_path))
            if match is None:
                print(log_path)
                continue

            timings = tuple(parse_time(match.group(index)) for index in (1, 2, 3))
            if None in timings:
                # parse_time returns None for text that is not "<number><unit>";
                # report the log like any other unparseable one instead of
                # letting the None reach the statistics below.
                print(log_path)
                continue

            throughput_results[sample].append(timings)

        match = DOMAIN_REPLAY_PATTERN.match(log_name)

        if match is not None:
            sample = float(match.group(1))

            stdout = _read_log(log_path)

            match = BIODIVERSITY_PATTERN.search(stdout)
            if match is None:
                print(log_path)
                continue
            biodiversity = int(match.group(1))

            match = EVENT_PATTERN.search(stdout)
            if match is None:
                print(log_path)
                continue
            raw_events = int(match.group(1))

            print(sample, biodiversity, raw_events, raw_events / biodiversity)

            redundancy_results[sample].append((biodiversity, raw_events, raw_events / biodiversity))

# Per domain: six (median, mean, std, count) tuples, in the order
# initialisation, execution, cleanup, biodiversity, raw events, redundancy.
results = {}

for domain in sorted(set(throughput_results) | set(redundancy_results)):
    # .get() rather than indexing, so the defaultdicts do not grow empty entries.
    timing_samples = throughput_results.get(domain)
    redundancy_samples = redundancy_results.get(domain)

    if not timing_samples or not redundancy_samples:
        # A domain missing either kind of log would yield fewer than six
        # series and break the unpacking below.
        print(f"Skipping domain {domain}: needs both timing and replay logs")
        continue

    results[domain] = [
        _summarise(series) for series in chain(zip(*timing_samples), zip(*redundancy_samples))
    ]

rows = [[domain, *results[domain]] for domain in sorted(results)]

if rows:
    (
        domains, initialisations, executions, cleanups, biodiversities, raw_speciations, redundancies
    ) = zip(*rows)
else:
    # Nothing usable was found: keep the names defined, but empty.
    domains = initialisations = executions = cleanups = ()
    biodiversities = raw_speciations = redundancies = ()

# Index 1 is the mean and index 2 the standard deviation of each summary.
for domain, execution, redundancy in zip(domains, executions, redundancies):
    display(Markdown(fr"* ${domain}$ individuals: ${np.round(execution[1], 2)}s \pm {np.round(execution[2], 2)}s$ per batch [{execution[3]} samples] -> ${np.round(redundancy[1] * 100, 2)}\% \pm {np.round(redundancy[2] * 100, 2)}\%$"))

# Scalability of the IsolatedLandscape Independent Algorithm variant:

10000000000.0 549112 2490116 4.534805285624791
10000000000.0 549066 2488176 4.531651932554556
10000000000.0 547749 2486009 4.538591581180431
10000000000.0 548893 2495337 4.546126476380643
10000000000.0 546998 2483023 4.539363946486093
10000000000.0 548707 2490085 4.538095923689692
10000000000.0 548960 2488309 4.532769236374235
10000000000.0 549814 2500675 4.548219943471793
10000000000.0 547791 2485928 4.538095733591826
10000000000.0 548047 2491130 4.545467815716536
71000000000.0 685736 10976942 16.007533511438805
71000000000.0 685544 10993020 16.03546964162767
71000000000.0 686924 11010866 16.02923467516057
71000000000.0 685369 10975305 16.01371669859594
71000000000.0 686484 10998981 16.02219571031517
71000000000.0 687287 11046094 16.072025223814798
71000000000.0 685355 10958309 15.989244989822792
71000000000.0 687180 10982778 15.982388893739632
71000000000.0 685453 10993771 16.03869411907162
71000000000.0 686017 11009660 16.04866934784415
1000000000.0 389852 640412 1.642705436935042
1

* $1000000000.0$ individuals: $13722.85s \pm 2540.23s$ per batch [100 samples] -> $164.19\% \pm 0.25\%$

* $10000000000.0$ individuals: $5584.37s \pm 2276.5s$ per batch [1000 samples] -> $453.93\% \pm 0.54\%$

* $71000000000.0$ individuals: $3617.18s \pm 2138.75s$ per batch [7100 samples] -> $1602.39\% \pm 2.57\%$