In [3]:
import pathlib
import re
from collections import namedtuple
import sys
import tkinter as tk
from tkinter import filedialog
from pathlib import Path

import geopandas
import pandas as pd


# Display headings for the table columns: main() passes this mapping to
# DataFrame.rename(columns=...) so the raw output identifiers are replaced
# by readable labels.
COLUMN_NAMES = dict(
    depos_bitmp="Deposition [Bq/m2]",
    toteffout_bitmp="Total effective dose [Sv]",
    thyrod_bitmp="Thyroid Organ Dose. Outdoor [Gy]",
    gamratetot_bitmp="Dose rate [Sv]",
)


# Key identifying one result column: output (endpoint) name, elapsed time
# parsed from the filename, and the nuclide / age-group label.
# Created once here instead of rebuilding the class on every call.
Run_metadata = namedtuple("Run_metadata", ['outputname', 'timestep', 'extra'])

# Age-group suffixes that may terminate the output name.
_AGEGROUPS = ('Adults', '1year', '5year', '10year', '15year')


def parse_filename(filepath):
    """Split a result filename into run name, release timestamp and column key.

    The 12-digit timestamp (e.g. ``_202005081553_``) is used as the splitter
    because the other parts of the name vary. For
    ``AnlopGrotsund_phase1_250m_202005081553_876000_grid_gamratetot_bitmp_Total``
    the result is ``("AnlopGrotsund_phase1_250m", "202005081553",
    Run_metadata("gamratetot_bitmp", 8760, "Total"))``.

    Parameters
    ----------
    filepath : pathlib.Path
        Path whose ``stem`` is parsed; the file itself is not opened.

    Returns
    -------
    tuple
        ``(runname, timestamp, key)`` where ``key`` is a ``Run_metadata``
        namedtuple ``(outputname, timestep, extra)``. ``timestep`` is an int
        when the field is numeric, otherwise the truncated text is kept.
        ``extra`` is the last underscore-separated token, or the age group
        when that token is empty.

    Raises
    ------
    IndexError
        If the stem contains no ``_<12 digits>_`` timestamp. The exception
        type matches what the previous ``findall(...)[0]`` lookup raised.
    """
    filename = filepath.stem

    match = re.search(r"_[0-9]{12}_", filename)
    if match is None:
        raise IndexError(
            f"No 12-digit timestamp found in filename {filename!r}")
    timestamp = match.group(0)

    # maxsplit=1: a second occurrence of the same stamp further right must
    # not break the two-way unpacking.
    runname, right_part = filename.split(timestamp, 1)
    tokens = right_part.split("_")
    timestep = tokens[0]
    nuclide = tokens[-1]

    timestamp = timestamp.strip("_")
    # Everything between the leading timestep token and the trailing token.
    outputname = right_part[len(timestep)+1:len(right_part)-len(nuclide)-1]
    outputname = outputname.split("grid_")[-1]

    # The timestep field appears to be hours followed by two extra digits
    # (possibly minutes); the last two characters are dropped.
    timestep = timestep[0:-2]
    try:
        timestep = int(timestep)
    except ValueError:
        # Best effort: a non-numeric field is passed on as text.
        pass

    last_token = outputname.split("_")[-1]
    if last_token in _AGEGROUPS:
        agegroup = last_token
        # Remove the suffix itself; splitting on the age-group text would cut
        # at an earlier occurrence (e.g. "5year" inside "15year").
        outputname = outputname[:-len(agegroup)].strip('_')
    else:
        agegroup = ''

    extra = nuclide if nuclide else agegroup
    key = Run_metadata(outputname=outputname, timestep=timestep, extra=extra)

    return runname, timestamp, key


def get_shp_max(shpfile):
    """Summarise the 'Value' column of a shapefile.

    The file is read with ``geopandas.read_file``. The returned dict holds
    ``'top10'`` (mean of the up-to-ten largest values) and ``'max'`` (the
    largest value), both converted to ``float``.
    """
    values = geopandas.read_file(shpfile)['Value']
    ten_largest_mean = values.nlargest(10).mean()
    highest = values.max()
    return {'top10': float(ten_largest_mean), 'max': float(highest)}


def get_folder(run_folder="indata"):
    """Resolve the folder that contains the run output.

    Lookup order:

    1. ``run_folder`` when it is truthy. The default ``"indata"`` is truthy,
       so a bare ``get_folder()`` always returns ``Path("indata")``; pass
       ``""`` or ``None`` to reach the fallbacks below.
    2. The first command-line argument, ``sys.argv[1]``.
    3. A Tk directory-chooser dialog.

    Parameters
    ----------
    run_folder : str or path-like, optional
        Folder to use directly.

    Returns
    -------
    pathlib.Path
        The chosen folder. Its existence is not checked.
    """
    if not run_folder:
        if len(sys.argv) > 1:
            # NOTE(review): inside a Jupyter kernel sys.argv normally carries
            # the kernel launch flags (e.g. "-f <connection file>"), so this
            # branch is only meaningful when run as a script - confirm.
            run_folder = sys.argv[1]
        else:
            root = tk.Tk()
            root.withdraw()  # hide the empty root window, show only the dialog
            try:
                # NOTE(review): a cancelled dialog typically yields an empty
                # string, which Path() turns into the current directory.
                run_folder = filedialog.askdirectory(
                    initialdir="C:/ARGOS-NT", title="Velg mappe for kjøring")
            finally:
                # Release the hidden root window; it was previously leaked.
                root.destroy()

    return Path(run_folder)


def main(max_files_per_timestamp=4):
    """Collect the maximum 'Value' of the run shapefiles into one table.

    Every ``*.shp`` file below the folder returned by ``get_folder()`` is
    grouped by run name and release timestamp (both parsed from the
    filename). For each group one row is built whose columns are the
    ``(outputname, timestep, extra)`` keys and whose values are the ``'max'``
    entry of ``get_shp_max``. The table is written to
    ``Max_values_<folder stem>.xlsx`` and ``test.html`` in the working
    directory. CPU time elapsed since the start is printed after each stage.

    Parameters
    ----------
    max_files_per_timestamp : int or None, optional
        Only this many files (in glob order) are read per run/timestamp
        group. The default of 4 reproduces the previous hard-coded
        ``filelist[0:4]``; pass ``None`` to read every file.
        NOTE(review): the limit looks like a debugging leftover - confirm
        whether all files should be processed by default.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by (Run, Release date); three-level columns
        (Endpoint, Elapsed time [h], Nuclide/Agegroup). An empty frame is
        returned, and nothing is written, when no shapefile is found.
    """
    # Local import: `time` is only needed for the progress timings below.
    import time
    start = time.process_time()

    path = get_folder()
    print(time.process_time() - start)

    # runs[runname][timestamp] -> list of (key, file); the parsed key is kept
    # so that each filename is parsed only once.
    runs = {}
    for shp_file in path.glob("**/*.shp"):
        runname, timestamp, key = parse_filename(shp_file)
        runs.setdefault(runname, {}).setdefault(
            timestamp, []).append((key, shp_file))
    print(time.process_time() - start)

    # One dict per row; the frame is built once at the end instead of growing
    # it with DataFrame.append (quadratic, and removed in pandas 2.0).
    records = []
    for run, timestamps in runs.items():
        for timestamp, entries in timestamps.items():
            print(f"Reading {run}, {timestamp}")
            # The zero placeholders give these two keys the same three-part
            # shape as the result keys, so all columns fit one MultiIndex.
            timestamp_results = {
                ('run', 0, '0'): run,
                ('timestamp', 0, '0'): timestamp}
            for key, file in entries[:max_files_per_timestamp]:
                if 0 < key.timestep < 48:
                    # The run stopped before the first wanted timestep: file
                    # its value under the 48 h column instead.
                    print(f"Overriding timestep {key.timestep}h to 48h")
                    key48 = key._replace(timestep=48)

                    # An existing 48 h value is never overwritten; warn and
                    # skip this file.
                    if key48 in timestamp_results:
                        print(
                            f"Warning, override 48h key already existing. {key}. \nOld:{timestamp_results[key48]}\nNew: {get_shp_max(file)['max']} ")
                        continue
                    key = key48

                timestamp_results[key] = get_shp_max(file)['max']

            records.append(timestamp_results)
    print(time.process_time() - start)

    if not records:
        # MultiIndex.from_tuples cannot infer levels from an empty list, so
        # stop here instead of failing with an unrelated TypeError.
        print(f"No .shp files found under {path}")
        return pd.DataFrame()

    df = pd.DataFrame(records)
    # tuple(): normalise namedtuple keys and plain tuples alike.
    df.columns = pd.MultiIndex.from_tuples([tuple(col) for col in df.columns])
    df = df.reindex(sorted(df.columns), axis=1)
    # The two index columns are set in separate steps, as in the original.
    df = df.set_index(("run", 0, '0'))
    df = df.set_index(('timestamp', 0, '0'), append=True)
    df.index.names = ["Run", 'Release date']

    # Readable names for the column levels and for the known endpoints.
    df.columns.names = ['Endpoint', 'Elapsed time [h]', 'Nuclide/Agegroup']
    df = df.rename(columns=COLUMN_NAMES)
    print(time.process_time() - start)

    df.to_excel(f"Max_values_{path.stem}.xlsx")
    print(time.process_time() - start)
    # Plain, unstyled HTML export. The former
    # df.style.background_gradient(cmap='res') statement was removed: its
    # result was discarded, so it had no effect on this file, and 'res' is
    # not a valid colormap name.
    df.to_html('test.html')
    return df

# Entry point: build the summary table and keep it in the module-level
# name ``df`` so it remains available after this cell has run.
if __name__ == "__main__":
    df = main()


0.0
0.015625
Reading AnlopGrotsund_ARPANSA, 202005250235
Reading AnlopGrotsund_phase1_250m, 202006161905
9.015625
9.015625
9.03125


In [9]:
# Wrap the table in a Styler that shades the cells with the 'Reds' colormap.
# Note: df2 is a Styler, not a DataFrame.
df2 = df.style.background_gradient(cmap="Reds")

In [10]:
# Bare expression: the notebook renders the styled table as this cell's output.
df2

Unnamed: 0_level_0,Endpoint,Deposition [Bq/m2],Dose rate [Sv],isocurve_arrival,isocurve_gamratetot,Total effective dose [Sv]
Unnamed: 0_level_1,Elapsed time [h],48,168,0,168,168
Unnamed: 0_level_2,Nuclide/Agegroup,Cs-137,Total,total,Total,Total
Run,Release date,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
AnlopGrotsund_ARPANSA,202005250235,,9e-06,48.0,1e-06,0.007037
AnlopGrotsund_phase1_250m,202006161905,67.824689,0.0,24.0,,3e-06


In [13]:
# Write the styled table to test.html.
# Styler.render() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.to_html() (pandas >= 1.3) replaces it. Fall back to render() only
# on older versions that lack to_html().
styled_html = df2.to_html() if hasattr(df2, "to_html") else df2.render()
with open("test.html", 'w') as html:
    html.write(styled_html)