In [1]:
from functools import reduce
from itertools import compress
import argparse
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Upper bound of the generation index: each seed is re-indexed to
# generations 1..N_GENERATIONS when the results are padded.
N_GENERATIONS = 500
# Number of consecutive seeds iterated over (the loading cell starts at seed 42).
N_SEEDS = 20
# Path of the results table passed to pd.read_csv.
file = "results/41_46/results.csv"

In [2]:
# Load the whitespace-delimited results table. `sep=r"\s+"` is the supported
# spelling of the deprecated `delim_whitespace=True`.
df = pd.read_csv(file, sep=r"\s+")

# Target index: every seed is expanded to one row per generation 1..N_GENERATIONS.
new_index = pd.Index(np.arange(1, N_GENERATIONS + 1), name="gen")

# Seeds examined by this cell: 42, 43, ..., 42 + N_SEEDS - 1.
seed_range = range(42, 42 + N_SEEDS)

# One dataframe per seed. Generations that are absent from the file are
# forward-filled with the last recorded row (`ffill` replaces the deprecated
# `pad`); generations before the first recorded one stay NaN.
df_vec = [
    df[df.seed == seed].set_index("gen").reindex(new_index).ffill().reset_index()
    for seed in seed_range
]

# Keep only seeds that recorded a generation beyond 5; runs that stopped at
# generation 5 were caused by a configurator bug. A seed with no rows at all
# has max() == NaN, fails the comparison, and is dropped as well.
df_filter = [df[df.seed == seed]["gen"].max() > 5 for seed in seed_range]
df_vec = list(compress(df_vec, df_filter))

if not df_vec:
    raise ValueError(
        "No seed ran for more than 5 generations; there is nothing to aggregate."
    )

# Stack the per-seed frames. A chained outer merge on every shared column is
# quadratic in the number of seeds and its row order depends on the pandas
# version (outer joins may sort the keys); concat keeps the seed-by-seed order
# and never collapses rows.
results_df_vec = pd.concat(df_vec, ignore_index=True)

In [3]:
# Day bounds for the column split performed further down: metric columns are
# grouped in runs of (day_max - day_min) columns, and the first
# (day_mid - day_min) columns of each run are treated as validation columns.
day_min, day_mid, day_max = 41, 43, 46

# Display the current contents of `df`.
df

Unnamed: 0,seed,gen,H42,H43,H44,H45,H46,D42,D43,D44,...,R42,R43,R44,R45,R46,T42,T43,T44,T45,T46
0,42,1,21170.78542,22490.20890,23799.84606,25094.97738,26371.73322,18249.10240,14851.25494,12073.27706,...,39785.64260,42681.96645,44717.71744,46036.31883,46769.02267,6834.6,7516.4,8232.5,8968.2,9711.7
1,42,2,17703.47254,18886.29305,20105.59053,21355.52952,22630.70958,26763.17260,23032.56864,19803.53779,...,45510.94836,51152.56727,55828.93317,59622.38668,62634.59652,4418.9,5006.5,5647.3,6326.4,7031.4
2,42,6,14896.46081,15959.11181,17044.44977,18146.19742,19259.23077,23220.30693,19919.48691,17004.82708,...,41086.04446,45524.07815,49068.43529,51813.26988,53868.30754,4714.8,5212.1,5748.3,6310.6,6888.5
3,42,8,14892.32022,16234.23617,17650.91225,19130.83961,20663.35784,28321.21063,25073.70684,22091.74993,...,51460.04384,59240.06611,65910.55246,71488.56155,76055.60597,5044.1,5757.7,6549.6,7401.3,8296.8
4,42,9,16582.14847,17664.46391,18783.88703,19932.46767,21103.74273,37089.63889,34533.29615,31822.07993,...,41383.76584,46493.18133,50983.93379,54843.56380,58098.85380,4300.3,4823.7,5396.1,6007.4,6648.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
318,60,227,21124.47206,23557.41761,26095.34330,28737.07082,31481.68763,49053.25134,51640.62467,54161.66704,...,31531.41980,32469.54421,33423.49909,34399.63310,35400.83284,5068.0,5366.5,5669.2,5976.2,6287.9
319,60,266,20214.69057,22600.87864,25092.41126,27688.05750,30386.85297,48593.17950,51181.34009,53702.23118,...,30454.43270,31417.95959,32396.28297,33395.71203,34419.11424,4893.7,5174.8,5460.0,5749.8,6044.3
320,60,278,19709.09503,21952.64668,24293.33772,26729.78438,29260.85378,47999.39256,50504.07983,52936.08149,...,30904.51508,31859.04181,32825.94877,33811.37659,34818.12090,4479.6,4761.4,5046.5,5335.2,5627.7
321,60,365,19734.18940,21977.51988,24317.09630,26751.84400,29281.00847,48580.52859,51291.62964,53944.43852,...,30339.96044,31057.83519,31785.63165,32533.58618,33307.01150,4472.3,4743.8,5015.7,5288.5,5562.4


In [14]:
# Work on a copy so the aggregated frame stays intact and this cell can be
# re-run from scratch.
df = results_df_vec.copy()

# The first two columns are skipped as identifiers (expected to be "gen" and
# "seed" given how results_df_vec is assembled); everything after them is
# treated as a metric column.
n_id_columns = 2
days_per_group = day_max - day_min
n_validation_days = day_mid - day_min

# Position of each metric column relative to the first metric column.
metric_offsets = np.arange(len(df.columns) - n_id_columns)

# Within each run of `days_per_group` columns, the leading
# `n_validation_days` columns are validation days; the rest are test days.
is_validation = (metric_offsets % days_per_group) < n_validation_days

# Absolute column positions in the un-dropped frame (results_df_vec layout).
validation_list = metric_offsets[is_validation] + n_id_columns
test_list = metric_offsets[~is_validation] + n_id_columns

# Keep only the identifier and test-day columns. Rebinding instead of
# `inplace=True` avoids mutating a frame that another cell may have displayed.
df = df.drop(columns=df.columns[validation_list])

In [20]:
# Remove the seed identifier column. Rebinding with errors="ignore" keeps the
# cell re-runnable: `del df["seed"]` raises KeyError on a second execution.
df = df.drop(columns=["seed"], errors="ignore")

In [21]:
# Display the frame after the validation-day columns and the seed column
# have been removed.
df

Unnamed: 0,gen,H44,H45,H46,D44,D45,D46,R44,R45,R46,T44,T45,T46
0,1,23799.84606,25094.97738,26371.73322,12073.27706,9813.28447,7981.38972,44717.71744,46036.31883,46769.02267,8232.5,8968.2,9711.7
1,2,20105.59053,21355.52952,22630.70958,19803.53779,17032.09305,14667.85862,55828.93317,59622.38668,62634.59652,5647.3,6326.4,7031.4
2,3,20105.59053,21355.52952,22630.70958,19803.53779,17032.09305,14667.85862,55828.93317,59622.38668,62634.59652,5647.3,6326.4,7031.4
3,4,20105.59053,21355.52952,22630.70958,19803.53779,17032.09305,14667.85862,55828.93317,59622.38668,62634.59652,5647.3,6326.4,7031.4
4,5,20105.59053,21355.52952,22630.70958,19803.53779,17032.09305,14667.85862,55828.93317,59622.38668,62634.59652,5647.3,6326.4,7031.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8495,496,24317.09630,26751.84400,29281.00847,53944.43852,56543.08272,59091.66884,31785.63165,32533.58618,33307.01150,5015.7,5288.5,5562.4
8496,497,24317.09630,26751.84400,29281.00847,53944.43852,56543.08272,59091.66884,31785.63165,32533.58618,33307.01150,5015.7,5288.5,5562.4
8497,498,24317.09630,26751.84400,29281.00847,53944.43852,56543.08272,59091.66884,31785.63165,32533.58618,33307.01150,5015.7,5288.5,5562.4
8498,499,24317.09630,26751.84400,29281.00847,53944.43852,56543.08272,59091.66884,31785.63165,32533.58618,33307.01150,5015.7,5288.5,5562.4


In [43]:
# Names of the validation-day columns. `validation_list` holds positions in the
# un-dropped layout, so look them up in `results_df_vec`: `df` has already lost
# these columns, and indexing it with the same positions returns the wrong
# labels or raises IndexError on a fresh top-to-bottom run.
results_df_vec.columns[validation_list]

Index(['H42', 'H43', 'D42', 'D43', 'R42', 'R43', 'T42', 'T43'], dtype='object')