In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scienceplots
import os

In [42]:
def get_global_bests(hist_fit: list) -> list:
    """Return the running (global) best fitness after each generation.

    Element ``i`` of the result is the largest value found in
    ``hist_fit[0..i]``. A later value replaces the current best only when it
    is strictly greater than it.

    Args:
        hist_fit (list): Best fitness of each generation, in generation
            order. Any iterable of mutually comparable values is accepted
            (the plotting cells in this file pass a NumPy array).

    Returns:
        list: Running maximum with one entry per input value. An empty input
            yields an empty list.
    """
    global_bests = []
    for fitness in hist_fit:
        # The first value seeds the series; afterwards the previous best is
        # carried forward unless the new value strictly improves on it.
        if not global_bests or fitness > global_bests[-1]:
            global_bests.append(fitness)
        else:
            global_bests.append(global_bests[-1])

    return global_bests

def get_mean_std(df_hist_pop: pd.DataFrame) -> tuple:
    """Return the per-generation mean and standard deviation of fitness.

    Args:
        df_hist_pop (pd.DataFrame): Population history with at least the
            columns "generation" and "fitness" (one row per individual).

    Returns:
        tuple: ``(hist_avg_fit, hist_std_fit)``, two NumPy arrays with one
            entry per distinct generation, ordered by generation. The
            standard deviation is whatever ``GroupBy.std`` produces with its
            default arguments.
    """
    # Select "fitness" before aggregating: only that column is needed, and
    # aggregating the whole frame fails when other columns are non-numeric.
    fitness_by_generation = df_hist_pop.groupby("generation")["fitness"]
    hist_avg_fit = fitness_by_generation.mean().values
    hist_std_fit = fitness_by_generation.std().values

    return (hist_avg_fit, hist_std_fit)

def get_species_statistics(df_hist_pop: pd.DataFrame) -> dict:
    """Return lifetime, ancestry and fitness statistics for every species.

    Args:
        df_hist_pop (pd.DataFrame): Population history with the columns
            "generation", "species", "id", "parent1" and "fitness" (one row
            per individual per generation). Generations are used as array
            indices, so they must be non-negative integers.

    Returns:
        dict: ``{species_id: stats}`` where ``stats`` is a dict with the keys
            "created" (first generation the species appears in),
            "extinct" (last generation it appears in, plus one),
            "ancestor" (species id of the founder's parent, or -1),
            "fitness_hist_average" and "fitness_hist_std" (arrays of length
            ``max_generation + 1``; entries for generations in which the
            species has no members are 0).
    """
    max_gen = df_hist_pop["generation"].max()  # final generation
    species_data = {}
    for key in df_hist_pop["species"].unique():
        # Rows belonging to the species currently being processed.
        current_species = df_hist_pop[df_hist_pop["species"] == key]

        ### Lifetime of the species
        created = current_species["generation"].min()
        # The species is removed in the generation after its last appearance.
        extinct = current_species["generation"].max() + 1

        ### Ancestor species
        # The parent of the first individual listed for this species
        # identifies the species it branched from; -1 marks an initial
        # individual without a parent.
        first_genome_parent = current_species["parent1"].iloc[0]
        ancestor = -1
        if first_genome_parent != -1:
            # Boolean-mask lookup works for any index (no label/position
            # mix-up). The first matching row is used. A parent id that does
            # not occur in the table leaves the species without an ancestor.
            parent_species = df_hist_pop.loc[
                df_hist_pop["id"] == first_genome_parent, "species"
            ]
            if not parent_species.empty:
                ancestor = parent_species.iloc[0]

        ### Fitness history of the species
        fitness_by_generation = current_species.groupby("generation")["fitness"]
        mean_by_generation = fitness_by_generation.mean()
        std_by_generation = fitness_by_generation.std()
        # Write each statistic at the generation it belongs to, so a species
        # that is absent in some intermediate generation is still handled.
        generations = mean_by_generation.index.to_numpy()

        fitness_hist_average = np.zeros(max_gen + 1)
        fitness_hist_average[generations] = mean_by_generation.to_numpy()
        fitness_hist_std = np.zeros(max_gen + 1)
        fitness_hist_std[generations] = std_by_generation.to_numpy()

        species_data[key] = {
            "created": created,
            "extinct": extinct,
            "ancestor": ancestor,
            "fitness_hist_average": fitness_hist_average,
            "fitness_hist_std": fitness_hist_std,
        }

    return species_data

def get_species_order(species_data: dict) -> list:
    """Return the species ids in depth-first order of the ancestry tree.

    The traversal starts from the species whose "ancestor" is -1 (no
    ancestor) and lists every species before its descendants. Among species
    sharing the same ancestor, the one that appears later in
    ``species_data`` is visited first. Species that cannot be reached from a
    root are not included.

    Args:
        species_data (dict): ``{species_id: stats}`` where each ``stats``
            mapping has an "ancestor" entry.

    Returns:
        list: Species ids in traversal order.
    """
    # Index the children of every ancestor once instead of rescanning the
    # whole dictionary for each visited species.
    children = {}
    for key, species in species_data.items():
        children.setdefault(species['ancestor'], []).append(key)

    order = []
    # Popping from the end of the list is O(1); pushing children in
    # dictionary order puts the last one on top of the stack.
    pending = list(children.get(-1, []))
    while pending:
        current = pending.pop()
        order.append(current)
        pending.extend(children.get(current, []))

    return order

def get_num_species_hist(df_hist_pop: pd.DataFrame) -> np.ndarray:
    """Return the number of distinct species in each generation.

    Args:
        df_hist_pop (pd.DataFrame): Population history with the columns
            "generation" and "species". Generations are expected to be
            non-negative integers.

    Returns:
        np.ndarray: Array of length ``max_generation + 1`` whose entry ``g``
            is the number of distinct species values recorded for generation
            ``g``. Generations without any rows count as 0. An empty frame
            yields an empty array.
    """
    if df_hist_pop.empty:
        return np.array([], dtype=int)

    max_gen = int(df_hist_pop["generation"].max())
    # dropna=False counts a missing species value as its own entry.
    species_per_generation = df_hist_pop.groupby("generation")["species"].nunique(dropna=False)
    # Reindex over the full generation range so that generations with no
    # rows appear as 0 instead of raising.
    num_species_hist = species_per_generation.reindex(range(max_gen + 1), fill_value=0).to_numpy()

    return num_species_hist

# Get the history of how often individuals changed species (transfers).
def get_num_species_transfers_hist(df_hist_pop: pd.DataFrame) -> np.ndarray:
    """Return the number of species changes (transfers) in each generation.

    NOTE: the implementation is unfinished. The body only builds the
    per-generation groupby object and then falls off the end, so the function
    currently returns ``None`` instead of the documented array.

    Args:
        df_hist_pop (pd.DataFrame): Population history; must contain a
            "generation" column.

    Returns:
        np.ndarray: Intended to hold the per-generation transfer counts
            (not produced yet, see the note above).
    """
    df_hist_group_gen = df_hist_pop.groupby("generation")

In [15]:
# NOTE: "__file__" is a string literal here, not the __file__ variable, so the
# path is resolved relative to the current working directory.
CURR_DIR = os.path.dirname(os.path.realpath("__file__"))
EXP_DIR = os.path.join(CURR_DIR, "doublependulum_exp")

# Keep sub-directories only. The plotting cells save "<exp_name>_*.png" files
# into EXP_DIR; on a re-run those files would otherwise be mistaken for trial
# directories. Sorting makes the processing order deterministic.
data_dirs = sorted(
    entry
    for entry in os.listdir(EXP_DIR)
    if os.path.isdir(os.path.join(EXP_DIR, entry))
)

# Experiment name = text before the first "_" of a trial directory name;
# dict.fromkeys removes duplicates while keeping first-seen order.
exp_name_uniques = list(dict.fromkeys(entry.split("_")[0] for entry in data_dirs))

In [31]:
# One figure per experiment with one subplot per trial, showing the running
# global best and the population mean +/- standard deviation of fitness.
for exp_name in exp_name_uniques:
    # Trial directories whose name prefix (text before the first "_") matches.
    exp_trials = sorted(d for d in data_dirs if d.split("_")[0] == exp_name)

    num_trials = len(exp_trials)
    num_row = int(np.ceil(num_trials / 3))  # three subplots per row
    plt.style.use(['science', 'grid', 'no-latex'])
    fig = plt.figure(figsize=(16, 9))
    fig.suptitle(f"{exp_name} ", fontsize=16)
    axs = [fig.add_subplot(num_row, 3, i + 1) for i in range(num_trials)]
    for i, (ax, exp_trial) in enumerate(zip(axs, exp_trials)):
        trial_dir = os.path.join(EXP_DIR, exp_trial)
        df_history_fitness = pd.read_csv(os.path.join(trial_dir, "history_fitness.csv"))
        df_history_pop = pd.read_csv(os.path.join(trial_dir, "history_pop.csv"))

        global_bests = get_global_bests(df_history_fitness["fitness"].values)
        hist_avg_fit, hist_std_fit = get_mean_std(df_history_pop)

        ax.plot(global_bests, label="Global Best", color="red")
        ax.plot(hist_avg_fit, label="Mean", color="green")
        # The band is built from the population statistics, so its x-range is
        # taken from them as well. The label is a raw string because "\p" is
        # not a valid escape sequence.
        ax.fill_between(
            range(len(hist_avg_fit)),
            hist_avg_fit - hist_std_fit,
            hist_avg_fit + hist_std_fit,
            alpha=0.3,
            label=r"Mean $\pm$ Std",
            color="blue",
        )
        ax.set_ylim(0, 10000)  # y-axis fixed to 0..10000
        ax.set_title(f"Trial {i+1}")
        ax.set_xlabel("Generation")
        ax.set_ylabel("Fitness")
        # A single legend on the last subplot is enough for the whole figure.
        if i == num_trials - 1:
            ax.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(EXP_DIR, f"{exp_name}_fitness.png"))
    plt.close()

In [34]:
# One figure per experiment with one subplot per trial, showing the mean
# fitness of every species together with the population mean.
for exp_name in exp_name_uniques:
    trial_dirs = sorted(name for name in data_dirs if name.split("_")[0] == exp_name)

    n_trials = len(trial_dirs)
    n_rows = int(np.ceil(n_trials / 3))  # three subplots per row
    plt.style.use(['science', 'grid', 'no-latex'])
    fig = plt.figure(figsize=(16, 9))
    fig.suptitle(f"{exp_name} ", fontsize=16)
    axes = [fig.add_subplot(n_rows, 3, pos) for pos in range(1, n_trials + 1)]

    for trial_no, (ax, trial_dir) in enumerate(zip(axes, trial_dirs), start=1):
        pop_history = pd.read_csv(os.path.join(EXP_DIR, trial_dir, "history_pop.csv"))
        stats_by_species = get_species_statistics(pop_history)

        # Draw the species curves in ancestry-traversal order.
        for species_id in get_species_order(stats_by_species):
            ax.plot(
                stats_by_species[species_id]["fitness_hist_average"],
                label=f"Species {species_id}",
            )

        population_mean, _ = get_mean_std(pop_history)
        ax.plot(population_mean, label="Mean", color="green")
        ax.set_ylim(0, 2000)
        ax.set_title(f"Trial {trial_no}")
        ax.set_xlabel("Generation")
        ax.set_ylabel("Fitness")
        # No legend is drawn on this figure.

    plt.tight_layout()
    plt.savefig(os.path.join(EXP_DIR, f"{exp_name}_species_fit.png"))
    plt.close()

In [36]:
# One figure per experiment overlaying the running global best of all trials.
for exp_name in exp_name_uniques:
    trial_dirs = sorted(name for name in data_dirs if name.split("_")[0] == exp_name)

    plt.style.use(['science', 'grid', 'no-latex'])
    fig, ax = plt.subplots(figsize=(16, 9))

    for trial_no, trial_dir in enumerate(trial_dirs, start=1):
        fitness_history = pd.read_csv(os.path.join(EXP_DIR, trial_dir, "history_fitness.csv"))
        ax.plot(
            get_global_bests(fitness_history["fitness"].values),
            label=f"Trial {trial_no}",
        )

    ax.set_title(f"{exp_name} Global Best")
    ax.set_xlabel("Generation")
    ax.set_ylabel("Fitness")
    ax.set_ylim(0, 10000)
    ax.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(EXP_DIR, f"{exp_name}_global_best.png"))
    plt.close(fig)

In [40]:
# One figure per experiment overlaying the population mean fitness of all
# trials.
for exp_name in exp_name_uniques:
    trial_dirs = sorted(name for name in data_dirs if name.split("_")[0] == exp_name)

    plt.style.use(['science', 'grid', 'no-latex'])
    fig, ax = plt.subplots(figsize=(16, 9))

    for trial_no, trial_dir in enumerate(trial_dirs, start=1):
        pop_history = pd.read_csv(os.path.join(EXP_DIR, trial_dir, "history_pop.csv"))
        population_mean, _ = get_mean_std(pop_history)
        ax.plot(population_mean, label=f"Trial {trial_no}")

    ax.set_title(f"{exp_name} Mean Fitness")
    ax.set_xlabel("Generation")
    ax.set_ylabel("Fitness")
    ax.set_ylim(0, 500)
    ax.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(EXP_DIR, f"{exp_name}_mean_fit.png"))
    plt.close(fig)

In [41]:
# One figure per experiment overlaying the per-generation standard deviation
# of fitness of all trials.
for exp_name in exp_name_uniques:
    trial_dirs = sorted(name for name in data_dirs if name.split("_")[0] == exp_name)

    plt.style.use(['science', 'grid', 'no-latex'])
    fig, ax = plt.subplots(figsize=(16, 9))

    for trial_no, trial_dir in enumerate(trial_dirs, start=1):
        pop_history = pd.read_csv(os.path.join(EXP_DIR, trial_dir, "history_pop.csv"))
        _, population_std = get_mean_std(pop_history)
        ax.plot(population_std, label=f"Trial {trial_no}")

    ax.set_title(f"{exp_name} Std Fitness")
    ax.set_xlabel("Generation")
    ax.set_ylabel("Fitness")
    ax.set_ylim(0, 2000)
    ax.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(EXP_DIR, f"{exp_name}_std_fit.png"))
    plt.close(fig)

In [46]:
# One figure per experiment overlaying the number of species per generation
# of all trials.
for exp_name in exp_name_uniques:
    # Trial directories whose name prefix (text before the first "_") matches.
    exp_trials = sorted(d for d in data_dirs if d.split("_")[0] == exp_name)

    plt.style.use(['science', 'grid', 'no-latex'])
    fig, ax = plt.subplots(figsize=(16, 9))

    for i, exp_trial in enumerate(exp_trials):
        df_history_pop = pd.read_csv(os.path.join(EXP_DIR, exp_trial, "history_pop.csv"))
        num_species_hist = get_num_species_hist(df_history_pop)

        ax.plot(num_species_hist, label=f"Trial {i+1}")

    ax.set_title(f"{exp_name} Num Species")
    ax.set_xlabel("Generation")
    ax.set_ylabel("Num Species")
    # Fix the axis range before tight_layout() so the layout is computed for
    # the tick labels that are actually drawn (same order as the other
    # plotting cells).
    ax.set_ylim(0, 10)
    ax.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(EXP_DIR, f"{exp_name}_num_species.png"))
    plt.close(fig)

In [47]:
# One figure per experiment tiling the "species.jpg" image of every trial,
# three images per row.
for exp_name in exp_name_uniques:
    trial_dirs = sorted(name for name in data_dirs if name.split("_")[0] == exp_name)

    n_trials = len(trial_dirs)
    n_rows = int(np.ceil(n_trials / 3))
    plt.style.use(['science', 'no-latex'])
    fig = plt.figure(figsize=(16, 9))
    fig.suptitle(f"{exp_name} ", fontsize=16)
    axes = [fig.add_subplot(n_rows, 3, pos) for pos in range(1, n_trials + 1)]

    for trial_no, (ax, trial_dir) in enumerate(zip(axes, trial_dirs), start=1):
        image_path = os.path.join(EXP_DIR, trial_dir, "species.jpg")
        ax.imshow(plt.imread(image_path))
        ax.axis("off")  # hide ticks and frame around the image
        ax.set_title(f"Trial {trial_no}")

    plt.tight_layout()
    plt.savefig(os.path.join(EXP_DIR, f"{exp_name}_species_img.png"))
    plt.close()