In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scienceplots
import pickle
import os
import sys

# The *string literal* "__file__" (not the module attribute) is resolved against the
# current working directory, so its dirname is the working directory of the kernel.
CURR_DIR = os.path.dirname(os.path.realpath("__file__"))
ROOT_DIR = os.path.abspath(CURR_DIR)
LIB_DIR = os.path.join(ROOT_DIR, 'libs')
EXP_DIR = os.path.join(CURR_DIR, "doublependulum_exp")
# Guarded so that re-running this cell does not keep appending duplicate entries.
if LIB_DIR not in sys.path:
    sys.path.append(LIB_DIR)

## 手法の分析

In [2]:
def get_global_bests(hist_fit: list) -> list:
    """Return the running best (cumulative maximum) of a fitness history.

    Args:
        hist_fit (list): Best fitness of each generation, in generation order.
            Any iterable of comparable values is accepted.

    Returns:
        list: One entry per input entry; entry ``i`` is the best value seen
            in the first ``i + 1`` entries. An empty input gives an empty list.
    """
    global_bests = []
    for fitness in hist_fit:
        # Only a strictly larger value replaces the current best.
        if not global_bests or fitness > global_bests[-1]:
            global_bests.append(fitness)
        else:
            global_bests.append(global_bests[-1])

    return global_bests

def get_mean_std(df_hist_pop: pd.DataFrame) -> tuple:
    """Return the per-generation mean and standard deviation of fitness.

    Args:
        df_hist_pop (pd.DataFrame): Population history with at least the
            columns ``generation`` and ``fitness``.

    Returns:
        tuple: ``(hist_avg_fit, hist_std_fit)``, two arrays with one entry per
            distinct generation, ordered by generation. The standard deviation
            is pandas' default (sample, ddof=1), so a generation with a single
            row yields NaN.
    """
    # Select the column before aggregating: only "fitness" is needed, and this
    # keeps non-numeric columns in the frame from breaking mean()/std().
    fitness_by_gen = df_hist_pop.groupby("generation")["fitness"]
    hist_avg_fit = fitness_by_gen.mean().values
    hist_std_fit = fitness_by_gen.std().values

    return (hist_avg_fit, hist_std_fit)

def get_species_statistics(df_hist_pop: pd.DataFrame) -> dict:
    """Return lifetime, ancestry and fitness statistics for every species.

    Args:
        df_hist_pop (pd.DataFrame): Population history with the columns
            ``generation``, ``species``, ``id``, ``parent1`` and ``fitness``.
            Generations are used as array positions, so they must be
            non-negative integers.

    Returns:
        dict: ``{species id: stats}`` where ``stats`` has the keys

            * ``"created"``: first generation in which the species appears.
            * ``"extinct"``: last generation in which it appears, plus one.
            * ``"ancestor"``: species of the first recorded row of the parent
              (``parent1``) of the species' first row; ``-1`` when that
              parent is ``-1`` or is not present in the frame.
            * ``"fitness_hist_average"`` / ``"fitness_hist_std"``: arrays of
              length ``max generation + 1`` holding the per-generation mean /
              sample standard deviation of fitness. Generations in which the
              species has no rows are 0; a generation with a single member has
              a NaN standard deviation.
    """
    max_gen = df_hist_pop["generation"].max()  # last generation of the run
    species_data = {}
    for key in df_hist_pop["species"].unique():
        current_species = df_hist_pop[df_hist_pop["species"] == key]  # rows of this species only

        ### Lifetime of the species
        created = current_species["generation"].min()
        # The species is removed in the generation after its last appearance.
        extinct = current_species["generation"].max() + 1

        ### Ancestor species
        # The parent of the first row of the species identifies where it branched from.
        first_genome_parent = current_species['parent1'].iloc[0]
        if first_genome_parent == -1:
            # Initial individuals have no ancestor.
            ancestor = -1
        else:
            # A boolean mask is independent of the frame's index labels.
            # NOTE(review): this takes the FIRST recorded row of the parent;
            # confirm whether the parent's last species was intended instead.
            parent_species = df_hist_pop.loc[df_hist_pop['id'] == first_genome_parent, 'species']
            ancestor = parent_species.iloc[0] if len(parent_species) > 0 else -1

        ### Fitness history of the species
        fitness_by_gen = current_species.groupby("generation")["fitness"]
        gen_mean = fitness_by_gen.mean()
        gen_std = fitness_by_gen.std()

        # Place each value at its own generation so that a species missing from
        # an intermediate generation does not cause a length mismatch.
        fitness_hist_average = np.zeros(max_gen + 1)
        fitness_hist_average[gen_mean.index.to_numpy()] = gen_mean.to_numpy()
        fitness_hist_std = np.zeros(max_gen + 1)
        fitness_hist_std[gen_std.index.to_numpy()] = gen_std.to_numpy()

        species_data[key] = {
            "created": created,
            "extinct": extinct,
            "ancestor": ancestor,
            "fitness_hist_average": fitness_hist_average,
            "fitness_hist_std": fitness_hist_std
        }

    return species_data

def get_species_order(species_data: dict) -> list:
    """Return the species ids in depth-first order of their ancestry tree.

    The walk starts from the pseudo-root ``-1`` (the ``ancestor`` value of
    species without an ancestor). Each species is followed by its descendants;
    among the children of one species, the one that comes last in
    ``species_data`` is visited first.

    Args:
        species_data (dict): ``{species id: {"ancestor": ancestor id, ...}}``.

    Returns:
        list: Species ids reachable from ``-1``, without the ``-1`` itself.
    """
    visited = []
    pending = [-1]  # end of the list is the top of the stack
    while pending:
        current = pending.pop()
        visited.append(current)
        # Children are pushed in dictionary order, so the last one is popped first.
        pending.extend(
            species_id
            for species_id, info in species_data.items()
            if info['ancestor'] == current
        )

    # Drop the leading pseudo-root.
    return visited[1:]

def get_num_species_hist(df_hist_pop: pd.DataFrame) -> np.ndarray:
    """Return the number of distinct species in each generation.

    Args:
        df_hist_pop (pd.DataFrame): Population history with the columns
            ``generation`` and ``species``.

    Returns:
        np.ndarray: Array of length ``max generation + 1``; entry ``g`` is the
            number of distinct ``species`` values among the rows of generation
            ``g``. Generations without any rows count as 0, and an empty frame
            gives an empty array.
    """
    if df_hist_pop.empty:
        return np.array([], dtype=int)

    max_gen = int(df_hist_pop["generation"].max())
    # dropna=False keeps the count equal to len(unique()), which counts NaN as a value.
    species_per_gen = df_hist_pop.groupby("generation")["species"].nunique(dropna=False)
    # Reindex so that generations with no rows are reported as 0 instead of raising.
    num_species_hist = species_per_gen.reindex(range(max_gen + 1), fill_value=0).to_numpy()

    return num_species_hist


In [3]:
def get_num_species_hist(df_hist_pop: pd.DataFrame) -> np.ndarray:
    """Return the number of distinct species in each generation.

    NOTE(review): a function with this name is also defined in an earlier
    cell; this later definition is the one in effect after running all cells.
    Consider keeping only one of them.

    Args:
        df_hist_pop (pd.DataFrame): Population history with the columns
            ``generation`` and ``species``.

    Returns:
        np.ndarray: Array of length ``max generation + 1``; entry ``g`` is the
            number of distinct ``species`` values among the rows of generation
            ``g``. Generations without any rows count as 0, and an empty frame
            gives an empty array.
    """
    if df_hist_pop.empty:
        return np.array([], dtype=int)

    max_gen = int(df_hist_pop["generation"].max())
    # dropna=False keeps the count equal to len(unique()), which counts NaN as a value.
    species_per_gen = df_hist_pop.groupby("generation")["species"].nunique(dropna=False)
    # Reindex so that generations with no rows are reported as 0 instead of raising.
    num_species_hist = species_per_gen.reindex(range(max_gen + 1), fill_value=0).to_numpy()

    return num_species_hist

# History of the number of species changes (transfers) per generation
def get_num_species_transfers_hist(df_hist_pop: pd.DataFrame) -> np.ndarray:
    """Return, for each generation, how many individuals changed species.

    A transfer is counted in generation ``g`` when a row of generation ``g``
    has a different ``species`` than the previous recorded row (by generation)
    with the same ``id``.

    NOTE(review): this assumes that ``id`` identifies the same individual
    across generations and appears at most once per generation -- confirm
    against the code that writes the population history.

    Args:
        df_hist_pop (pd.DataFrame): Population history with the columns
            ``generation``, ``id`` and ``species``. Generations must be
            non-negative integers.

    Returns:
        np.ndarray: Array of length ``max generation + 1`` with the number of
            transfers observed in each generation. An empty frame gives an
            empty array.
    """
    if df_hist_pop.empty:
        return np.array([], dtype=int)

    max_gen = int(df_hist_pop["generation"].max())

    # Put the rows of each individual in chronological order, then look up the
    # species of its previous appearance (NaN for a first appearance).
    ordered = df_hist_pop.sort_values(["id", "generation"])
    previous_species = ordered.groupby("id")["species"].shift(1)
    transferred = previous_species.notna() & (previous_species != ordered["species"])

    # Count transfers per generation; generations without transfers stay 0.
    transfer_gens = ordered.loc[transferred, "generation"].to_numpy().astype(int)
    return np.bincount(transfer_gens, minlength=max_gen + 1)

In [3]:
data_dirs = os.listdir(EXP_DIR)
# The experiment name is the part of each entry before the first underscore.
# dict.fromkeys removes repeats while keeping the first-seen order.
exp_name_uniques = list(dict.fromkeys(entry.split("_")[0] for entry in data_dirs))

In [4]:
# Show the unique experiment-name prefixes found under EXP_DIR
exp_name_uniques

['hybsmrets32repopc2', 'nocrossover', 'normalpc2', 'normal']

In [5]:
# Mean ± std of the best fitness over the first `num_trials` trials of each experiment
num_trials = 10
for exp_name in ["normal", "nocrossover", "normalpc2", "hybsmrets32repopc2"]:
    # Trial directories of this experiment: the prefix before the first "_" matches
    # the experiment name and the entry has no "." in its name.
    exp_trials = sorted(
        data_dir for data_dir in data_dirs
        if data_dir.split("_")[0] == exp_name and "." not in data_dir
    )

    best_fits = []
    for exp_trial in exp_trials[:num_trials]:
        df_history_fitness = pd.read_csv(os.path.join(EXP_DIR, exp_trial, "history_fitness.csv"))
        best_fits.append(df_history_fitness["fitness"].max())

    mean = np.mean(best_fits)
    std = np.std(best_fits)  # numpy default: population standard deviation (ddof=0)
    print(f"{exp_name} Best Fitness: {mean} ± {std}")

normal Best Fitness: 7955.400055876036 ± 2027.029521129524
nocrossover Best Fitness: 3967.81741462977 ± 3113.747919152365
normalpc2 Best Fitness: 5744.907381926473 ± 2978.1159418464986
hybsmrets32repopc2 Best Fitness: 7552.250041126504 ± 3155.907505214801


In [14]:
# Mean ± std of the total number of distinct species per trial.
# Relies on `data_dirs` and `num_trials` defined in earlier cells.
for exp_name in ["normal", "nocrossover"]:
    # Trial directories of this experiment: the prefix before the first "_" matches
    # the experiment name and the entry has no "." in its name.
    exp_trials = sorted(
        data_dir for data_dir in data_dirs
        if data_dir.split("_")[0] == exp_name and "." not in data_dir
    )

    num_species = []
    for exp_trial in exp_trials[:num_trials]:
        df_history_pop = pd.read_csv(os.path.join(EXP_DIR, exp_trial, "history_pop.csv"))
        # Distinct species ids that appear anywhere in the run
        num_species.append(len(df_history_pop["species"].unique()))

    # Statistics of this cell's own data (previously `mean` was taken from the
    # `best_fits` list left over from another cell and never used).
    mean = np.mean(num_species)
    std = np.std(num_species)  # numpy default: population standard deviation (ddof=0)
    print(f"{exp_name} Num Species: {mean} ± {std}")

normal Num Species: 10.5 ± 2.1095023109728985
nocrossover Num Species: 12.4 ± 4.565084884205331


## ゲノムの分析

In [4]:
algo_name = "hybsmre01ts32_1"
file_path = os.path.join(EXP_DIR, algo_name, "genome")

# List the genome files ordered by the integer before the first ".", largest first
file_names = sorted(
    os.listdir(file_path),
    key=lambda name: int(name.split(".")[0]),
    reverse=True,
)
print(file_names[0])

69662.pickle


In [6]:
# Load the genome with the largest numeric file name of each trial and
# collect one summary row per trial.
col_names = ["id(key)", "fitness", "num_input", "num_output", "num_hidden", "num_connection", "num_params"]

# Network I/O sizes used for the summary (the same for every trial)
num_out = 1
num_input = 11

records = []
for i in range(10):
    print(f"Trial {i}")
    algo_name = "elits100100_" + str(i)

    genome_dir = os.path.join(EXP_DIR, algo_name, "genome")

    # File names are ordered by the integer before the first ".", largest first
    file_names = os.listdir(genome_dir)
    file_names.sort(key=lambda x: int(x.split(".")[0]), reverse=True)
    file_path = os.path.join(genome_dir, file_names[0])
    # NOTE: pickle.load can execute arbitrary code; only load files produced by
    # your own experiments.
    with open(file_path, "rb") as f:
        genome = pickle.load(f)

    num_connection = len(genome.connections)
    network_info = {
        "id(key)": genome.key,
        "fitness": genome.fitness,
        "num_input": num_input,
        "num_output": num_out,
        # presumably genome.nodes holds output and hidden nodes only -- TODO confirm
        "num_hidden": len(genome.nodes) - num_out,
        "num_connection": num_connection,
        # The parameter count is taken to be the number of connections
        "num_params": num_connection,
    }
    records.append(network_info)

    print(network_info)
    print()

# Build the frame once instead of concatenating one row per iteration
df = pd.DataFrame(records, columns=col_names)

Trial 0
{'id(key)': 74927, 'fitness': 909.1467767007762, 'num_input': 11, 'num_output': 1, 'num_hidden': 0, 'num_connection': 6, 'num_params': 6}

Trial 1
{'id(key)': 74870, 'fitness': 526.0320739878705, 'num_input': 11, 'num_output': 1, 'num_hidden': 0, 'num_connection': 4, 'num_params': 4}

Trial 2
{'id(key)': 74980, 'fitness': 8851.634025438228, 'num_input': 11, 'num_output': 1, 'num_hidden': 0, 'num_connection': 5, 'num_params': 5}

Trial 3
{'id(key)': 74982, 'fitness': 830.5906412123024, 'num_input': 11, 'num_output': 1, 'num_hidden': 1, 'num_connection': 9, 'num_params': 9}

Trial 4
{'id(key)': 74966, 'fitness': 1270.9325928110197, 'num_input': 11, 'num_output': 1, 'num_hidden': 6, 'num_connection': 16, 'num_params': 16}

Trial 5
{'id(key)': 74889, 'fitness': 522.2644178048437, 'num_input': 11, 'num_output': 1, 'num_hidden': 1, 'num_connection': 9, 'num_params': 9}

Trial 6
{'id(key)': 74888, 'fitness': 357.67180805385084, 'num_input': 11, 'num_output': 1, 'num_hidden': 3, 'num_c

In [7]:
# Show the per-trial genome summary table
df

Unnamed: 0,id(key),fitness,num_input,num_output,num_hidden,num_connection,num_params
0,74927,909.146777,11,1,0,6,6
1,74870,526.032074,11,1,0,4,4
2,74980,8851.634025,11,1,0,5,5
3,74982,830.590641,11,1,1,9,9
4,74966,1270.932593,11,1,6,16,16
5,74889,522.264418,11,1,1,9,9
6,74888,357.671808,11,1,3,16,16
7,74864,588.635606,11,1,0,7,7
8,74939,9328.636622,11,1,3,15,15
9,74943,8512.194276,11,1,2,8,8


In [8]:
# Mean and sample standard deviation (pandas default, ddof=1) over the trials
fitness_col = df["fitness"]
hidden_col = df["num_hidden"]
connection_col = df["num_connection"]

mean_fit, std_fit = fitness_col.mean(), fitness_col.std()
mean_num_hidden, std_num_hidden = hidden_col.mean(), hidden_col.std()
mean_num_connection, std_num_connection = connection_col.mean(), connection_col.std()

print(f"Mean Fitness: {mean_fit} ± {std_fit}")
print(f"Mean Num Hidden: {mean_num_hidden} ± {std_num_hidden}")
print(f"Mean Num Connection: {mean_num_connection} ± {std_num_connection}")

Mean Fitness: 3169.773883981311 ± 3965.311560942334
Mean Num Hidden: 1.6 ± 1.9550504398153572
Mean Num Connection: 9.5 ± 4.552166761249221
