In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [150]:
# Load the experiment results table from CSV into `df`.
df = pd.read_csv("experiments_2.csv")

In [None]:
# Preview the first rows of the loaded table.
df.head()

In [None]:
# Descriptive statistics of the loaded table.
df.describe()

In [153]:
# Distinct values of the `tested_parameter` column, in order of first appearance.
parameters = df['tested_parameter'].unique()

# DataFrame analysis

In [154]:
# Result columns that are aggregated (mean / std / count) for every tested parameter.
RESULT_COLUMNS = [
    "time",
    "cost",
    "connectivity",
    "fitness",
    "best_fitness_gen",
    "execution_time"
]

In [155]:
def analyze_all_parameters(df, result_cols):
    """
    Build one summary table per tested parameter.

    For every distinct value ``name`` of the ``tested_parameter`` column, the
    rows with that value are grouped by the column called ``name`` and the
    mean, standard deviation and count of each column in ``result_cols`` are
    computed per group.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``tested_parameter`` column; every value found in that
        column must itself be the name of a column of ``df``.
    result_cols : list of str
        Columns to aggregate.

    Returns
    -------
    dict
        Maps each tested-parameter name to its aggregated DataFrame, in order
        of first appearance of the name.
    """
    statistics = ["mean", "std", "count"]
    tested = df["tested_parameter"]

    return {
        name: (
            df[tested == name]
            .groupby(name, as_index=False)[result_cols]
            .agg(statistics)
        )
        for name in tested.unique()
    }

# Summary tables for the loaded experiments, keyed by tested-parameter name.
analysis = analyze_all_parameters(df, RESULT_COLUMNS)

In [None]:
# Names of the tested parameters that have a summary table.
analysis.keys()

In [None]:
# Summary table for the runs where `initial_population_size` was the tested parameter.
analysis['initial_population_size']

In [None]:
# Summary table for the runs where `generations` was the tested parameter.
analysis['generations']

# Plots

In [159]:
def filter_by_tested_parameter(df, tested_param_name):
    """
    Return the rows whose ``tested_parameter`` equals ``tested_param_name``.

    The result is an independent copy, so modifying it does not touch ``df``.
    """
    is_selected = df["tested_parameter"] == tested_param_name
    selected_rows = df.loc[is_selected]
    return selected_rows.copy()

In [160]:
def plot_mean_metric(df, tested_param, metric):
    """
    Plot the mean value of a metric against the tested parameter.

    Only rows whose ``tested_parameter`` equals ``tested_param`` are used;
    they are grouped by the ``tested_param`` column and ``metric`` is
    averaged within each group.
    """
    runs = filter_by_tested_parameter(df, tested_param)

    # One row per parameter value, ordered by that value.
    means = runs.groupby(tested_param)[metric].mean()
    summary = means.reset_index().sort_values(tested_param)

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.plot(summary[tested_param], summary[metric], marker="o")
    ax.set_xlabel(tested_param)
    ax.set_ylabel(f"Mean {metric}")
    ax.set_title(f"{metric} vs {tested_param}")
    ax.grid(True)
    fig.tight_layout()
    plt.show()

In [161]:
def plot_mean_std(df, tested_param, metric):
    """
    Plot the mean value of a metric with standard-deviation error bars.

    Only rows whose ``tested_parameter`` equals ``tested_param`` are used;
    they are grouped by the ``tested_param`` column, and the mean and
    standard deviation of ``metric`` are computed within each group.
    """
    runs = filter_by_tested_parameter(df, tested_param)

    # One row per parameter value with "mean" and "std" columns, ordered by value.
    stats = runs.groupby(tested_param)[metric].agg(["mean", "std"])
    summary = stats.reset_index().sort_values(tested_param)

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.errorbar(
        summary[tested_param],
        summary["mean"],
        yerr=summary["std"],
        marker="o",
        capsize=4,
    )
    ax.set_xlabel(tested_param)
    ax.set_ylabel(metric)
    ax.set_title(f"{metric}: mean ± std")
    ax.grid(True)
    fig.tight_layout()
    plt.show()

In [162]:
def plot_boxplot(df, tested_param, metric):
    """
    Draw a box plot of a metric across runs for each value of the tested parameter.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``tested_parameter`` column, a column named
        ``tested_param`` and a column named ``metric``.
    tested_param : str
        Value of ``tested_parameter`` to select; also the column to group by.
    metric : str
        Column whose per-run values are summarised by each box.

    Raises
    ------
    ValueError
        If there are no rows to plot for ``tested_param``.
    """
    df_param = filter_by_tested_parameter(df, tested_param)

    data = []
    labels = []
    # groupby walks the rows once and yields the groups in ascending key order.
    for value, runs in df_param.groupby(tested_param)[metric]:
        # A single NaN would turn the whole box's statistics into NaN, so drop them.
        data.append(runs.dropna().to_numpy())
        labels.append(str(value))

    if not data:
        raise ValueError(
            f"no runs found for tested parameter {tested_param!r}"
        )

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.boxplot(data, showfliers=True)
    # Label the ticks explicitly rather than via boxplot's `labels=` keyword,
    # which matplotlib 3.9 renamed to `tick_labels` and deprecated.
    # Boxes are drawn at positions 1..N by default.
    ax.set_xticks(list(range(1, len(labels) + 1)))
    ax.set_xticklabels(labels)
    ax.set_xlabel(tested_param)
    ax.set_ylabel(metric)
    ax.set_title(f"{metric} distribution by {tested_param}")
    ax.grid(True, axis="y")
    fig.tight_layout()
    plt.show()


In [168]:
# Pick the tested parameter and the result metric inspected by the cells below.
# NOTE(review): index 3 is positional in `parameters`, which comes from
# Series.unique() and therefore follows the order in which values appear in
# the CSV — confirm it still selects the intended parameter if the data changes.
tested = parameters[3]
result = 'execution_time'

In [None]:
# Mean and number of runs of the selected metric for each value of the selected parameter.
df_param = filter_by_tested_parameter(df, tested)
metric_by_value = df_param.groupby(tested)[result]
summary = (
    metric_by_value
    .agg(mean_value="mean", count="count")
    .reset_index()
    .sort_values(tested)
)
summary

In [None]:
# Mean of the selected metric for each value of the selected parameter.
plot_mean_metric(df, tested, result)

In [None]:
# Same means, with standard-deviation error bars.
plot_mean_std(df, tested, result)

In [None]:
# Per-run distribution of the selected metric for each value of the selected parameter.
plot_boxplot(df, tested, result)