In [None]:
import gc

import os
import psutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from IPython.display import Markdown

import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "notebook_connected"

from IPython.core.interactiveshell import InteractiveShell
# Notebook-wide display setting: which expression results a cell echoes.
InteractiveShell.ast_node_interactivity = (
    "last_expr"  # show only the last output
)

In [None]:
# Print the available memory reported by psutil, scaled by 1024**3 and labelled GB.
print(f"Доступно памяти: {psutil.virtual_memory().available / 1024**3:.2f} GB")

In [None]:
from statsmodels.stats.proportion import proportion_confint
from sympy import sieve

In [None]:
# Run a garbage-collection pass; as the cell's last expression its return value is echoed.
gc.collect()

# Загружаем результаты запусков

In [None]:
# Collects one labelled DataFrame per results file; the list is concatenated further down.
df_list = []

In [None]:
# Exact results for f(N): keep only N and m(N), then tag the run.
# sample_amount is 0 because this run is labelled "exact", not sampled.
df = (
    pd.read_csv("./data/exact.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "exact",
            "function": "f(N)",
            "continuous sampling": False,
            "stratified sampling": False,
            "sample_amount": 0,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# Exact results for the mu^2-weighted function: keep only N and m(N), then tag the run.
df = (
    pd.read_csv("./data/exact_mu.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "exact",
            "function": r"f(N) \cdot \mu ^2 (N)",
            "continuous sampling": False,
            "stratified sampling": False,
            "sample_amount": 0,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# 100k samples
# Random sampling for f(N), neither continuous nor stratified.
df = (
    pd.read_csv("./data/random_sampling.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "random sampling",
            "function": "f(N)",
            "continuous sampling": False,
            "stratified sampling": False,
            "sample_amount": 100_000,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# 100k samples
# Random sampling for the mu^2-weighted function, neither continuous nor stratified.
df = (
    pd.read_csv("./data/random_sampling_mu.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "random sampling",
            "function": r"f(N) \cdot \mu ^2 (N)",
            "continuous sampling": False,
            "stratified sampling": False,
            "sample_amount": 100_000,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# 100k samples
# Stratified (non-continuous) random sampling for f(N).
df = (
    pd.read_csv("./data/random_sampling_stratified.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "random sampling",
            "function": "f(N)",
            "continuous sampling": False,
            "stratified sampling": True,
            "sample_amount": 100_000,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# 100k samples
# Stratified (non-continuous) random sampling for the mu^2-weighted function.
df = (
    pd.read_csv("./data/random_sampling_stratified_mu.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "random sampling",
            "function": r"f(N) \cdot \mu ^2 (N)",
            "continuous sampling": False,
            "stratified sampling": True,
            "sample_amount": 100_000,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# 1000k samples
# Continuous (non-stratified) random sampling for f(N).
df = (
    pd.read_csv("./data/random_sampling_continuous.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "random sampling",
            "function": "f(N)",
            "continuous sampling": True,
            "stratified sampling": False,
            "sample_amount": 1_000_000,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# 1000k samples
# Continuous (non-stratified) random sampling for the mu^2-weighted function.
df = (
    pd.read_csv("./data/random_sampling_continuous_mu.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "random sampling",
            "function": r"f(N) \cdot \mu ^2 (N)",
            "continuous sampling": True,
            "stratified sampling": False,
            "sample_amount": 1_000_000,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# 1000k samples
# Continuous and stratified random sampling for f(N).
df = (
    pd.read_csv("./data/random_sampling_stratified_continuous.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "random sampling",
            "function": "f(N)",
            "continuous sampling": True,
            "stratified sampling": True,
            "sample_amount": 1_000_000,
        }
    )
)
df_list.append(df.copy())
df

In [None]:
# 1000k samples
# Continuous and stratified random sampling for the mu^2-weighted function.
df = (
    pd.read_csv("./data/random_sampling_stratified_continuous_mu.csv")
    .rename(columns={"n": "N", "probability": "m(N)"})
    .loc[:, ["N", "m(N)"]]
    .assign(
        **{
            "computation": "random sampling",
            "function": r"f(N) \cdot \mu ^2 (N)",
            "continuous sampling": True,
            "stratified sampling": True,
            "sample_amount": 1_000_000,
        }
    )
)
df_list.append(df.copy())
df

## Собираем все в один датафрейм

In [None]:
# Stack all labelled runs. The per-file row index is intentionally kept
# (no ignore_index): later cells subtract runs from each other by index.
df = pd.concat(df_list)
del df_list

# Keep N > 1 only (presumably because ln N must be positive for the scaling below).
df = df.loc[df["N"] > 1]

# Reference decay: 1/sqrt(ln N) for f(N), 1/sqrt(N) for every other function label.
mask = df["function"].eq("f(N)")
df["asymptotic"] = np.where(
    mask,
    1 / np.sqrt(np.log(df["N"])),
    1 / np.sqrt(df["N"]),
)

# C is m(N) expressed in units of the reference decay.
df["C"] = df["m(N)"].div(df["asymptotic"])
df

In [None]:
# Largest N reached by every run configuration.
df.groupby(
    by=[
        "computation",
        "function",
        "continuous sampling",
        "stratified sampling",
        "sample_amount",
    ],
    dropna=False,
)["N"].agg("max")

In [None]:
# 95% confidence interval for m(N) via statsmodels' "beta" method.
# The success count is reconstructed as sample_amount * m(N). That product is a
# float and can land just below the true integer (e.g. 28999.999999999996), so
# it is rounded to the nearest integer instead of being truncated by the cast.
# NOTE(review): rows with sample_amount == 0 (the "exact" runs) have no trials;
# their interval is expected to come out undefined - confirm.
successes = np.rint(df["sample_amount"] * df["m(N)"]).astype("int64")
df["m(N)_low"], df["m(N)_high"] = proportion_confint(
    successes, df["sample_amount"], alpha=0.05, method="beta"
)

# Same interval expressed in units of the reference decay, matching column "C".
df["C_low"] = df["m(N)_low"] / df["asymptotic"]
df["C_high"] = df["m(N)_high"] / df["asymptotic"]

del successes
del proportion_confint

df

## Добавляем теоретические значения
## $C_0 = \lim_{N \to +\infty} \sqrt{\ln N} \cdot \prod_{2p \leq N} (1 - \frac{1}{2p})$
## и $C_1 = \lim_{N \to +\infty} \sqrt{\ln N} \cdot \prod_{p \leq N} (1 - \frac{1}{2p})$

In [None]:
def compute_products_map(N_max, half):
    """Tabulate the partial products of (1 - 1/(2p)) over primes p, indexed by N.

    Args:
        N_max: largest N to tabulate; the result has N_max + 1 entries (N = 0..N_max).
        half: if true, entry N takes primes p <= N // 2 (i.e. 2p <= N);
            otherwise primes p <= N.

    Returns:
        Array whose N-th entry is the product over the selected primes
        (1 when no prime qualifies).
    """
    # All primes <= N_max.
    primes = np.array(list(sieve.primerange(2, N_max + 1)), dtype=np.int32)

    # running[k] is the product over the first k primes; running[0] == 1 is the empty product.
    running = np.concatenate(([1], np.cumprod(1 - 1 / (2 * primes))))

    # Upper bound on p for every N.
    limits = np.arange(N_max + 1)
    if half:
        limits = limits // 2

    # Number of primes <= limit, which is exactly the index into `running`.
    n_primes = np.searchsorted(primes, limits, side='right')

    return running[n_primes]

# Integer N for every row, used as an index into the product tables.
N_ints = df["N"].astype("int64").to_numpy()
max_N = N_ints.max()
sqrt_ln_N = np.sqrt(np.log(N_ints))

# C_0: product over primes with 2p <= N.
products_C0 = compute_products_map(max_N, half=True)
df["C_0"] = sqrt_ln_N * products_C0[N_ints]
del products_C0

# C_1: product over primes with p <= N.
products_C1 = compute_products_map(max_N, half=False)
df["C_1"] = sqrt_ln_N * products_C1[N_ints]
del products_C1

# Drop the helpers so they do not linger in the notebook namespace.
del sqrt_ln_N, N_ints, max_N
del compute_products_map
del sieve

df

In [None]:
# Persist the combined frame so the plotting cells can start from the file.
df.to_parquet("df.parquet")

In [None]:
# Reload the combined frame from the parquet file written above.
df = pd.read_parquet("df.parquet")
df

In [None]:
# Run a garbage-collection pass before plotting; the return value is echoed.
gc.collect()

# Графики

In [None]:
def safe_write(filename, write_func, **kwargs):
    """Write ``filename`` through ``write_func`` unless the file already exists.

    Args:
        filename: target path.
        write_func: callable invoked as ``write_func(filename, **kwargs)``.
        **kwargs: forwarded unchanged to ``write_func``.

    An existing file is never overwritten: a warning is printed and nothing is
    written. The parent directory is created when missing, so the first save
    into a fresh output folder does not fail.
    """
    if os.path.exists(filename):
        print(f"⚠️  Warning: {filename} уже существует. Пропущено.")
        return

    # A bare file name has an empty dirname; os.makedirs("") would raise.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    write_func(filename, **kwargs)
    print(f"Saved to {filename}")


def hex_to_rgba(hex_color, alpha=0.2):
    """Convert a hex colour such as "#e41a1c" into an "rgba(r,g,b,alpha)" string.

    Leading "#" characters are optional; the first six hex digits are read as
    the red, green and blue channels.
    """
    digits = hex_color.lstrip("#")
    red, green, blue = [int(digits[start : start + 2], 16) for start in range(0, 6, 2)]
    return "rgba({},{},{},{})".format(red, green, blue, alpha)


def plot_single_line(
    data,
    computation,
    function,
    plot_theory_C,
    values,
    yaxis_title=None,
    save=False,
    continuous=None,
    stratified=None,
):
    """Plot one curve against N, optionally with the theoretical C_0 / C_1 curves.

    Args:
        data: frame providing column "N" (plus "C_0" and "C_1" when
            ``plot_theory_C`` is true, and column ``values`` when it is a name).
        computation: run label used in the title; any value other than "exact"
            also puts the sampling flags into the title and the file name.
        function: LaTeX function label; "f(N)" selects the m_f(N) naming,
            anything else m_g(N).
        plot_theory_C: whether to overlay the C_0 and C_1 curves.
        values: a column name of ``data``, or the y values themselves.
        yaxis_title: LaTeX y-axis label without the surrounding ``$``. Defaults
            to the (renamed) column name when ``values`` is a name.
        save: when true, the figure is saved under ./graphs as PDF and HTML
            (existing files are kept).
        continuous, stratified: sampling flags shown in the title for sampled
            runs. When left as None the notebook-level variables of the same
            name are used, which is what this function previously did
            implicitly.
    """

    def sampling_flag(explicit, name):
        # An explicit argument wins; otherwise fall back to the notebook global.
        if explicit is not None:
            return explicit
        if name not in globals():
            raise NameError(
                f"name '{name}' is not defined: pass {name}=... to plot_single_line"
            )
        return globals()[name]

    if isinstance(values, str):
        values_str = values
        values = data[values_str].values

        # Distinguish the two functions in labels: m_f(N) versus m_g(N).
        subscripted = "m_f(N)" if function == "f(N)" else "m_g(N)"
        values_str = values_str.replace("m(N)", subscripted)

        col = values_str
        if yaxis_title is None:
            yaxis_title = values_str
    else:
        col = yaxis_title

    fig = go.Figure()

    # Main curve.
    fig.add_trace(
        go.Scatter(
            x=data["N"], y=values, mode="lines", name=col, line=dict(color="red")
        )
    )

    if plot_theory_C:
        # Theoretical curve C_0.
        fig.add_trace(
            go.Scatter(
                x=data["N"],
                y=data["C_0"].values,
                mode="lines",
                name=r"$C_0 = \sqrt{\ln N} \cdot \prod_{2p \leq N} (1 - \frac{1}{2p})$",
                line=dict(color="green"),
            )
        )
        # Theoretical curve C_1.
        fig.add_trace(
            go.Scatter(
                x=data["N"],
                y=data["C_1"].values,
                mode="lines",
                name=r"$C_1 = \sqrt{\ln N} \cdot \prod_{p \leq N} (1 - \frac{1}{2p})$",
                line=dict(color="blue"),
            )
        )

    title_for_graph = f"${col}\\text{{, function = }}{function}\\text{{, computation = {computation}}}$"
    title_for_saving = f"{col}, function = {function}, computation = {computation}"
    if computation != "exact":
        continuous = sampling_flag(continuous, "continuous")
        stratified = sampling_flag(stratified, "stratified")
        # Drop the closing "$" so the sampling flags join the same math block.
        title_for_graph = title_for_graph[:-1] + f"\\text{{, continuous sampling = {continuous}, stratified sampling = {stratified}}}$"
        title_for_saving = title_for_saving + f", continuous sampling = {continuous}, stratified sampling = {stratified}"

    # Axes, grid, titles.
    fig.update_layout(
        width=1200,
        height=750,
        xaxis_title="N",
        yaxis_title=f"${yaxis_title}$",
        template="plotly_white",
        legend=dict(borderwidth=1),
        title=dict(
            text=title_for_graph,
            x=0.5,
            xanchor="center",
        ),
    )

    if save:
        # Strip LaTeX backslashes and carets from the file name.
        title_for_saving = title_for_saving.replace("\\", "").replace("^", "")
        safe_write(f"./graphs/{title_for_saving}.pdf", fig.write_image)
        safe_write(f"./graphs/{title_for_saving}.html", fig.write_html, include_mathjax="cdn")

    fig.show()


def plot_confidence_interval(
    data, function, continuous, stratified, col, window=None, save=False
):
    """Plot column ``col`` against N together with its confidence band.

    Args:
        data: frame with columns "N", ``col``, ``f"{col}_low"``, ``f"{col}_high"``
            (plus "C_0" and "C_1" when ``function == "f(N)"`` and ``col == "C"``).
        function: LaTeX function label; "f(N)" selects the m_f(N) naming,
            anything else m_g(N).
        continuous, stratified: sampling flags, shown in the title only.
        col: name of the column to plot.
        window: if given, the curve and both band edges are replaced by their
            rolling mean over this many points.
        save: when true, the figure is saved under ./graphs as PDF and HTML
            (existing files are kept).
    """
    values = data[col]
    values_high = data[f"{col}_high"]
    values_low = data[f"{col}_low"]

    if window is not None:
        values = values.rolling(window=window).mean()
        values_high = values_high.rolling(window=window).mean()
        values_low = values_low.rolling(window=window).mean()

    fig = go.Figure()

    # Main curve.
    fig.add_trace(
        go.Scatter(
            x=data["N"], y=values, mode="lines", name=col, line=dict(color="red")
        )
    )

    if function == "f(N)" and col == "C":
        # Theoretical curve C_0.
        fig.add_trace(
            go.Scatter(
                x=data["N"],
                y=data["C_0"],
                mode="lines",
                name=r"$C_0 = \sqrt{\ln N} \cdot \prod_{2p \leq N} (1 - \frac{1}{2p})$",
                line=dict(color="green"),
            )
        )
        # Theoretical curve C_1.
        fig.add_trace(
            go.Scatter(
                x=data["N"],
                y=data["C_1"],
                mode="lines",
                name=r"$C_1 = \sqrt{\ln N} \cdot \prod_{p \leq N} (1 - \frac{1}{2p})$",
                line=dict(color="blue"),
            )
        )

    # Confidence band: upper edge left to right, then lower edge right to left,
    # forming one closed polygon for fill="toself".
    fig.add_trace(
        go.Scatter(
            x=list(data["N"]) + list(data["N"][::-1]),
            y=list(values_high) + list(values_low[::-1]),
            fill="toself",
            fillcolor=hex_to_rgba("#e41a1c", 0.2),
            line=dict(color="rgba(0,0,0,0)"),
            hoverinfo="skip",
            name="Confidence interval",
            showlegend=True,
        )
    )

    # Raw strings throughout: "\ " (a LaTeX space), "\cdot", "\sqrt" and "\ln"
    # are not Python escapes and must reach MathJax unchanged.
    window_comment = rf", усреднение \ окном \ {window}" if window is not None else ""
    title = rf"{col}, function = {function}, continuous \ sampling = {continuous}, stratified \ sampling = {stratified}{window_comment}"
    yaxis_title = (
        r"m(N) \cdot \sqrt{\ln N}"
        if col == "C" else col
    )

    # Distinguish the two functions in labels: m_f(N) versus m_g(N).
    subscripted = "m_f(N)" if function == "f(N)" else "m_g(N)"
    yaxis_title = yaxis_title.replace("m(N)", subscripted)
    title = title.replace("m(N)", subscripted)

    # Axes, grid, titles.
    fig.update_layout(
        width=1200,
        height=750,
        xaxis_title="N",
        yaxis_title=f"${yaxis_title}$",
        template="plotly_white",
        legend=dict(borderwidth=1),
        title=dict(
            text=f"<span style='font-size:20px;'>${title}$</span>",
            x=0.5,
            xanchor="center",
        ),
    )

    if save:
        # Strip LaTeX backslashes and carets, and collapse the resulting double spaces.
        title_for_saving = title.replace("\\", "").replace("  ", " ").replace("^", "")
        safe_write(f"./graphs/{title_for_saving}.pdf", fig.write_image)
        safe_write(f"./graphs/{title_for_saving}.html", fig.write_html, include_mathjax="cdn")

    fig.show()


def compare_random_samplings(function, col, save=True):
    """Overlay column ``col`` of every run of ``function``, with confidence bands.

    Runs are ordered by row count (capped at 2,000,000). For each run ``i``
    except the longest, one figure is drawn containing run ``i`` and every
    longer run, all truncated to the row count of run ``i``.

    Reads the notebook-level frame ``df``.

    Args:
        function: value of the "function" column to select.
        col: column to plot; ``f"{col}_low"`` and ``f"{col}_high"`` give the band.
        save: when true (the default, matching the previous always-save
            behaviour), each figure is saved under ./graphs as PDF and HTML
            (existing files are kept).
    """
    # Red, blue and green are left out: the other plots use them for the main
    # curve and the C_1 / C_0 curves.
    colors = [
        "#984ea3",  # purple
        "#ff7f00",  # orange
        "#ffff33",  # yellow
        "#a65628",  # brown
        "#f781bf",  # pink
        "#999999",  # grey
    ]

    # One row per run configuration; column 0 holds the row count, ascending.
    combination_lengths = (
        df[df["function"] == function]
        .groupby(["computation", "continuous sampling", "stratified sampling"])
        .size()
        .sort_values()
        .reset_index()
    )
    combination_lengths[0] = np.minimum(combination_lengths[0], 2000000)

    show_theory = function == "f(N)" and col == "C"

    # Raw f-string: "\ " is a LaTeX space, not a Python escape.
    title = (
        rf"Сравнение \ различных \ вариантов \ сэмплирования, функция \ {function}"
    )
    yaxis_title = col.replace("m(N)", "m_f(N)" if function == "f(N)" else "m_g(N)")

    for i in range(len(combination_lengths) - 1):
        fig = go.Figure()
        length = int(combination_lengths[0].iloc[i])

        subset = None
        for j in range(i, len(combination_lengths)):
            color = colors[j % len(colors)]
            row = combination_lengths.iloc[j]
            mask = (
                (df["function"] == function)
                & (df["computation"] == row["computation"])
                & (df["continuous sampling"] == row["continuous sampling"])
                & (df["stratified sampling"] == row["stratified sampling"])
            )
            subset = df[mask].iloc[:length]

            # Main curve.
            fig.add_trace(
                go.Scatter(
                    x=subset["N"],
                    y=subset[col],
                    mode="lines",
                    name=f"computation = {row['computation']}, continuous sampling = {row['continuous sampling']}, stratified sampling = {row['stratified sampling']}",
                    line=dict(color=color),
                )
            )

            # Confidence band as one closed polygon.
            fig.add_trace(
                go.Scatter(
                    x=pd.concat([subset["N"], subset["N"][::-1]]),
                    y=pd.concat(
                        [subset[f"{col}_high"], subset[f"{col}_low"][::-1]]
                    ),
                    fill="toself",
                    fillcolor=hex_to_rgba(color, 0.2),
                    line=dict(color="rgba(0,0,0,0)"),
                    hoverinfo="skip",
                    showlegend=False,
                )
            )

        if show_theory:
            # The theory curves are drawn over the N range of the last (longest)
            # run, made explicit here instead of relying on the loop's leftover mask.
            fig.add_trace(
                go.Scatter(
                    x=subset["N"],
                    y=subset["C_0"],
                    mode="lines",
                    name=r"$C_0 = \sqrt{\ln N} \cdot \prod_{2p \leq N} (1 - \frac{1}{2p})$",
                    line=dict(color="green"),
                )
            )
            fig.add_trace(
                go.Scatter(
                    x=subset["N"],
                    y=subset["C_1"],
                    mode="lines",
                    name=r"$C_1 = \sqrt{\ln N} \cdot \prod_{p \leq N} (1 - \frac{1}{2p})$",
                    line=dict(color="blue"),
                )
            )
        del subset

        fig.update_layout(
            width=1200,
            height=750,
            xaxis_title="N",
            yaxis_title=f"${yaxis_title}$",
            template="plotly_white",
            title=dict(
                text=f"<span style='font-size:20px;'>${title}$</span>",
                x=0.5,
                xanchor="center",
            ),
            legend=dict(
                orientation="h",  # horizontal
                yanchor="bottom",
                y=-0.3,  # below the plot area
                xanchor="center",
                x=0.5,
                font=dict(size=12),
            ),
        )

        if save:
            # The figure index i keeps the file names of the successive figures distinct.
            title_for_saving = (title.replace("\\", "").replace("  ", " ") + f", {col}, {i}").replace("\\", "").replace("^", "")
            safe_write(f"./graphs/{title_for_saving}.pdf", fig.write_image)
            safe_write(f"./graphs/{title_for_saving}.html", fig.write_html, include_mathjax="cdn")

        fig.show()
        del fig

def compare_diff_from_real_value(function, save=False):
    """Plot how the sampled estimates of m(N) deviate from the exact values.

    Two figures are produced: the deviation against N (with the confidence
    band shifted by the exact value) and an overlaid histogram of the
    deviations, one trace per run.

    Reads the notebook-level frame ``df``.

    Args:
        function: value of the "function" column to select.
        save: when true, both figures are saved under ./graphs as PDF and HTML
            (existing files are kept).
    """
    # Red, blue and green are left out: the other plots use them for the main
    # curve and the C_1 / C_0 curves.
    colors = [
        "#984ea3",  # purple
        "#ff7f00",  # orange
        "#ffff33",  # yellow
        "#a65628",  # brown
        "#f781bf",  # pink
        "#999999",  # grey
    ]

    # One row per run configuration, ordered by ascending row count.
    combination_lengths = (
        df[df["function"] == function]
        .groupby(["computation", "continuous sampling", "stratified sampling"])
        .size()
        .sort_values()
        .reset_index()
    )

    graph = go.Figure()
    hist = go.Figure()

    real_values = df[(df["function"] == function) & (df["computation"] == "exact")]
    length = len(real_values)
    truth = real_values["m(N)"]

    # Row 0 is skipped. NOTE(review): this presumably assumes the exact run is
    # the shortest one and therefore sorts first - confirm.
    for i in range(1, len(combination_lengths)):
        color = colors[i % len(colors)]
        row = combination_lengths.iloc[i]
        mask = (
            (df["function"] == function)
            & (df["computation"] == row["computation"])
            & (df["continuous sampling"] == row["continuous sampling"])
            & (df["stratified sampling"] == row["stratified sampling"])
        )
        temp_data = df[mask].iloc[:length]

        # Series subtraction pairs rows by index label, i.e. by the row number
        # each run kept from its own results file.
        deviation = temp_data["m(N)"] - truth

        # Main curve.
        graph.add_trace(
            go.Scatter(
                x=temp_data["N"],
                y=deviation,
                mode="lines",
                name=f"computation = {row['computation']}, continuous sampling = {row['continuous sampling']}, stratified sampling = {row['stratified sampling']}",
                line=dict(color=color),
            )
        )

        # Confidence band as one closed polygon.
        graph.add_trace(
            go.Scatter(
                x=pd.concat([temp_data["N"], temp_data["N"][::-1]]),
                y=pd.concat(
                    [temp_data["m(N)_high"] - truth, temp_data["m(N)_low"][::-1] - truth[::-1]]
                ),
                fill="toself",
                fillcolor=hex_to_rgba(color, 0.2),
                line=dict(color="rgba(0,0,0,0)"),
                hoverinfo="skip",
                showlegend=False,
            )
        )

        # Histogram of the same deviations.
        hist.add_trace(go.Histogram(
            x=deviation,
            name=f"continuous sampling = {row['continuous sampling']}, stratified sampling = {row['stratified sampling']}",
            marker_color=color,
            opacity=0.5,
            nbinsx=50
        ))
        del temp_data, deviation

    # Raw f-strings: "\ " is a LaTeX space, not a Python escape.
    graph_title = (
        rf"Отклонение \ оценок \ m(N) \ от \ истинного \ значения, функция \ {function}"
    )
    yaxis_title = "m_f(N)" if function == "f(N)" else "m_g(N)"

    graph.update_layout(
        width=1200,
        height=750,
        xaxis_title="N",
        yaxis_title=f"${yaxis_title}$",
        template="plotly_white",
        title=dict(
            text=f"<span style='font-size:20px;'>${graph_title}$</span>",
            x=0.5,
            xanchor="center",
        ),
        legend=dict(
            orientation="h",  # horizontal
            yanchor="bottom",
            y=-0.3,  # below the plot area
            xanchor="center",
            x=0.5,
            font=dict(size=12),
        ),
    )

    # No trailing "$" here: the title is wrapped in $...$ below, and the extra
    # "$" used to produce "$...$$" and a "$" at the end of the saved file name.
    hist_title = rf"Распределение \ отклонений \ оценок \ m(N) \ от \ истинного \ значения, функция \ {function}"
    hist.update_layout(
        barmode='overlay',  # draw the histograms on top of each other
        yaxis_title='Частота',
        title=dict(
            text=f"<span style='font-size:20px;'>${hist_title}$</span>",
            x=0.5,
            xanchor="center",
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.4,
            xanchor="center",
            x=0.5
        )
    )

    if save:
        # Strip LaTeX backslashes and carets, and collapse the resulting double spaces.
        graph_title_for_saving = graph_title.replace("\\", "").replace("  ", " ").replace("^", "")
        hist_title_for_saving = hist_title.replace("\\", "").replace("  ", " ").replace("^", "")

        safe_write(f"./graphs/{graph_title_for_saving}.pdf", graph.write_image)
        safe_write(f"./graphs/{graph_title_for_saving}.html", graph.write_html, include_mathjax="cdn")
        safe_write(f"./graphs/{hist_title_for_saving}.pdf", hist.write_image)
        safe_write(f"./graphs/{hist_title_for_saving}.html", hist.write_html, include_mathjax="cdn")

    graph.show()
    hist.show()
    del graph, hist

# Графики для оригинальной функции $f(N)$

In [None]:
# Exact m_f(N) in units of 1/sqrt(ln N) (column "C"), with C_0 and C_1 overlaid.
computation, function = "exact", "f(N)"
mask = df["computation"].eq(computation) & df["function"].eq(function)
plot_single_line(
    df.loc[mask],
    computation=computation,
    function=function,
    values="C",
    yaxis_title=r"m_f(N) \cdot \sqrt{\ln N}",
    plot_theory_C=True,
    save=True,
)

In [None]:
# f(N), random sampling: neither continuous nor stratified.
computation, function = "random sampling", "f(N)"
continuous, stratified = False, False

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

# Raw curve first, then the same curve as a 10-point rolling mean.
plot_confidence_interval(
    df.loc[mask], function, continuous, stratified, col="C", save=True
)
plot_confidence_interval(
    df.loc[mask], function, continuous, stratified, col="C", window=10, save=True
)

In [None]:
# f(N), random sampling: stratified, not continuous.
computation, function = "random sampling", "f(N)"
continuous, stratified = False, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

# Raw curve first, then the same curve as a 10-point rolling mean.
plot_confidence_interval(
    df.loc[mask], function, continuous, stratified, col="C", save=True
)
plot_confidence_interval(
    df.loc[mask], function, continuous, stratified, col="C", window=10, save=True
)

In [None]:
# f(N), random sampling: continuous, not stratified.
computation, function = "random sampling", "f(N)"
continuous, stratified = True, False

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

# Raw curve first, then the same curve as a 10-point rolling mean.
plot_confidence_interval(
    df.loc[mask], function, continuous, stratified, col="C", save=True
)
plot_confidence_interval(
    df.loc[mask], function, continuous, stratified, col="C", window=10, save=True
)

In [None]:
# f(N), random sampling: continuous and stratified.
computation, function = "random sampling", "f(N)"
continuous, stratified = True, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

# Raw curve first, then the same curve as a 10-point rolling mean.
plot_confidence_interval(
    df.loc[mask], function, continuous, stratified, col="C", save=True
)
plot_confidence_interval(
    df.loc[mask], function, continuous, stratified, col="C", window=10, save=True
)

In [None]:
# Last available value of C for f(N) under continuous + stratified sampling.
computation, function = "random sampling", "f(N)"
continuous, stratified = True, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)
df.loc[mask, "C"].iloc[-1]

# Для версии функции $g(N) = f(N) \cdot \mu ^2 (N)$

In [None]:
# Exact m_g(N) for g(N) = f(N) * mu^2(N); no theory curves are drawn.
computation = "exact"
# Raw string: "\c" and "\m" are not valid Python escapes. The value is unchanged
# and still equals the label assigned when the data was loaded.
function = r"f(N) \cdot \mu ^2 (N)"
mask = (df["computation"] == computation) & (df["function"] == function)
plot_single_line(
    df[mask],
    function=function,
    computation=computation,
    plot_theory_C=False,
    save=True,
    values="m(N)",
)

In [None]:
# g(N), random sampling: neither continuous nor stratified.
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = False
stratified = False

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)

# Raw curve first, then the same curve as a 10-point rolling mean.
plot_confidence_interval(df[mask], function, continuous, stratified, col="m(N)", save=True)
plot_confidence_interval(
    df[mask], function, continuous, stratified, col="m(N)", window=10, save=True
)

In [None]:
# g(N), random sampling: stratified, not continuous.
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = False
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)

# Raw curve first, then the same curve as a 10-point rolling mean.
plot_confidence_interval(df[mask], function, continuous, stratified, col="m(N)", save=True)
plot_confidence_interval(
    df[mask], function, continuous, stratified, col="m(N)", window=10, save=True
)

In [None]:
# g(N), random sampling: continuous, not stratified.
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = False

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)

# Only the first 5,000,000 rows of the run are plotted.
plot_confidence_interval(
    df[mask].iloc[:5_000_000], function, continuous, stratified, col="m(N)", save=True
)
plot_confidence_interval(
    df[mask].iloc[:5_000_000], function, continuous, stratified, col="m(N)", window=10, save=True
)

In [None]:
# g(N), random sampling: continuous and stratified.
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)

# Only the first 5,000,000 rows of the run are plotted.
plot_confidence_interval(
    df[mask].iloc[:5_000_000], function, continuous, stratified, col="m(N)", save=True
)
plot_confidence_interval(
    df[mask].iloc[:5_000_000], function, continuous, stratified, col="m(N)", window=10, save=True
)

In [None]:
# Debugging aid: list the names currently defined in the interactive namespace.
%who

# Сравнение разных видов сэмплирования

In [None]:
# Overlay every run of f(N): the estimate m(N) itself.
compare_random_samplings(function="f(N)", col="m(N)")

In [None]:
# Overlay every run of f(N): column "C"; for this combination the C_0 / C_1 curves are drawn too.
compare_random_samplings(function="f(N)", col="C")

In [None]:
# Overlay every run of g(N). Raw string: "\c" and "\m" are not valid Python
# escapes; the value is unchanged.
compare_random_samplings(function=r"f(N) \cdot \mu ^2 (N)", col="m(N)")

## Отклонение сэмплирования от истинного значения

In [None]:
# Deviation of the sampled estimates from the exact values, for f(N); figures are saved.
compare_diff_from_real_value(function="f(N)", save=True)

In [None]:
# Deviation of the sampled estimates from the exact values, for g(N).
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
compare_diff_from_real_value(function=r"f(N) \cdot \mu ^2 (N)", save=True)

# Проверяем асимптотику $m_f(N)$

## $m_f(N) \sim \dfrac{1}{(\ln N)^C}$

In [None]:
# f(N), stratified sampling: m_f(N) * sqrt(ln N), to test the exponent 1/2.
computation, function = "random sampling", "f(N)"
continuous, stratified = False, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

plot_single_line(
    df.loc[mask],
    computation=computation,
    function=function,
    values=df.loc[mask, "m(N)"] * np.sqrt(np.log(df.loc[mask, "N"])),
    yaxis_title=r"m_f(N) \cdot \sqrt{\ln N}",
    plot_theory_C=False,
    save=True,
)

## Степень точно не 0.5

In [None]:
# f(N), stratified sampling: the exponent implied at each N, log_{ln N}(1 / m_f(N)).
computation, function = "random sampling", "f(N)"
continuous, stratified = False, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

plot_single_line(
    df.loc[mask],
    computation=computation,
    function=function,
    values=np.emath.logn(np.log(df.loc[mask, "N"]), 1 / df.loc[mask, "m(N)"]),
    yaxis_title=r"\log_{\ln N} \dfrac{1}{m_f(N)}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# f(N), continuous + stratified sampling: the exponent implied at each N.
computation, function = "random sampling", "f(N)"
continuous, stratified = True, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

plot_single_line(
    df.loc[mask],
    computation=computation,
    function=function,
    values=np.emath.logn(np.log(df.loc[mask, "N"]), 1 / df.loc[mask, "m(N)"]),
    yaxis_title=r"\log_{\ln N} \dfrac{1}{m_f(N)}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# Empirical exponent for f(N): the implied exponent at the last row of the
# continuous + stratified run.
computation, function = "random sampling", "f(N)"
continuous, stratified = True, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

C_ln_N = np.emath.logn(np.log(df.loc[mask, "N"]), 1 / df.loc[mask, "m(N)"])[-1]
C_ln_N

In [None]:
# Raw f-string: "\sim", "\dfrac" and "\ln" are LaTeX, not Python escapes; the
# rendered text is unchanged.
Markdown(rf"Похоже на $m_f(N) \sim \dfrac{{1}}{{(\ln N)^{{C_{{\ln N}}}}}} = \dfrac{{1}}{{(\ln N)^{{{C_ln_N}}}}}$")

In [None]:
# f(N), stratified sampling: m_f(N) * (ln N)^C_ln_N with the fitted exponent.
computation, function = "random sampling", "f(N)"
continuous, stratified = False, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

plot_single_line(
    df.loc[mask],
    computation=computation,
    function=function,
    values=df.loc[mask, "m(N)"] * (np.log(df.loc[mask, "N"]) ** C_ln_N),
    yaxis_title=r"m_f(N) \cdot (\ln N)^{C_{\ln N}}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# f(N), continuous + stratified sampling: m_f(N) * (ln N)^C_ln_N with the fitted exponent.
computation, function = "random sampling", "f(N)"
continuous, stratified = True, True

mask = (
    df["computation"].eq(computation)
    & df["function"].eq(function)
    & df["continuous sampling"].eq(continuous)
    & df["stratified sampling"].eq(stratified)
)

plot_single_line(
    df.loc[mask],
    computation=computation,
    function=function,
    values=df.loc[mask, "m(N)"] * (np.log(df.loc[mask, "N"]) ** C_ln_N),
    yaxis_title=r"m_f(N) \cdot (\ln N)^{C_{\ln N}}",
    plot_theory_C=False,
    save=True,
)

# Угадываем асимптотику для $g(N) = f(N) \cdot \mu ^2 (N)$

## $m_g(N) \sim \dfrac{1}{(\ln N)^C}$

In [None]:
# g(N), stratified sampling: m_g(N) * sqrt(ln N), to test the exponent 1/2.
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = False
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)

plot_single_line(
    df[mask],
    computation=computation,
    function=function,
    values=df[mask]["m(N)"] * np.sqrt(np.log(df[mask]["N"])),
    yaxis_title=r"m_g(N) \cdot \sqrt{\ln N}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# g(N), continuous + stratified sampling: m_g(N) * sqrt(ln N).
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)

plot_single_line(
    df[mask],
    computation=computation,
    function=function,
    values=df[mask]["m(N)"] * np.sqrt(np.log(df[mask]["N"])),
    yaxis_title=r"m_g(N) \cdot \sqrt{\ln N}",
    plot_theory_C=False,
    save=True,
)

## Степень точно не 0.5

In [None]:
# g(N), stratified sampling: the exponent implied at each N, log_{ln N}(1 / m_g(N)).
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = False
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)

plot_single_line(
    df[mask],
    computation=computation,
    function=function,
    values=np.emath.logn(np.log(df[mask]["N"]), 1 / df[mask]["m(N)"]),
    yaxis_title=r"\log_{\ln N} \dfrac{1}{m_g(N)}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# g(N), continuous + stratified sampling: the exponent implied at each N.
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid Python escapes; the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)

plot_single_line(
    df[mask],
    computation=computation,
    function=function,
    values=np.emath.logn(np.log(df[mask]["N"]), 1 / df[mask]["m(N)"]),
    yaxis_title=r"\log_{\ln N} \dfrac{1}{m_g(N)}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# Estimate the exponent C_ln_N in m_g(N) ~ 1 / (ln N)^C from the
# continuous + stratified random-sampling run.
computation = "random sampling"
# Raw string: "\c" and "\m" are invalid escapes in a normal literal
# (SyntaxWarning on Python 3.12+); the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Filter once instead of re-indexing df for every column access.
df_masked = df[mask]

# np.emath.logn converts its inputs with asarray, so the result is a plain
# ndarray and [-1] is positional: the estimate from the last selected row
# (presumably the largest N - TODO confirm the rows are ordered by N).
C_ln_N = np.emath.logn(np.log(df_masked["N"]), 1 / df_masked["m(N)"])[-1]
C_ln_N

In [None]:
# Render the conjectured law with the estimated exponent (C_ln_N must already
# be defined by an earlier cell). Raw f-string: "\s", "\d" and "\l" are
# invalid escapes in a normal literal (SyntaxWarning on Python 3.12+).
# Doubled braces emit literal LaTeX braces; the rendered text is unchanged.
Markdown(rf"Похоже на $m_g(N) \sim \dfrac{{1}}{{(\ln N)^{{C_{{\ln N}}}}}} = \dfrac{{1}}{{(\ln N)^{{{C_ln_N}}}}}$")

In [None]:
# Sanity check of the fitted exponent: plot m_g(N) * (ln N)^C_ln_N for the
# stratified run with continuous sampling off. C_ln_N must already be
# defined by an earlier cell.
computation = "random sampling"
# Raw string avoids the invalid "\c" / "\m" escapes (SyntaxWarning on
# Python 3.12+); value unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = False
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Single filtered selection reused below.
df_masked = df[mask]

# A good fit shows up as a curve that stays close to a constant.
plot_single_line(
    df_masked,
    computation=computation,
    function=function,
    values=df_masked["m(N)"] * (np.log(df_masked["N"]) ** C_ln_N),
    yaxis_title=r"m_g(N) \cdot (\ln N)^{C_{\ln N}}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# Sanity check of the fitted exponent on the continuous + stratified run:
# plot m_g(N) * (ln N)^C_ln_N. C_ln_N must already be defined by an
# earlier cell.
computation = "random sampling"
# Raw string: "\c" and "\m" are invalid escape sequences otherwise
# (SyntaxWarning on Python 3.12+); same resulting text.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Evaluate the boolean selection once.
df_masked = df[mask]

# Rescale by the fitted power of ln N; a flat curve supports the fit.
plot_single_line(
    df_masked,
    computation=computation,
    function=function,
    values=df_masked["m(N)"] * (np.log(df_masked["N"]) ** C_ln_N),
    yaxis_title=r"m_g(N) \cdot (\ln N)^{C_{\ln N}}",
    plot_theory_C=False,
    save=True,
)

## $m_g(N) \sim \dfrac{1}{N^C}$

In [None]:
# Hypothesis check for g(N) = f(N) * mu^2(N): m_g(N) ~ 1 / sqrt(N).
# Run selected: stratified random sampling, continuous sampling off.
computation = "random sampling"
# Raw string: "\c" and "\m" are invalid escapes in a normal literal
# (SyntaxWarning on Python 3.12+); the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = False
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Filter once; the same selection feeds the frame and the y-values.
df_masked = df[mask]

# If the hypothesis held, m_g(N) * sqrt(N) would level off to a constant.
plot_single_line(
    df_masked,
    computation=computation,
    function=function,
    values=df_masked["m(N)"] * np.sqrt(df_masked["N"]),
    yaxis_title=r"m_g(N) \cdot \sqrt{N}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# Same check, m_g(N) ~ 1 / sqrt(N), for the run with continuous and
# stratified sampling both enabled.
computation = "random sampling"
# Raw string avoids the invalid "\c" / "\m" escapes (SyntaxWarning on
# Python 3.12+); identical text.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Apply the filter a single time.
df_masked = df[mask]

# Rescaled curve: constant in the limit iff the exponent of N is 1/2.
plot_single_line(
    df_masked,
    computation=computation,
    function=function,
    values=df_masked["m(N)"] * np.sqrt(df_masked["N"]),
    yaxis_title=r"m_g(N) \cdot \sqrt{N}",
    plot_theory_C=False,
    save=True,
)

## Степень точно не 0.5

In [None]:
# Pointwise exponent estimate for the model m_g(N) = N^(-C):
# C = log_N(1 / m_g(N)). Run: stratified, continuous sampling off.
computation = "random sampling"
# Raw string: "\c" and "\m" are not valid escapes in a normal literal
# (SyntaxWarning on Python 3.12+); value unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = False
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Select the rows once and reuse the result.
df_masked = df[mask]

# np.emath.logn(base, x): base is N itself, value is 1 / m_g(N).
plot_single_line(
    df_masked,
    computation=computation,
    function=function,
    values=np.emath.logn(df_masked["N"], 1 / df_masked["m(N)"]),
    yaxis_title=r"\log_{N} \dfrac{1}{m_g(N)}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# Pointwise exponent estimate C = log_N(1 / m_g(N)) for the run with
# continuous and stratified sampling both enabled.
computation = "random sampling"
# Raw string avoids the invalid "\c" / "\m" escape sequences
# (SyntaxWarning on Python 3.12+); the text is the same.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# One filtered view shared by all uses below.
df_masked = df[mask]

# Logarithm of 1 / m_g(N) taken in base N, element-wise.
plot_single_line(
    df_masked,
    computation=computation,
    function=function,
    values=np.emath.logn(df_masked["N"], 1 / df_masked["m(N)"]),
    yaxis_title=r"\log_{N} \dfrac{1}{m_g(N)}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# Estimate the exponent C_N in m_g(N) ~ 1 / N^C from the
# continuous + stratified random-sampling run.
computation = "random sampling"
# Raw string: "\c" and "\m" are invalid escapes in a normal literal
# (SyntaxWarning on Python 3.12+); the value is unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Filter once instead of re-indexing df for every column access.
df_masked = df[mask]

# np.emath.logn returns a plain ndarray, so [-1] is positional: the estimate
# from the last selected row (presumably the largest N - TODO confirm the
# rows are ordered by N).
C_N = np.emath.logn(df_masked["N"], 1 / df_masked["m(N)"])[-1]
C_N

In [None]:
# Render the conjectured power law with the estimated exponent (C_N must
# already be defined by an earlier cell). Raw f-string: "\s" and "\d" are
# invalid escapes in a normal literal (SyntaxWarning on Python 3.12+).
# Doubled braces emit literal LaTeX braces; the rendered text is unchanged.
Markdown(rf"Похоже на $m_g(N) \sim \dfrac{{1}}{{N^{{C_N}}}} = \dfrac{{1}}{{N^{{{C_N}}}}}$")

In [None]:
# Sanity check of the fitted exponent: plot m_g(N) * N^C_N for the
# stratified run with continuous sampling off. C_N must already be defined
# by an earlier cell.
computation = "random sampling"
# Raw string avoids the invalid "\c" / "\m" escapes (SyntaxWarning on
# Python 3.12+); value unchanged.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = False
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Single filtered selection reused below.
df_masked = df[mask]

# A good fit shows up as a curve that stays close to a constant.
plot_single_line(
    df_masked,
    computation=computation,
    function=function,
    values=df_masked["m(N)"] * (df_masked["N"] ** C_N),
    yaxis_title=r"m_g(N) \cdot N^{C_N}",
    plot_theory_C=False,
    save=True,
)

In [None]:
# Sanity check of the fitted exponent on the continuous + stratified run:
# plot m_g(N) * N^C_N. C_N must already be defined by an earlier cell.
computation = "random sampling"
# Raw string: "\c" and "\m" are invalid escape sequences otherwise
# (SyntaxWarning on Python 3.12+); same resulting text.
function = r"f(N) \cdot \mu ^2 (N)"
continuous = True
stratified = True

mask = (
    (df["computation"] == computation)
    & (df["function"] == function)
    & (df["continuous sampling"] == continuous)
    & (df["stratified sampling"] == stratified)
)
# Evaluate the boolean selection once.
df_masked = df[mask]

# Rescale by the fitted power of N; a flat curve supports the fit.
plot_single_line(
    df_masked,
    computation=computation,
    function=function,
    values=df_masked["m(N)"] * (df_masked["N"] ** C_N),
    yaxis_title=r"m_g(N) \cdot N^{C_N}",
    plot_theory_C=False,
    save=True,
)

## Слишком быстро растет, не похоже на правду