In [1]:
import numpy as np
from typing import List
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.auto import tqdm

import sys
sys.path.append("..")
from src.data import Generator
from src.graph.knn import GraphKnn
from src.graph.dist import GraphDist

In [2]:
# Experiment 1: sweep the distribution parameters at a fixed sample size and
# record Monte Carlo averages of two graph statistics.
n: int = 100  # passed to Generator(size=...)
cnt: int = 50  # number of points on each parameter grid
shape_param: np.ndarray = np.linspace(0.01, 2, cnt)  # for Gamma(shape=0.5, scale=λ)
alpha_param: np.ndarray = np.linspace(0.01, 2, cnt)  # for Pareto(α)
mc_iter: int = 20  # repetitions averaged per grid point


def _mc_mean(draw_statistic):
    """Average `draw_statistic()` over `mc_iter` independent calls."""
    return np.mean([draw_statistic() for _ in range(mc_iter)])


knn_f_two: List[float] = []
knn_h_two: List[float] = []
dist_f_two: List[float] = []
dist_h_two: List[float] = []
for i in tqdm(range(cnt)):
    gen: Generator = Generator(v=3, alpha=alpha_param[i], shape=shape_param[i], size=n)

    # The four averages are evaluated in the original order (KNN f, KNN h,
    # distance f, distance h) in case the generator draws from shared random state.
    knn_f_two.append(_mc_mean(lambda: GraphKnn(gen.get_f_two()).calc_connected_components()))
    knn_h_two.append(_mc_mean(lambda: GraphKnn(gen.get_h_two()).calc_connected_components()))

    dist_f_two.append(_mc_mean(lambda: GraphDist(gen.get_f_two()).calc_chromatic_number()))
    dist_h_two.append(_mc_mean(lambda: GraphDist(gen.get_h_two()).calc_chromatic_number()))

  0%|          | 0/50 [00:00<?, ?it/s]

In [3]:
# Two side-by-side line plots of the parameter sweep: connected components of
# the KNN graph (left) and chromatic number of the distance graph (right).
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Количество компонент связности", "Хроматическое число"),
    horizontal_spacing=0.1,
)

# One row per trace: (x grid, y values, legend name, line colour, subplot column).
# Each series is plotted against the grid that actually parameterised it:
# Pareto series against `alpha_param`, Gamma series against `shape_param`.
# Previously every trace used `shape_param`, which was only correct because the
# two grids happen to be identical.
line_specs = [
    (alpha_param, knn_f_two, "KNN Pareto(α)", "blue", 1),
    (shape_param, knn_h_two, "KNN Gamma(½, λ)", "red", 1),
    (alpha_param, dist_f_two, "Distance Pareto(α)", "green", 2),
    (shape_param, dist_h_two, "Distance Gamma(½, λ)", "orange", 2),
]
for trace_x, trace_y, trace_name, trace_color, trace_col in line_specs:
    fig.add_trace(
        go.Scatter(
            x=trace_x,
            y=trace_y,
            mode="lines+markers",
            name=trace_name,
            line=dict(color=trace_color, width=2),
            marker=dict(size=4),
            showlegend=True,
        ),
        row=1,
        col=trace_col,
    )

# Axis titles: shared x label, metric-specific y labels.
for axis_col, y_title in ((1, "Количество компонент связности"), (2, "Хроматическое число")):
    fig.update_xaxes(title_text="shape / α", row=1, col=axis_col)
    fig.update_yaxes(title_text=y_title, row=1, col=axis_col)

fig.update_layout(
    title_text="Сравнение метрик для графов: зависимость от shape и α (Pareto vs Gamma)",
    title_x=0.5,
    width=1400,
    height=600,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()

In [4]:
# Experiment 2: fix the distribution parameters and sweep the sample size `n`
# together with the graph parameter (second argument of GraphKnn / GraphDist).
#
# `n` is stored as integers: the generator is always called with int(n[i]), so
# keeping the truncated values here makes any later plot show the sizes that
# were actually simulated rather than the fractional linspace points.
n: np.ndarray = np.linspace(25, 150, 27).astype(int)
k: np.ndarray = np.arange(2, 12)  # integer values 2..11 for GraphKnn
d: np.ndarray = np.linspace(0.1, 10, 10)  # values passed to GraphDist
mc_iter: int = 20  # repetitions averaged per grid point

# k and d are swept with one shared index below, so they must line up.
assert len(k) == len(d), "k and d grids must have the same length"

knn_f_two: List[List[float]] = [[] for _ in range(len(n))]
knn_h_two: List[List[float]] = [[] for _ in range(len(n))]
dist_f_two: List[List[float]] = [[] for _ in range(len(n))]
dist_h_two: List[List[float]] = [[] for _ in range(len(n))]
for i in tqdm(range(len(n))):
    gen: Generator = Generator(v=3, alpha=3, shape=np.sqrt(2/3), size=int(n[i]))
    for j in range(len(k)):
        # Row i corresponds to n[i]; column j to k[j] (KNN) or d[j] (distance).
        knn_f_two[i].append(
            np.mean(
                [GraphKnn(gen.get_f_two(), int(k[j])).calc_connected_components() for _ in range(mc_iter)]
            )
        )
        knn_h_two[i].append(
            np.mean(
                [GraphKnn(gen.get_h_two(), int(k[j])).calc_connected_components() for _ in range(mc_iter)]
            )
        )

        dist_f_two[i].append(
            np.mean(
                [GraphDist(gen.get_f_two(), d[j]).calc_chromatic_number() for _ in range(mc_iter)]
            )
        )
        dist_h_two[i].append(
            np.mean(
                [GraphDist(gen.get_h_two(), d[j]).calc_chromatic_number() for _ in range(mc_iter)]
            )
        )

  0%|          | 0/27 [00:00<?, ?it/s]

In [5]:
# 3D surfaces of the (n, k) and (n, d) sweeps. With indexing="ij" the mesh has
# shape (len(n), len(k)), matching the row-per-n layout of the result lists.
N_knn: np.ndarray
K_knn: np.ndarray
N_knn, K_knn = np.meshgrid(n, k, indexing="ij")
knn_f_two_array: np.ndarray = np.array(knn_f_two)
knn_h_two_array: np.ndarray = np.array(knn_h_two)

N_dist: np.ndarray
D_dist: np.ndarray
N_dist, D_dist = np.meshgrid(n, d, indexing="ij")
dist_f_two_array: np.ndarray = np.array(dist_f_two)
dist_h_two_array: np.ndarray = np.array(dist_h_two)

# Guard against stale kernel state: the result arrays must match their grids.
assert knn_f_two_array.shape == knn_h_two_array.shape == N_knn.shape
assert dist_f_two_array.shape == dist_h_two_array.shape == N_dist.shape

fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "surface"}, {"type": "surface"}]],
    subplot_titles=(
        "3D KNN Metric (n, k, компоненты связности)",
        "3D Distance Metric (n, d, хроматическое число)",
    ),
    horizontal_spacing=0.05,
)

# One row per surface: (x mesh, y mesh, z values, legend name, colorscale, column).
surface_specs = [
    (N_knn, K_knn, knn_f_two_array, "KNN Pareto", "Blues", 1),
    (N_knn, K_knn, knn_h_two_array, "KNN Gamma", "Reds", 1),
    (N_dist, D_dist, dist_f_two_array, "Distance Pareto", "Greens", 2),
    (N_dist, D_dist, dist_h_two_array, "Distance Gamma", "Oranges", 2),
]
for mesh_x, mesh_y, mesh_z, surface_name, surface_scale, surface_col in surface_specs:
    fig.add_trace(
        go.Surface(
            x=mesh_x,
            y=mesh_y,
            z=mesh_z,
            name=surface_name,
            colorscale=surface_scale,
            opacity=0.7,
            showscale=False,
            # Surface traces are hidden from the legend by default; without this
            # the layout-level showlegend=True below has nothing to display and
            # the two overlapping surfaces in each scene cannot be told apart.
            showlegend=True,
        ),
        row=1,
        col=surface_col,
    )

fig.update_layout(
    scene=dict(
        xaxis_title="n (размер графа)",
        yaxis_title="k (количество соседей)",
        zaxis_title="Количество компонент связности",
        camera=dict(eye=dict(x=1.2, y=1.2, z=1.2)),
    ),
    scene2=dict(
        xaxis_title="n (размер графа)",
        yaxis_title="d (пороговое расстояние)",
        zaxis_title="Хроматическое число",
        camera=dict(eye=dict(x=1.2, y=1.2, z=1.2)),
    ),
    title_text="3D Визуализация метрик: влияние параметров графа (Pareto vs Gamma)",
    title_x=0.5,
    width=1400,
    height=700,
    showlegend=True,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()

### Distance
Видно, что при росте n и фиксированных остальных параметрах хроматическое число растёт как для распределения Парето, так и для гамма-распределения.

In [6]:
# Experiment 3: simulate both statistics at fixed parameters, derive a
# threshold from the f_two sample and count how often the h_two sample exceeds it.
n: int = 100
cnt: int = 50
shape: float = np.sqrt(2/3)  # λ₀ = sqrt(⅔)
alpha: float = 3  # α₀ = 3
mc_iter: int = 500

gen: Generator = Generator(v=3, alpha=alpha, shape=shape, size=n)


def _simulate(draw_statistic):
    """Collect `mc_iter` values of `draw_statistic()` behind a progress bar."""
    return [draw_statistic() for _ in tqdm(range(mc_iter))]


# Simulation order (KNN f, KNN h, distance f, distance h) is kept as in the
# original cell in case the generator draws from shared random state.
knn_f_two: List[float] = _simulate(lambda: GraphKnn(gen.get_f_two()).calc_connected_components())
knn_h_two: List[float] = _simulate(lambda: GraphKnn(gen.get_h_two()).calc_connected_components())

dist_f_two: List[float] = _simulate(lambda: GraphDist(gen.get_f_two()).calc_chromatic_number())
dist_h_two: List[float] = _simulate(lambda: GraphDist(gen.get_h_two()).calc_chromatic_number())

# Threshold = order statistic of the f_two sample at index int((1 - a) * len).
a: float = 0.05
knn_f_sorted = sorted(knn_f_two)
dist_f_sorted = sorted(dist_f_two)
knn_thr: float = knn_f_sorted[int((1-a)*len(knn_f_sorted))]
dist_thr: float = dist_f_sorted[int((1-a)*len(dist_f_sorted))]

# NOTE: these are counts of h_two runs strictly above the threshold (out of
# mc_iter), not rates; divide by len(...) to obtain an empirical power.
knn_exceeds = np.array(knn_h_two) > knn_thr
dist_exceeds = np.array(dist_h_two) > dist_thr
knn_pow: int = sum(knn_exceeds)
dist_pow: int = sum(dist_exceeds)

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

In [7]:
# Count (not a rate) of h_two chromatic-number runs strictly above dist_thr.
dist_pow

np.int64(4)

In [8]:
# 2x2 figure: top row shows the simulated distributions of each statistic with
# the chosen threshold; bottom row shows ROC-like curves obtained by sweeping
# the threshold over the range of the f_two (H0) sample.


def _exceed_rate(stats: np.ndarray, threshold: float) -> float:
    """Return the fraction of `stats` strictly greater than `threshold`."""
    return np.count_nonzero(stats > threshold) / len(stats)


def _add_histogram(figure, values, label: str, color: str, col: int, in_legend: bool) -> None:
    """Add a density-normalised histogram of `values` to the top row, column `col`."""
    figure.add_trace(
        go.Histogram(
            x=values,
            name=label,
            opacity=0.7,
            nbinsx=30,
            marker_color=color,
            histnorm="probability density",
            showlegend=in_legend,
        ),
        row=1,
        col=col,
    )


def _add_threshold_line(figure, threshold: float, col: int) -> None:
    """Mark `threshold` with a dashed vertical line in the top row, column `col`."""
    figure.add_vline(
        x=threshold,
        line=dict(color="green", width=3, dash="dash"),
        annotation_text=f"Трешолд: {threshold:.3f}",
        row=1,
        col=col,
    )


def _add_roc_curve(figure, fpr, power, label: str, color: str, col: int) -> None:
    """Plot power against false-positive rate in the bottom row, column `col`."""
    figure.add_trace(
        go.Scatter(
            x=fpr,
            y=power,
            mode="lines+markers",
            name=label,
            line=dict(color=color, width=2),
            marker=dict(size=4),
            showlegend=False,
        ),
        row=2,
        col=col,
    )


def _add_operating_point(figure, fpr: float, power: float, col: int) -> None:
    """Highlight the (FPR, power) pair of the chosen threshold with a star."""
    figure.add_trace(
        go.Scatter(
            x=[fpr],
            y=[power],
            mode="markers",
            marker=dict(size=12, color="red", symbol="star"),
            name=f"α={a}",
            showlegend=False,
        ),
        row=2,
        col=col,
    )


fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=(
        "Количество компонент связности: Распределения и трешолд",
        "Хроматическое число: Распределения и трешолд",
        "KNN: ROC-подобная кривая",
        "Distance: ROC-подобная кривая",
    ),
    specs=[
        [{"secondary_y": False}, {"secondary_y": False}],
        [{"secondary_y": False}, {"secondary_y": False}],
    ],
    vertical_spacing=0.12,
    horizontal_spacing=0.1,
)

# Convert each sample to an array once; the original rebuilt the arrays for
# every threshold inside the comprehensions and summed them in pure Python.
knn_null_arr = np.asarray(knn_f_two)
knn_alt_arr = np.asarray(knn_h_two)
dist_null_arr = np.asarray(dist_f_two)
dist_alt_arr = np.asarray(dist_h_two)

# --- Top row: distributions and thresholds -------------------------------
_add_histogram(fig, knn_f_two, "KNN Pareto (H0)", "blue", 1, True)
_add_histogram(fig, knn_h_two, "KNN Gamma (H1)", "red", 1, True)
_add_threshold_line(fig, knn_thr, 1)

_add_histogram(fig, dist_f_two, "Distance Pareto (H0)", "lightblue", 2, False)
_add_histogram(fig, dist_h_two, "Distance Gamma (H1)", "orange", 2, False)
_add_threshold_line(fig, dist_thr, 2)

# --- Bottom row: ROC-like curves -----------------------------------------
# Thresholds span only the observed range of the H0 sample, so the curve need
# not reach the (1, 1) corner.
thresholds_knn: np.ndarray = np.linspace(min(knn_f_two), max(knn_f_two), 50)
power_knn: List[float] = [_exceed_rate(knn_alt_arr, thr) for thr in thresholds_knn]
fpr_knn: List[float] = [_exceed_rate(knn_null_arr, thr) for thr in thresholds_knn]
_add_roc_curve(fig, fpr_knn, power_knn, "KNN ROC", "purple", 1)

current_fpr_knn: float = _exceed_rate(knn_null_arr, knn_thr)
current_power_knn: float = _exceed_rate(knn_alt_arr, knn_thr)
_add_operating_point(fig, current_fpr_knn, current_power_knn, 1)

thresholds_dist: np.ndarray = np.linspace(min(dist_f_two), max(dist_f_two), 50)
power_dist: List[float] = [_exceed_rate(dist_alt_arr, thr) for thr in thresholds_dist]
fpr_dist: List[float] = [_exceed_rate(dist_null_arr, thr) for thr in thresholds_dist]
_add_roc_curve(fig, fpr_dist, power_dist, "Distance ROC", "darkgreen", 2)

current_fpr_dist: float = _exceed_rate(dist_null_arr, dist_thr)
current_power_dist: float = _exceed_rate(dist_alt_arr, dist_thr)
_add_operating_point(fig, current_fpr_dist, current_power_dist, 2)

# --- Axis titles and layout ----------------------------------------------
for axis_col in (1, 2):
    fig.update_xaxes(title_text="Значение метрики", row=1, col=axis_col)
    fig.update_xaxes(title_text="Ложноположительная частота (FPR)", row=2, col=axis_col)
    fig.update_yaxes(title_text="Плотность вероятности", row=1, col=axis_col)
    fig.update_yaxes(title_text="Мощность (TPR)", row=2, col=axis_col)

fig.update_layout(
    title_text=f"Анализ статистических тестов с трешолдом: Pareto vs Gamma (α={a}, n={n}, iter={mc_iter})",
    title_x=0.5,
    width=1300,
    height=800,
    showlegend=True,
    legend=dict(yanchor="top", y=0.98, xanchor="left", x=0.01),
)

fig.show()