In [1]:
import numpy as np
from typing import List
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.auto import tqdm

import sys
sys.path.append("..")
from src.data import Generator
from src.graph.knn import GraphKnn
from src.graph.dist import GraphDist

In [2]:
# Experiment 1: sweep the generator parameters along a shared grid and record
# the Monte-Carlo mean of each graph metric at every grid point.
n: int = 100        # sample size handed to the generator
cnt: int = 50       # number of grid points
mc_iter: int = 20   # Monte-Carlo repetitions per grid point
# v and alpha use the same grid, so grid point i pairs v[i] with alpha[i].
v: np.ndarray = np.linspace(0.01, 2, cnt)
alpha: np.ndarray = np.linspace(0.01, 2, cnt)

# One averaged metric per grid point; "f"/"h" refer to gen.get_f()/gen.get_h().
knn_f: List[float] = []
knn_h: List[float] = []
dist_f: List[float] = []
dist_h: List[float] = []

for v_i, alpha_i in tqdm(zip(v, alpha), total=cnt):
    gen: Generator = Generator(v_i, alpha_i, size=n)
    # The tuple order (knn f, knn h, dist f, dist h) fixes the order in which
    # the samplers are called; the sampler is invoked once per repetition.
    for graph_cls, sampler, sink in (
        (GraphKnn, gen.get_f, knn_f),
        (GraphKnn, gen.get_h, knn_h),
        (GraphDist, gen.get_f, dist_f),
        (GraphDist, gen.get_h, dist_h),
    ):
        metrics = [graph_cls(sampler()).calc_metric() for _ in range(mc_iter)]
        sink.append(np.mean(metrics))


  0%|          | 0/50 [00:00<?, ?it/s]

  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  if abs(tmp[i][0] - tmp[j][0]) <= self.d:


In [3]:
# Two side-by-side panels: KNN-graph metric on the left, distance-graph metric
# on the right, each showing the "f" and "h" curves from the parameter sweep.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Количество треугольников", "Минимальное кликовое покрытие"),
    horizontal_spacing=0.1,
)

# One entry per curve:
# (y values, legend name, line colour, subplot column, extra Scatter kwargs).
curve_specs = [
    (knn_f, "KNN Stable(α)", "blue", 1, {}),
    (knn_h, "KNN Student-t(ν)", "red", 1, {}),
    (dist_f, "Distance Stable(α)", "green", 2, {"showlegend": True}),
    (dist_h, "Distance Student-t(ν)", "orange", 2, {"showlegend": True}),
]
for series, label, colour, column, extra in curve_specs:
    fig.add_trace(
        go.Scatter(
            x=v,  # v is used as the x-axis for every curve
            y=series,
            mode="lines+markers",
            name=label,
            line=dict(color=colour, width=2),
            marker=dict(size=4),
            **extra,
        ),
        row=1,
        col=column,
    )

# Axis captions: shared x caption, per-panel y caption.
y_titles = ("Количество треугольников", "Минимальное кликовое покрытие")
for column, y_title in enumerate(y_titles, start=1):
    fig.update_xaxes(title_text="v / α", row=1, col=column)
    fig.update_yaxes(title_text=y_title, row=1, col=column)

fig.update_layout(
    title_text="Сравнение метрик для графов: зависимость от v и α",
    title_x=0.5,
    width=1400,
    height=600,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()

In [4]:
# Experiment 2: metric as a function of sample size n and the graph parameter
# (k for GraphKnn, d for GraphDist), with the generator parameters held fixed.
#
# linspace(25, 150, 27) has a non-integer step (125 / 26) and the generator
# receives int(n[i]).  Truncating once here keeps exactly the same sample
# sizes as before, but makes `n` hold the sizes that were actually simulated,
# so anything that later plots `n` shows the true values.
n: np.ndarray = np.linspace(25, 150, 27).astype(int)
k: np.ndarray = np.linspace(2, 11, 10)
d: np.ndarray = np.linspace(0.1, 10, 10)
mc_iter: int = 20

# k and d are walked with one shared index j below, so the two grids must have
# the same length; fail loudly instead of raising a late IndexError or
# silently skipping part of the d grid.
if len(k) != len(d):
    raise ValueError(
        f"k and d grids must have equal length, got {len(k)} and {len(d)}"
    )

# Result tables indexed as [i][j]: i follows n, j follows k (KNN) or d (distance).
knn_f: List[List[float]] = [[] for _ in range(len(n))]
knn_h: List[List[float]] = [[] for _ in range(len(n))]
dist_f: List[List[float]] = [[] for _ in range(len(n))]
dist_h: List[List[float]] = [[] for _ in range(len(n))]
for i in tqdm(range(len(n))):
    gen: Generator = Generator(v=3, alpha=1, size=int(n[i]))
    for j in range(len(k)):
        k_j = int(k[j])  # GraphKnn is given an integer k
        d_j = d[j]

        # Each entry is the mean metric over mc_iter repetitions; the sampler
        # is called once per repetition.
        knn_f[i].append(
            np.mean([GraphKnn(gen.get_f(), k_j).calc_metric() for _ in range(mc_iter)])
        )
        knn_h[i].append(
            np.mean([GraphKnn(gen.get_h(), k_j).calc_metric() for _ in range(mc_iter)])
        )

        dist_f[i].append(
            np.mean([GraphDist(gen.get_f(), d_j).calc_metric() for _ in range(mc_iter)])
        )
        dist_h[i].append(
            np.mean([GraphDist(gen.get_h(), d_j).calc_metric() for _ in range(mc_iter)])
        )


  0%|          | 0/27 [00:00<?, ?it/s]

In [5]:
# Coordinate grids for the surfaces.  indexing="ij" makes axis 0 follow n and
# axis 1 follow k / d, which matches the [i][j] layout of the result tables.
N_knn: np.ndarray
K_knn: np.ndarray
N_knn, K_knn = np.meshgrid(n, k, indexing="ij")
knn_f_array: np.ndarray = np.array(knn_f)
knn_h_array: np.ndarray = np.array(knn_h)

N_dist: np.ndarray
D_dist: np.ndarray
N_dist, D_dist = np.meshgrid(n, d, indexing="ij")
dist_f_array: np.ndarray = np.array(dist_f)
dist_h_array: np.ndarray = np.array(dist_h)

fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "surface"}, {"type": "surface"}]],
    subplot_titles=(
        "3D KNN Metric (n, k, metric)",
        "3D Distance Metric (n, d, metric)",
    ),
    horizontal_spacing=0.05,
)

# One entry per surface: (x grid, y grid, z values, name, colorscale, column).
surface_specs = [
    (N_knn, K_knn, knn_f_array, "KNN f", "Blues", 1),
    (N_knn, K_knn, knn_h_array, "KNN h", "Reds", 1),
    (N_dist, D_dist, dist_f_array, "Distance f", "Greens", 2),
    (N_dist, D_dist, dist_h_array, "Distance h", "Oranges", 2),
]
for x_grid, y_grid, z_values, label, colorscale, column in surface_specs:
    fig.add_trace(
        go.Surface(
            x=x_grid,
            y=y_grid,
            z=z_values,
            name=label,
            colorscale=colorscale,
            opacity=0.7,
            showscale=False,
            # Surface traces are not listed in the legend unless asked to be;
            # without this the layout-level showlegend below has nothing to
            # show and the two overlapping surfaces per scene are unlabeled.
            showlegend=True,
        ),
        row=1,
        col=column,
    )

fig.update_layout(
    scene=dict(
        xaxis_title="n (размер графа)",
        yaxis_title="k (количество соседей)",
        zaxis_title="Количество треугольников",
        camera=dict(eye=dict(x=1.2, y=1.2, z=1.2)),
    ),
    scene2=dict(
        xaxis_title="n (размер графа)",
        yaxis_title="d (пороговое расстояние)",
        zaxis_title="Минимальное кликовое покрытие",
        camera=dict(eye=dict(x=1.2, y=1.2, z=1.2)),
    ),
    title_text="3D Визуализация метрик: влияние параметров графа",
    title_x=0.5,
    width=1400,
    height=700,
    showlegend=True,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()

### Distance
Видно, что при росте n и фиксированных остальных параметрах размер минимального кликового покрытия растет.

In [6]:
# Experiment 3: empirical threshold at level `a` from the "f" sample (plotted
# as H0 in the next cell) and the number of "h" draws (H1) that fall in the
# rejection region.
n: int = 100
cnt: int = 50
v: float = 3
alpha: float = 1
mc_iter: int = 500

gen: Generator = Generator(v, alpha, size=n)
# mc_iter metric values per (graph type, sampler) pair, each with its own
# progress bar; the sampler is called once per repetition.
knn_f: List[float] = [GraphKnn(gen.get_f()).calc_metric() for _ in tqdm(range(mc_iter))]
knn_h: List[float] = [GraphKnn(gen.get_h()).calc_metric() for _ in tqdm(range(mc_iter))]

dist_f: List[float] = [GraphDist(gen.get_f()).calc_metric() for _ in tqdm(range(mc_iter))]
dist_h: List[float] = [GraphDist(gen.get_h()).calc_metric() for _ in tqdm(range(mc_iter))]

a: float = 0.05  # significance level

knn_f_mean = np.mean(knn_f)
knn_h_mean = np.mean(knn_h)
dist_f_mean = np.mean(dist_f)
dist_h_mean = np.mean(dist_h)

# The rejection side is chosen from the sample means: if the "h" mean is
# larger, reject for values above the (1 - a) order statistic of the "f"
# sample; otherwise reject for values below its a order statistic.
# *_pow is the COUNT of "h" draws in the rejection region (not a fraction).
knn_thr: float
knn_pow: int
knn_f_sorted = sorted(knn_f)
knn_h_values = np.array(knn_h)
if knn_h_mean > knn_f_mean:
    knn_thr = knn_f_sorted[int((1 - a) * len(knn_f))]
    knn_pow = sum(knn_h_values > knn_thr)
else:
    knn_thr = knn_f_sorted[int(a * len(knn_f))]
    knn_pow = sum(knn_h_values < knn_thr)

dist_thr: float
dist_pow: int
dist_f_sorted = sorted(dist_f)
dist_h_values = np.array(dist_h)
if dist_h_mean > dist_f_mean:
    dist_thr = dist_f_sorted[int((1 - a) * len(dist_f))]
    dist_pow = sum(dist_h_values > dist_thr)
else:
    dist_thr = dist_f_sorted[int(a * len(dist_f))]
    dist_pow = sum(dist_h_values < dist_thr)


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

In [7]:
def _rejection_rate(sample, thr: float, reject_above: bool) -> float:
    """Return the fraction of `sample` that falls in the rejection region.

    The region is `value > thr` when `reject_above` is true and `value < thr`
    otherwise (both strict), matching the rule used to pick the threshold.
    """
    values = np.asarray(sample)
    hits = values > thr if reject_above else values < thr
    return np.count_nonzero(hits) / len(values)


# The threshold cell picks the rejection side from the sample means (upper
# tail when the "h" mean exceeds the "f" mean, lower tail otherwise).  The
# curves and the marked operating point must use the same side; always using
# ">" would put the marker near FPR = 1 - a in the lower-tail case.
knn_reject_above: bool = bool(knn_h_mean > knn_f_mean)
dist_reject_above: bool = bool(dist_h_mean > dist_f_mean)

# Top row: metric histograms with the threshold; bottom row: ROC-like curves.
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=(
        "Количество треугольников: Распределения и трешолд",
        "Минимальное кликовое покрытие: Распределения и трешолд",
        "KNN: ROC-подобная кривая",
        "Distance: ROC-подобная кривая",
    ),
    specs=[
        [{"secondary_y": False}, {"secondary_y": False}],
        [{"secondary_y": False}, {"secondary_y": False}],
    ],
    vertical_spacing=0.12,
    horizontal_spacing=0.1,
)

# --- Row 1, col 1: KNN metric under H0 ("f") and H1 ("h") ---
fig.add_trace(
    go.Histogram(
        x=knn_f,
        name="KNN f (H0)",
        opacity=0.7,
        nbinsx=30,
        marker_color="blue",
        histnorm="probability density",
    ),
    row=1,
    col=1,
)

fig.add_trace(
    go.Histogram(
        x=knn_h,
        name="KNN h (H1)",
        opacity=0.7,
        nbinsx=30,
        marker_color="red",
        histnorm="probability density",
    ),
    row=1,
    col=1,
)

fig.add_vline(
    x=knn_thr,
    line=dict(color="green", width=3, dash="dash"),
    annotation_text=f"Трешолд: {knn_thr:.3f}",
    row=1,
    col=1,
)

# --- Row 1, col 2: distance-graph metric under H0 and H1 ---
fig.add_trace(
    go.Histogram(
        x=dist_f,
        name="Distance f (H0)",
        opacity=0.7,
        nbinsx=30,
        marker_color="lightblue",
        histnorm="probability density",
        showlegend=False,
    ),
    row=1,
    col=2,
)

fig.add_trace(
    go.Histogram(
        x=dist_h,
        name="Distance h (H1)",
        opacity=0.7,
        nbinsx=30,
        marker_color="orange",
        histnorm="probability density",
        showlegend=False,
    ),
    row=1,
    col=2,
)

fig.add_vline(
    x=dist_thr,
    line=dict(color="green", width=3, dash="dash"),
    annotation_text=f"Трешолд: {dist_thr:.3f}",
    row=1,
    col=2,
)

# --- Row 2, col 1: KNN ROC-like curve ---
# Thresholds sweep the observed range of the H0 sample; for each one the
# rejection rate is measured on H1 (power) and on H0 (false-positive rate).
thresholds_knn: np.ndarray = np.linspace(min(knn_f), max(knn_f), 50)
power_knn: List[float] = [
    _rejection_rate(knn_h, thr, knn_reject_above) for thr in thresholds_knn
]
fpr_knn: List[float] = [
    _rejection_rate(knn_f, thr, knn_reject_above) for thr in thresholds_knn
]

fig.add_trace(
    go.Scatter(
        x=fpr_knn,
        y=power_knn,
        mode="lines+markers",
        name="KNN ROC",
        line=dict(color="purple", width=2),
        marker=dict(size=4),
        showlegend=False,
    ),
    row=2,
    col=1,
)

# Operating point of the threshold chosen at level a.
current_fpr_knn: float = _rejection_rate(knn_f, knn_thr, knn_reject_above)
current_power_knn: float = _rejection_rate(knn_h, knn_thr, knn_reject_above)
fig.add_trace(
    go.Scatter(
        x=[current_fpr_knn],
        y=[current_power_knn],
        mode="markers",
        marker=dict(size=12, color="red", symbol="star"),
        name=f"α={a}",
        showlegend=False,
    ),
    row=2,
    col=1,
)

# --- Row 2, col 2: distance-graph ROC-like curve ---
thresholds_dist: np.ndarray = np.linspace(min(dist_f), max(dist_f), 50)
power_dist: List[float] = [
    _rejection_rate(dist_h, thr, dist_reject_above) for thr in thresholds_dist
]
fpr_dist: List[float] = [
    _rejection_rate(dist_f, thr, dist_reject_above) for thr in thresholds_dist
]

fig.add_trace(
    go.Scatter(
        x=fpr_dist,
        y=power_dist,
        mode="lines+markers",
        name="Distance ROC",
        line=dict(color="darkgreen", width=2),
        marker=dict(size=4),
        showlegend=False,
    ),
    row=2,
    col=2,
)

current_fpr_dist: float = _rejection_rate(dist_f, dist_thr, dist_reject_above)
current_power_dist: float = _rejection_rate(dist_h, dist_thr, dist_reject_above)
fig.add_trace(
    go.Scatter(
        x=[current_fpr_dist],
        y=[current_power_dist],
        mode="markers",
        marker=dict(size=12, color="red", symbol="star"),
        name=f"α={a}",
        showlegend=False,
    ),
    row=2,
    col=2,
)

fig.update_xaxes(title_text="Значение метрики", row=1, col=1)
fig.update_xaxes(title_text="Значение метрики", row=1, col=2)
fig.update_xaxes(title_text="Ложноположительная частота (FPR)", row=2, col=1)
fig.update_xaxes(title_text="Ложноположительная частота (FPR)", row=2, col=2)

fig.update_yaxes(title_text="Плотность вероятности", row=1, col=1)
fig.update_yaxes(title_text="Плотность вероятности", row=1, col=2)
fig.update_yaxes(title_text="Мощность (TPR)", row=2, col=1)
fig.update_yaxes(title_text="Мощность (TPR)", row=2, col=2)

fig.update_layout(
    title_text=f"Анализ статистических тестов с трешолдом (α={a}, n={n}, iter={mc_iter})",
    title_x=0.5,
    width=1300,
    height=800,
    showlegend=True,
    legend=dict(yanchor="top", y=0.98, xanchor="left", x=0.01),
)

fig.show()
