In [1]:
import numpy as np
import plotly.graph_objects as go

In [2]:
from typing import Tuple

def sqrt_uneven_dist(
    center: Tuple[float, float],
    radius: float,
    rng: "np.random.Generator | None" = None,
) -> list[float]:
    """Draw one random point inside a disk, denser towards the center.

    The polar angle is uniform on [0, 2*pi) and the distance from the center
    is uniform on [0, radius). No square-root correction is applied to the
    distance, so the points are NOT uniform over the disk area: they
    concentrate near ``center``.

    Args:
        center: ``(x, y)`` coordinates of the disk center.
        radius: Disk radius.
        rng: Optional NumPy ``Generator`` to draw from, for reproducible
            sampling. When omitted, the global ``np.random`` state is used.

    Returns:
        ``[x, y]`` coordinates of the sampled point.
    """
    source = np.random if rng is None else rng
    # Draw the angle first and the distance second: with the global state this
    # consumes the random stream in a fixed, documented order.
    angle = source.random() * 2 * np.pi
    r = radius * source.random()
    return [r * np.cos(angle) + center[0], r * np.sin(angle) + center[1]]

In [3]:
# Sanity check of the sampler: 1000 points drawn around the origin with radius 1.
tmp = np.array([sqrt_uneven_dist((0, 0), 1) for _ in range(1000)])

plt = go.Figure()

# Sampled points.
plt.add_trace(go.Scatter(x=tmp[:, 0], y=tmp[:, 1], mode='markers'))

# Outline of the unit circle the points were sampled from.
plt.add_shape(
    type="circle",
    xref="x", yref="y",
    x0=-1, y0=-1, x1=1, y1=1,
    line_color="LightSeaGreen",
)

# Fixed axis ranges with a margin around the circle; fixed 600x600 canvas.
plt.update_xaxes(range=[-1.5, 1.5], zeroline=False)
plt.update_yaxes(range=[-1.5, 1.5])
plt.update_layout(autosize=False, width=600, height=600)
plt.show()

Создание <i>N</i> случайных групп по 40 точек, обучение моделей

In [82]:
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

# Synthetic dataset: N clusters of `blobs` points each, one cluster per class.
classes = []  # classes[i] is the (blobs, 2) array of points of class i
circles = []  # circles[i] is the (center, radius) the points of class i came from
N = 2
blobs = 40
for i in range(N):
    # Center coordinates are drawn from [2, 8), the radius from [1, 2).
    center = (2 + np.random.rand() * 6, 2 + np.random.rand() * 6)
    radius = 1 + np.random.rand()
    circles.append((center, radius))
    classes.append(np.array([sqrt_uneven_dist(center, radius) for _ in range(blobs)]))

# Stack every cluster into one design matrix with a single call instead of
# re-concatenating inside a loop; labels are 0..N-1, each repeated `blobs` times
# in the same order as the stacked rows.
X = np.concatenate(classes)
y = np.repeat(np.arange(N), blobs)

SVM = svm.SVC(kernel="linear")
SVM.fit(X, y)

KNN = KNeighborsClassifier()
KNN.fit(X, y)

KNeighborsClassifier()

Визуализация

In [83]:
def add_trace(clf: str, plt: 'plotly.graph_objs._figure.Figure', df: 'pandas.core.frame.DataFrame', i: int) -> None:
    """Add a scatter trace with the rows of ``df`` that ``clf`` labelled ``i``.

    Args:
        clf: Name of the column in ``df`` holding the predicted labels; also
            used in the trace name and to pick the legend group.
        plt: Figure that receives the new trace.
        df: Frame with ``'x'`` and ``'y'`` columns plus the ``clf`` column.
        i: Label value to select; shown 1-based in the trace name.
    """
    is_class_i = df[clf] == i
    selected = df[is_class_i]
    # Legend group is 1 for the 'KNN' column and 0 for any other column.
    group = int(clf == 'KNN')
    trace = go.Scatter(
        x=selected['x'],
        y=selected['y'],
        hoverinfo='skip',
        mode='markers',
        legendgroup=group,
        marker_size=3,
        name=f'{clf} prediction class {i + 1}',
    )
    plt.add_trace(trace)

In [84]:
plt = go.Figure()

plt.update_xaxes(range=[0, 9], zeroline=False, visible=False)
plt.update_yaxes(range=[0, 9], zeroline=False, visible=False)

# Regular space_size x space_size grid over [0, 9] x [0, 9]; each row of `xy`
# is one (x, y) grid node, with x varying fastest.
space_size=75
xx = np.linspace(0, 9, space_size)
yy = np.linspace(0, 9, space_size)
xy = np.transpose([np.tile(xx, space_size), np.repeat(yy, space_size)])

# Predicted class of every grid node, one column per model.
df = pd.DataFrame({'x': xy[:, 0], 'y': xy[:, 1], 'KNN': KNN.predict(xy), 'SVM': SVM.predict(xy)})
for i in range(N):
    add_trace('KNN', plt, df, i)
    add_trace('SVM', plt, df, i)

for i in range(N):
    # Use dedicated names for the circle geometry: unpacking into `x`, `y`
    # would overwrite the global label vector `y` used to train the models.
    (cx, cy), rad = circles[i]
    plt.add_shape(type="circle",
                  xref="x", yref="y",
                  x0=cx-rad, y0=cy-rad, x1=cx+rad, y1=cy+rad,
                  name=f'class {i + 1}')
    # Training points of class i, drawn larger than the grid markers.
    plt.add_trace(go.Scatter(x=classes[i][:, 0],
                             y=classes[i][:, 1],
                             mode='markers',
                             name=f'class {i + 1}',
                             hoverinfo='skip',
                             legendgroup=2,
                             marker_size=12))

plt.update_layout(
    width=915,
    height=800
)
plt.show()

In [85]:
# Monte-Carlo estimate of each model's accuracy: K fresh points are sampled
# from every class circle and the share of correctly labelled points is printed.
right_KNN = 0
right_SVM = 0

K = 10000
for cls, (center, radius) in enumerate(circles):
    # Sample all K test points of this class, then classify them with one
    # batched predict() call per model instead of one call per point.
    test_points = np.array([sqrt_uneven_dist(center, radius) for _ in range(K)])
    # int(...) keeps the counters plain Python ints, so the printed ratios are
    # formatted as ordinary floats.
    right_KNN += int(np.count_nonzero(KNN.predict(test_points) == cls))
    right_SVM += int(np.count_nonzero(SVM.predict(test_points) == cls))

print(f'KNN validity - {right_KNN/(N * K)}')
print(f'SVM validity - {right_SVM/(N * K)}')

KNN validity - 0.99905
SVM validity - 0.9997
