In [1]:
import numpy as np
import matplotlib.pyplot as plt 

from sklearn.cluster import KMeans
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cdist

In [3]:
# Synthetic 2-D binary classification problem (seeded, so X and y are
# identical on every run).
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_classes=2,
    n_clusters_per_class=2,
    random_state=42,
    n_redundant=0,
)
print(X[:5], y[:5])

# Hold out 20% of the samples for evaluation. The split is seeded as well:
# without random_state the partition changes on every kernel restart, so the
# cluster centers, RBF width and accuracy below would not be reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

[[-0.99910178 -0.66386   ]
 [ 1.24668618  1.15359685]
 [ 0.96277683  0.85939747]
 [-2.95744095  2.03364529]
 [ 1.14116527  1.05944863]] [1 1 1 1 1]


In [25]:
# Number of hidden RBF units; one KMeans cluster is fitted per unit.
num_neurons = 10

# Cluster the training inputs only; the resulting centroids are used as the
# RBF centers. fit() returns the estimator itself, so construction and
# fitting can be chained.
kmeans = KMeans(n_clusters=num_neurons, random_state=42, n_init=10).fit(X_train)
centers = kmeans.cluster_centers_

print(centers)

[[-2.19051237  0.75827088]
 [-0.39406292  0.07094792]
 [ 0.65075103 -1.08578374]
 [ 0.94451706  1.01653303]
 [-0.80031699  1.37314485]
 [-1.47724876 -2.36079823]
 [ 1.93174133 -1.06121315]
 [ 2.0894322   2.18769235]
 [-2.0352836   2.71303376]
 [-0.89782982 -0.92802662]]


In [7]:
# Shared width (sigma) for all RBF units: mean pairwise Euclidean distance
# between centers, scaled by 1 / sqrt(2 * num_neurons).
# NOTE(review): the mean is taken over the full center-to-center matrix, so
# each center's zero distance to itself is included in the average.
std_dev = cdist(centers, centers, metric='euclidean').mean() / np.sqrt(2 * num_neurons)
print(std_dev)

0.5930068138491337


In [8]:
def rbf_activation(X, centers, std_dev):
    """Evaluate Gaussian radial basis functions for every sample/center pair.

    Computes exp(-||x - c||^2 / (2 * std_dev**2)) for each row x of X and
    each row c of centers.

    Parameters
    ----------
    X : array-like, one sample per row.
    centers : array-like, one RBF center per row (same number of columns as X).
    std_dev : scalar width shared by all basis functions.

    Returns
    -------
    numpy.ndarray with one row per sample in X and one column per center.
    """
    # Squared Euclidean distance from each sample to each center.
    squared_distances = cdist(X, centers, 'sqeuclidean')
    two_variance = 2 * std_dev**2
    return np.exp(-squared_distances / two_variance)

In [19]:
# Hidden-layer outputs: one Gaussian activation per (sample, center) pair,
# computed with the same centers and width for both splits.
rbf_train = rbf_activation(X_train, centers, std_dev)
rbf_test = rbf_activation(X_test, centers, std_dev)

# Append a constant column of ones to each feature matrix (a bias input for
# the read-out layer).
rbf_train = np.concatenate([rbf_train, np.ones((len(rbf_train), 1))], axis=1)
rbf_test = np.concatenate([rbf_test, np.ones((len(rbf_test), 1))], axis=1)



In [20]:
from sklearn.linear_model import LogisticRegression

# Linear read-out: logistic regression trained on the RBF features.
clf = LogisticRegression()
clf.fit(rbf_train, y_train)

# Predict labels for the held-out split.
y_pred = clf.predict(rbf_test)

# accuracy_score expects (y_true, y_pred). Accuracy is symmetric, so the
# value is the same either way, but keeping the documented order avoids a
# silent error if this metric is later swapped for an asymmetric one
# (precision, recall, ...).
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy: ", accuracy*100)

Accuracy:  88.5
