In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets as skd
import sklearn.preprocessing as skp
import sklearn.neighbors as skn
import sklearn.metrics as skmt
import scipy.sparse as spr
# Notebook-wide random generator with a fixed seed (0); add_noise draws its
# noise points from it, so runs are reproducible after a kernel restart.
rng = np.random.default_rng(0)
# Shared StandardScaler instance; the experiment loop re-fits it on every
# noisy data set through fit_transform.
scaler = skp.StandardScaler()

In [None]:
def add_noise(X, p, labels=None, generator=None):
    """Append uniformly distributed noise points to a labelled data set.

    ``int(p * n)`` extra points are drawn uniformly inside the axis-aligned
    bounding box of ``X`` and appended after the original rows. Every noise
    point is given the label ``-1``.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Clean data points.
    p : float
        Noise ratio; ``int(p * n)`` points are added.
    labels : array-like of shape (n,), optional
        Labels of the rows of ``X``. When omitted, the notebook-level
        ``label`` array is used (the historical behaviour), so that global
        must still hold exactly one label per row of ``X``.
    generator : numpy.random.Generator, optional
        Source of randomness. When omitted, the notebook-level ``rng`` is used.

    Returns
    -------
    new_X : ndarray of shape (n + r, d)
        ``X`` followed by the ``r`` noise points.
    new_label : ndarray of shape (n + r,)
        ``labels`` followed by ``r`` entries equal to ``-1``. The noise labels
        are stored as floats, so integer labels are promoted to float.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows of ``X``.
    """
    # Fall back to notebook globals only when the caller did not supply them.
    if labels is None:
        labels = label
    if generator is None:
        generator = rng

    labels = np.asarray(labels)
    n, d = X.shape
    if labels.shape[0] != n:
        raise ValueError(
            "labels has %d entries but X has %d rows" % (labels.shape[0], n)
        )

    r = int(p * n)
    # Scale unit-cube samples to the bounding box of the data.
    noise = generator.uniform(0, 1, (r, d))
    noise = X.min(0) + (X.max(0) - X.min(0)) * noise
    noise_index = np.full(r, -1.0)

    new_X = np.concatenate((X, noise))
    new_label = np.concatenate((labels, noise_index))
    return new_X, new_label

In [None]:
def calculate_E(X, n_neighbors=10, metric="cosine"):
    """Build the dense k-nearest-neighbour graph of the rows of ``X``.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Data points, one per row.
    n_neighbors : int, default 10
        Number of neighbours per point (the value previously hard-coded).
    metric : str, default "cosine"
        Distance metric handed to ``sklearn.neighbors.kneighbors_graph``
        (the value previously hard-coded).

    Returns
    -------
    E : ndarray of shape (n, n)
        Dense array version of the sparse graph returned by scikit-learn.
        The graph is directed: row ``i`` marks the neighbours of point ``i``,
        so ``E`` is generally not symmetric.
    """
    graph = skn.kneighbors_graph(X, n_neighbors=n_neighbors, metric=metric)
    return graph.toarray()

In [None]:
def calculate_I(E):
    """Symmetrise ``E`` into a mutual-neighbour graph and list its edges.

    Each off-diagonal pair is replaced by ``min(E[i, j], E[j, i])``, so an
    edge survives only if it is present in both directions.

    Parameters
    ----------
    E : ndarray of shape (n, n)
        Adjacency matrix. **Modified in place**: on return it is symmetric.
        Callers in this notebook rely on that side effect.

    Returns
    -------
    I : ndarray of shape (s, 2), integer dtype
        One row ``[i, j]`` with ``i < j`` for every entry of the symmetrised
        matrix equal to 1, in row-major order. When there are no edges the
        result has shape ``(0, 2)``.
    """
    # np.minimum allocates a fresh array, so writing back into E is safe even
    # though E.T is a view of E. The diagonal is left unchanged.
    mutual = np.minimum(E, E.T)
    E[...] = mutual

    # Strict upper triangle -> each undirected edge is reported once (i < j).
    I = np.argwhere(np.triu(mutual, k=1) == 1)
    return I

In [None]:
def calculate_W(E, I):
    """Compute one weight per edge from the node degrees of ``E``.

    For an edge ``(p, q)`` the weight is
    ``deg.sum() / (n * sqrt(deg[p] * deg[q]))`` where ``deg`` holds the
    column sums of ``E`` and ``n`` is the number of nodes.

    Parameters
    ----------
    E : ndarray of shape (n, n)
        Adjacency matrix.
    I : ndarray of shape (s, 2)
        Edge list; each row holds the two node indices of an edge.

    Returns
    -------
    W : ndarray of shape (s,)
        Edge weights, in the same order as the rows of ``I``.
    """
    degree = E.sum(0)
    num_nodes = E.shape[0]
    num_edges = I.shape[0]

    if num_edges == 0:
        # Nothing to index: return the same empty float vector a zero-length
        # loop would have produced.
        return np.zeros(0) * degree.sum()

    first = I[:, 0]
    second = I[:, 1]
    inverse_norm = 1 / (num_nodes * np.sqrt(degree[first] * degree[second]))
    return inverse_norm * degree.sum()

In [None]:
def calculate_cost(X, I, U, gamma, mu, weights=None):
    """Evaluate the clustering objective for the representatives ``U``.

    The value is::

        0.5 * sum_i ||X[i] - U[i]||^2
        + (gamma / 2) * sum_l w[l] * mu * t_l / (mu + t_l)

    where ``t_l`` is the squared Euclidean distance between the two
    representatives joined by edge ``l``.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Data points.
    I : ndarray of shape (s, 2)
        Edge list; each row holds the two node indices of an edge.
    U : ndarray of shape (n, d)
        Current representatives.
    gamma : float
        Weight of the pairwise term.
    mu : float
        Scale parameter of the pairwise penalty.
    weights : array-like of shape (s,), optional
        Per-edge weights. When omitted, the notebook-level ``W`` is used
        (the historical behaviour of this function).

    Returns
    -------
    float
        The objective value.
    """
    # Historical behaviour: read the edge weights from the notebook global.
    if weights is None:
        weights = W

    residual = X - U
    cost = 0.5 * (residual * residual).sum()

    if I.shape[0] == 0:
        # No edges, hence no pairwise term.
        return cost

    gap = U[I[:, 0]] - U[I[:, 1]]
    sq_dist = (gap * gap).sum(axis=1)
    pairwise = np.asarray(weights) * ((mu * sq_dist) / (mu + sq_dist))
    return cost + (gamma / 2) * pairwise.sum()

In [None]:
def calculate_L(I, U, mu):
    """Compute the auxiliary per-edge coefficients for the current ``U``.

    For every edge ``(p, q)`` the coefficient is
    ``(mu / (mu + ||U[p] - U[q]||^2)) ** 2``.

    Parameters
    ----------
    I : ndarray of shape (s, 2)
        Edge list; each row holds the two node indices of an edge.
    U : ndarray of shape (n, d)
        Current representatives.
    mu : float
        Scale parameter of the pairwise penalty.

    Returns
    -------
    L : ndarray of shape (s,)
        One coefficient per edge, in the order of the rows of ``I``.
    """
    def _coefficient(edge):
        # Squared Euclidean distance between the two endpoints' representatives.
        gap = U[edge[0]] - U[edge[1]]
        sq_dist = (gap * gap).sum()
        return (mu / (mu + sq_dist)) ** 2

    return np.array([_coefficient(edge) for edge in I], dtype=float)

In [None]:
def calculate_A(n, I, L, weights=None):
    """Assemble the weighted graph matrix ``sum_l w[l] * L[l] * e_l e_l^T``.

    ``e_l`` is the vector with ``+1`` at the first endpoint of edge ``l``,
    ``-1`` at the second endpoint and zeros elsewhere, so each edge only
    touches four entries of the result. They are updated directly instead of
    materialising an ``n x n`` outer product per edge.

    Parameters
    ----------
    n : int
        Number of nodes; the result has shape ``(n, n)``.
    I : ndarray of shape (s, 2)
        Edge list; the two endpoints of every edge are assumed to differ.
    L : array-like of shape (s,)
        Per-edge coefficients.
    weights : array-like of shape (s,), optional
        Per-edge weights. When omitted, the notebook-level ``W`` is used
        (the historical behaviour of this function).

    Returns
    -------
    A : ndarray of shape (n, n)
        Symmetric matrix whose rows sum to zero.
    """
    # Historical behaviour: read the edge weights from the notebook global.
    if weights is None:
        weights = W

    A = np.zeros([n, n])
    if I.shape[0] == 0:
        return A

    first = I[:, 0]
    second = I[:, 1]
    coef = np.asarray(weights) * np.asarray(L)

    # np.add.at accumulates correctly when a node appears in several edges,
    # which plain fancy-index assignment would not.
    np.add.at(A, (first, first), coef)
    np.add.at(A, (second, second), coef)
    np.add.at(A, (first, second), -coef)
    np.add.at(A, (second, first), -coef)
    return A

In [None]:
def calculate_U(X, A, gamma):
    """Solve ``(Id + gamma * A) U = X`` for the representatives ``U``.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Data points.
    A : ndarray of shape (n, n)
        Weighted graph matrix.
    gamma : float
        Weight of the pairwise term.

    Returns
    -------
    U : ndarray of shape (n, d)
        Solution of the linear system.
    """
    n = X.shape[0]
    M = np.identity(n) + gamma * A
    # Solve the system directly rather than forming the explicit inverse:
    # one factorisation instead of an inversion plus a matrix product, and
    # numerically better behaved.
    return np.linalg.solve(M, X)

In [None]:
# Experiment: cluster a synthetic data set under increasing amounts of uniform
# noise (0 %, 5 %, ..., 20 %), 15 random repetitions per level, and report the
# mean number of clusters, ARI and adjusted mutual information measured on the
# clean points only.
N_SAMPLES = 500
data, label = skd.make_blobs(n_samples = N_SAMPLES, random_state = 100)
# data, label = skd.make_circles(n_samples = N_SAMPLES, factor = 0.25, noise = 0.05, random_state = 100)
# data, label = skd.make_moons(n_samples = N_SAMPLES, noise = 0.05, random_state = 100)
# Indices of the clean points; the noise points are appended after them.
t = np.arange(N_SAMPLES)

for m in range(5):

    print("Noise :", 0.05 * m)
    print("Iterations :", end = " ")

    k_star = []
    ari = []
    nnmi = []

    for h in range(15):

        # The noisy labels get their own name: add_noise reads the clean
        # `label` array, so that global must not be overwritten here.
        X, noisy_label = add_noise(data, 0.05 * m)
        X = scaler.fit_transform(X)

        n, d = X.shape
        E = calculate_E(X)
        # calculate_I also symmetrises E in place; everything below uses the
        # symmetrised graph.
        I = calculate_I(E)
        # W must stay a notebook-level name: calculate_cost and calculate_A
        # read it when no explicit weights are passed.
        W = calculate_W(E, I)
        s = I.shape[0]

        chi = np.sqrt((X * X).sum())
        edge_dist = E * skmt.pairwise_distances(X)

        # delta = mean length of the shortest 2 % of the graph edges. Only
        # real edges are considered (each once, via the upper triangle):
        # sorting the full masked matrix would pick up the zeros of all
        # non-edges and make delta collapse to 0.
        edge_len = np.sort(edge_dist[np.triu(E, 1) == 1])
        r = max(1, int(0.02 * edge_len.shape[0]))
        delta = edge_len[:r].mean()

        U = np.copy(X)
        L = np.ones(s)
        mu = 3 * (np.max(edge_dist) ** 2)
        A = calculate_A(n, I, L)
        gamma = chi / np.sqrt((A * A).sum())
        cost = calculate_cost(X, I, U, gamma, mu)

        condition = True
        i = 0
        while(condition):
            previous_cost = cost
            L = calculate_L(I, U, mu)
            A = calculate_A(n, I, L)
            U = calculate_U(X, A, gamma)
            i = i + 1
            if i % 4 == 0:
                # Every fourth iteration: halve mu (never below delta / 2)
                # and rebalance gamma.
                mu = np.max([mu / 2, delta / 2])
                gamma = chi / np.sqrt((A * A).sum())
            cost = calculate_cost(X, I, U, gamma, mu)
            # Stop at the iteration cap, or earlier once the objective has
            # settled AND mu has reached its floor (before that the next
            # halving would change the objective again). The previous test
            # `(|change| < 0.1) or (i < 100)` kept looping while converged and
            # could therefore never terminate.
            converged = (np.absolute(cost - previous_cost) < 0.1) and (mu <= delta / 2)
            condition = (not converged) and (i < 100)

        # Final clusters: connected components of the graph linking points
        # whose representatives are close.
        # NOTE(review): a squared distance is compared with delta, which is an
        # unsquared edge length -- confirm this is the intended threshold.
        W_U = skmt.pairwise_distances(U) ** 2
        adj = (W_U <= delta).astype(int)
        adj_sparse = spr.csr_matrix(adj)
        num_components, Z = spr.csgraph.connected_components(adj_sparse, directed = False)

        # Show one representative run: ground truth (noise labelled -1)
        # versus the recovered clustering.
        if (m == 2) and (h == 8):

            plt.figure()
            plt.scatter(X[:, 0], X[:, 1], c = noisy_label, alpha = 0.5)
            plt.title("Ground truth (noise = -1)")
            plt.grid()

            plt.figure()
            plt.scatter(X[:, 0], X[:, 1], c = Z, alpha = 0.5)
            plt.title("Recovered clusters")
            plt.grid()

        # Score on the clean points only.
        truth = noisy_label[t]
        Z = Z[t]

        print((h + 1), end = " ")
        k_star.append(np.unique(Z).shape[0])
        ari.append(skmt.adjusted_rand_score(truth, Z))
        nnmi.append(skmt.adjusted_mutual_info_score(truth, Z))

    print()
    print("Number of clusters :", np.array(k_star).mean())
    print("ARI :", np.array(ari).mean())
    print("NNMI :", np.array(nnmi).mean())
    print()