In [None]:
import csv
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as spi
import sklearn.preprocessing as skp
import sklearn.neighbors as skn
import sklearn.metrics as skmt
import scipy.sparse as spr
# Generator with a fixed seed (0); add_noise draws its noise rows from it.
rng = np.random.default_rng(0)
# One shared StandardScaler instance; every use in this notebook calls
# fit_transform, so it is refit on each call.
scaler = skp.StandardScaler()

In [None]:
# Base name of the data set: the input is read from keel_datasets/<dataset>.dat
# and the per-run results are written to <dataset>.csv.
dataset = "newthyroid"

In [None]:
# (Re)create the results file and write the column header.
# The `with` block closes the handle even if writing raises, and newline=""
# is what the csv module expects so it controls the line endings itself
# (otherwise an extra "\r" is emitted on platforms that translate "\n").
with open(dataset + ".csv", "w", newline = "") as file:
    csvwriter = csv.writer(file)
    csvwriter.writerow(["k*", "ari", "nnmi"])

In [None]:
# Read the whole file as strings: text following an "@" is treated as a
# comment by loadtxt, and fields are comma separated.
raw_path = "keel_datasets/" + dataset + ".dat"
temp_data = np.loadtxt(raw_path, dtype = str, delimiter = ",", comments = "@")

# Every column but the last is parsed as a float feature ...
data = temp_data[:, :-1].astype(float)
# ... and the last column is encoded to integer class ids.
label = skp.LabelEncoder().fit_transform(temp_data[:, -1])

In [None]:
# Size of the clean (noise-free) data matrix.
n, d = data.shape

# Settings read as globals by the cells below.
p = 0.1            # add_noise appends int(p * rows) noise rows
gamma = 10_000     # numerator factor of the soft-threshold in calculate_V
nu = 1             # penalty parameter shared by calculate_U / calculate_V / calculate_L
k = 10             # n_neighbors passed to kneighbors_graph in calculate_I
phi = 1e-3         # decay rate of the edge weights exp(-phi * squared distance)
N = 100            # number of U/V/L update sweeps per run
tol = 8.5e-3       # factor applied to the mean squared distance to obtain eta

# Indices 0 .. n-1 of the original rows; used after clustering to keep only
# the component labels of those rows.
t = np.arange(n)

In [None]:
def add_noise(X):
    """Append uniform noise rows to ``X`` and standardize the stacked matrix.

    ``int(p * rows)`` extra rows are drawn from the global ``rng``; each
    coordinate is uniform between the column minimum and column maximum of
    ``X``.  The original rows come first, the noise rows after them, and the
    result is passed through the global ``scaler``'s ``fit_transform``.

    Parameters
    ----------
    X : 2-D array
        Data matrix, one sample per row.

    Returns
    -------
    2-D array with ``rows + int(p * rows)`` rows and the same number of columns.
    """
    num_rows, num_cols = X.shape
    num_noise = int(p * num_rows)

    unit_draws = rng.uniform(0, 1, (num_noise, num_cols))
    # Stretch the unit-cube draws onto the per-column [min, max] range of X.
    noise_rows = X.min(0) + (X.max(0) - X.min(0)) * unit_draws

    stacked = np.concatenate((X, noise_rows))
    return scaler.fit_transform(stacked)

In [None]:
def calculate_I(X):
    """Build the edge list used by the fusion penalty.

    A k-nearest-neighbour graph (global ``k``) is computed on the rows of
    ``X`` and every pair ``(i, j)`` with ``i < j`` whose entry ``[i, j]`` in
    that graph equals 1 is kept, in row-major order.

    Parameters
    ----------
    X : 2-D array
        Data matrix, one sample per row.

    Returns
    -------
    Integer array with one ``[i, j]`` row per kept pair.

    NOTE(review): only the entry [i, j] with i < j is inspected; a pair where
    just [j, i] is set in the (directed) neighbour graph is not included.
    Confirm that this is intended.
    """
    num_rows = X.shape[0]
    adjacency = skn.kneighbors_graph(X, n_neighbors = k).toarray()

    pairs = [
        np.array([i, j])
        for i in range(num_rows)
        for j in range(i + 1, num_rows)
        if adjacency[i, j] == 1
    ]
    return np.array(pairs)

In [None]:
def calculate_W(X, I):
    """Compute one weight per edge: ``exp(-phi * ||X[i] - X[j]||^2)``.

    Parameters
    ----------
    X : 2-D array
        Data matrix, one sample per row.
    I : array of index pairs
        Edge list; row ``l`` holds the two row indices of edge ``l``.

    Returns
    -------
    1-D float array with ``I.shape[0]`` entries (uses the global ``phi``).
    """
    W = np.zeros(I.shape[0])
    for edge, (a, b) in enumerate(I):
        delta = X[a] - X[b]
        sq_dist = (delta * delta).sum()
        W[edge] = np.exp((-phi) * sq_dist)
    return W

In [None]:
def calculate_U(X, I, V, L):
    """ADMM U-update: solve ``(Id + nu * sum_l e_l e_l^T) U = X + nu * sum_l e_l vt_l^T``.

    For edge ``l = (l1, l2)`` the vector ``e_l`` has +1 at ``l1`` and -1 at
    ``l2``, and ``vt_l = V[l] + L[l] / nu`` is the split variable shifted by
    the dual variable.

    The shift by ``L / nu`` is what makes this step consistent with the other
    two updates in this file: calculate_V thresholds
    ``U[l1] - U[l2] - L[l] / nu`` and calculate_L adds
    ``nu * (V[l] - U[l1] + U[l2])`` to ``L``.  Both correspond to the penalty
    term ``nu/2 * ||V[l] - U[l1] + U[l2] + L[l]/nu||^2``, whose minimiser in
    ``U`` is the linear system above.  The previous implementation accepted
    ``L`` but ignored it.

    Parameters
    ----------
    X : (n, d) array
        Data matrix.
    I : (s, 2) integer array
        Edge list; the two indices of an edge are assumed to be different.
    V : (s, d) array
        Current split variables, one row per edge.
    L : (s, d) array
        Current dual variables, one row per edge.

    Returns
    -------
    (n, d) array, the updated ``U`` (uses the global ``nu``).
    """
    n, d = X.shape
    # reshape keeps an empty edge list usable (no edges -> U equals X).
    edges = np.asarray(I, dtype = int).reshape(-1, 2)
    heads = edges[:, 0]
    tails = edges[:, 1]

    V_tilde = np.asarray(V, dtype = float) + np.asarray(L, dtype = float) / nu

    # s1 = sum_l e_l vt_l^T : +vt_l lands in row l1, -vt_l in row l2.
    # ufunc.at accumulates correctly when an index occurs in several edges.
    s1 = np.zeros([n, d])
    np.add.at(s1, heads, V_tilde)
    np.subtract.at(s1, tails, V_tilde)

    # s2 = sum_l e_l e_l^T : +1 on both diagonal entries, -1 on both
    # off-diagonal entries of every edge.
    s2 = np.zeros([n, n])
    np.add.at(s2, (heads, heads), 1.0)
    np.add.at(s2, (tails, tails), 1.0)
    np.subtract.at(s2, (heads, tails), 1.0)
    np.subtract.at(s2, (tails, heads), 1.0)

    MU = X + nu * s1
    M = np.identity(n) + nu * s2
    # Solve the system directly instead of forming the explicit inverse.
    U = np.linalg.solve(M, MU)
    return U

In [None]:
def calculate_V(I, W, U, L):
    """ADMM V-update: coordinate-wise soft-thresholding per edge.

    For edge ``l = (l1, l2)`` the vector ``U[l1] - U[l2] - L[l] / nu`` is
    shrunk towards zero, coordinate by coordinate, by
    ``sigma = gamma * W[l] / nu``.

    Parameters
    ----------
    I : array of index pairs
        Edge list.
    W : 1-D array
        Edge weights, one per edge.
    U : (n, d) array
        Current centroids.
    L : (s, d) array
        Current dual variables.

    Returns
    -------
    (s, d) array with the updated split variables (uses globals ``gamma``
    and ``nu``).
    """
    num_cols = U.shape[1]
    V = np.zeros([I.shape[0], num_cols])
    for edge, (a, b) in enumerate(I):
        threshold = (gamma * W[edge]) / nu
        shifted = U[a] - U[b] - (1 / nu) * L[edge]
        magnitude = np.maximum(0, np.abs(shifted) - threshold)
        V[edge] = magnitude * np.sign(shifted)
    return V

In [None]:
def calculate_L(I, U, V, L):
    """ADMM dual update: ``L[l] + nu * (V[l] - U[l1] + U[l2])`` for every edge.

    Parameters
    ----------
    I : array of index pairs
        Edge list; row ``l`` is ``(l1, l2)``.
    U : (n, d) array
        Current centroids.
    V : (s, d) array
        Current split variables.
    L : (s, d) array
        Current dual variables; not modified.

    Returns
    -------
    A new array holding the updated dual variables (uses the global ``nu``).
    """
    updated = np.copy(L)
    for edge, (a, b) in enumerate(I):
        residual = V[edge] - U[a] + U[b]
        updated[edge] = updated[edge] + nu * residual
    return updated

In [None]:
# Per-run results: number of clusters found, adjusted Rand index and the
# adjusted mutual information score (kept under the name `nnmi`).
k_star, ari, nnmi = [], [], []

for m in range(15):

    # add_noise already returns a standardized matrix; the second
    # fit_transform is retained from the original code.
    X = scaler.fit_transform(add_noise(data))

    I = calculate_I(X)
    W = calculate_W(X, I)
    n, d = X.shape
    s = I.shape[0]

    # Start from U = X, V[l] = U[l1] - U[l2] and zero dual variables.
    U = np.copy(X)
    V = np.zeros([s, d])
    for edge, (a, b) in enumerate(I):
        V[edge] = U[a] - U[b]
    L = np.zeros([s, d])

    # N sweeps of the U -> V -> L updates.
    for _ in range(N):
        U = calculate_U(X, I, V, L)
        V_next = calculate_V(I, W, U, L)
        L = calculate_L(I, U, V_next, L)
        V = np.copy(V_next)

    # Rows of U whose squared distance is at most eta (tol times the mean
    # squared pairwise distance) are linked; clusters are the connected
    # components of that graph.
    W_U = skmt.pairwise_distances(U) ** 2
    eta = tol * (W_U.sum() / (n ** 2))
    linked = spr.csr_matrix((W_U <= eta).astype(int))
    num_components, Z = spr.csgraph.connected_components(linked, directed = False)

    # Keep only the labels of the original rows (noise rows come after them).
    Z = Z[t]

    k_star.append(np.unique(Z).shape[0])
    ari.append(skmt.adjusted_rand_score(label, Z))
    nnmi.append(skmt.adjusted_mutual_info_score(label, Z))

    print((m + 1), end = " ")

In [None]:
# Append one row per completed run.
# Iterate over the recorded results themselves rather than range(m): after
# the experiment loop `m` holds the index of the last run, so range(m)
# skipped the final run (and the loop overwrote `m` as well).
# newline="" lets the csv module control line endings; `with` closes the
# file even if a write fails.
with open(dataset + ".csv", "a", newline = "") as file:
    csvwriter = csv.writer(file)
    for run_k, run_ari, run_nnmi in zip(k_star, ari, nnmi):
        csvwriter.writerow([run_k, run_ari, run_nnmi])