In [16]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from scipy.signal import convolve2d
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import  coo_matrix

In [2]:
# load_data() returns (train, test) pairs; only the training pair is kept here.
data, label = mnist.load_data()[0]
print('Shape of data:', data.shape)
print('Number of labels:', len(label))

Shape of data: (60000, 28, 28)
Number of labels: 60000


# preprocessing

In [3]:
def smooth(img, filter_size=3, num_iter=2):
    """Blur a 2-D image by repeatedly applying a square mean (box) filter.

    Parameters
    ----------
    img : 2-D array-like
        Image to smooth.
    filter_size : int, default 3
        Side length of the square averaging kernel.
    num_iter : int, default 2
        Number of successive filter passes. Each pass is applied to the
        output of the previous one; ``0`` returns a float copy of ``img``.

    Returns
    -------
    numpy.ndarray
        Smoothed image with the same shape as ``img`` (``mode='same'``).
        With convolve2d's default boundary handling the area outside the
        image is treated as zeros, so values near the border are damped.
    """
    average_filter = np.ones((filter_size, filter_size)) / (filter_size ** 2)

    # Start from a float copy so the result is defined even when num_iter == 0
    # and the caller's array is never aliased.
    smoothed_img = np.array(img, dtype=float)
    for _ in range(num_iter):
        # Feed the previous pass back in; convolving the original image every
        # time would make num_iter meaningless.
        smoothed_img = convolve2d(smoothed_img, average_filter, mode='same')

    return smoothed_img

# Smooth every image (default filter settings) and stack the results into one array.
data_smoothed = np.array(list(map(smooth, data)))

# Top 50 PCA components

In [4]:
# Flatten each smoothed image into a single feature vector (one row per sample).
data_flatten = data_smoothed.reshape(data_smoothed.shape[0], -1)

# Pin the seed: depending on the input size and scikit-learn version, PCA may
# choose a randomized SVD solver, which would otherwise make the projection
# differ slightly from run to run.
pca = PCA(n_components=50, random_state=0)
data_pca = pca.fit_transform(data_flatten)

data_pca.shape

(60000, 50)

# graph construction

In [17]:
def graph_construction(X, k):
    """Build a symmetric, unweighted k-nearest-neighbour graph over the rows of X.

    Parameters
    ----------
    X : array-like of shape (num_points, num_features)
        Points to connect; one graph node per row.
    k : int
        Number of nearest neighbours (excluding the point itself) linked to
        each node. ``k == 0`` yields a graph with no edges.

    Returns
    -------
    scipy sparse matrix of shape (num_points, num_points)
        Adjacency matrix ``W`` with ``W[i, j] == 1`` when ``j`` is among the
        ``k`` nearest neighbours of ``i`` or ``i`` is among those of ``j``,
        and ``0`` otherwise.
    """
    num_points = X.shape[0]

    if k == 0:
        # NearestNeighbors rejects n_neighbors=0; an empty neighbour table
        # keeps the "no edges" result for this degenerate case.
        neighbors = np.empty((num_points, 0), dtype=np.intp)
    else:
        nearest_neighbors = NearestNeighbors(n_neighbors=k).fit(X)
        # Calling kneighbors() without a query returns the neighbours of the
        # fitted points with each point excluded from its own list. Slicing
        # off the first column instead is wrong when X has duplicate rows,
        # because the nearest hit may then be a duplicate, not the point itself.
        neighbors = nearest_neighbors.kneighbors(return_distance=False)

    # Assemble the directed kNN edges directly in sparse form, so memory scales
    # with the number of edges rather than num_points ** 2.
    rows = np.repeat(np.arange(num_points), neighbors.shape[1])
    cols = neighbors.ravel()
    W = coo_matrix(
        (np.ones(rows.shape[0]), (rows, cols)),
        shape=(num_points, num_points),
    )

    # Symmetrise; mutual neighbours sum to 2, so clamp back to 0/1 with sign().
    W = W + W.T
    W = W.sign()

    return W