In [None]:
import utils

## Initialize MNIST

In [None]:
# Experiment configuration for the MNIST run. Each value below is forwarded
# unchanged, by keyword, to utils.SolutionPath.
sample_data = 'mnist'
labels = ['2', '4', '6', '8']
sample_number = 400
perplexity = 25
alpha = 8
h = 10
total_iteration = 50
momentum = 0.5
R = 4
m = 1

# `sp` is the single SolutionPath instance that every later cell operates on.
sp = utils.SolutionPath(
    labels=labels,
    sample_data=sample_data,
    perplexity=perplexity,
    alpha=alpha,
    h=h,
    total_iteration=total_iteration,
    momentum=momentum,
    sample_number=sample_number,
    R=R,
    m=m,
)

In [None]:
## Load the data for this run through the SolutionPath instance.
## NOTE(review): presumably this populates `sp.X_sampled`, which the
## P-matrix cell reads next — confirm in utils.SolutionPath.load_data.
sp.load_data()

In [None]:
# Calculate the probability matrix P from the sampled data.
# NOTE(review): perplexity is hard-coded to 30 here, while the SolutionPath
# instance was constructed with perplexity=25 — confirm which value is intended.
sp.tsne_probabilities(
    X=sp.X_sampled,
    perplexity=30,
    tol=1e-10,
    initial_beta_coefficient=1e-4,
)

In [None]:
## Show the P matrix
sp.show_P_matrix()

In [None]:
## Solve the eigenvalue problem.
## NOTE(review): the result is not captured here; presumably it is stored on
## `sp` for the visualization cells that follow — confirm.
sp.getSolution()

In [None]:
## Show the solutions (presumably the vectors obtained by `sp.getSolution()` — confirm)
sp.visualize_vectors()

## Visualization of MNIST

In [None]:
## Initialize Y (the low-dimensional representation) with random values.
## The seed is passed explicitly, presumably so the initialization is reproducible.
sp.initialize_Y(threshold=0.1, seed=10)

In [None]:
# Average Residual Ratio (ARR) and its visualization (`Figure 2`)
epsilon = 0.01
k_max = 300
fontsize = 20
n_neighbors = 10

# `df` stays a notebook-level name: the correlation cell and the scatter-plot
# cell further down read it.
df = sp.calcTermsByOptimization(
    k_max=k_max,
    initial_value=True,
    n_neighbors=n_neighbors,
)

sp.visualizeTermsByOptimization(
    df=df,
    # Plain string literal: the title has no placeholders, so no f-prefix is needed.
    title='ARR / Trustworthiness for MNIST dataset',
    epsilon=epsilon,
    saved_filename='arr_plot_mnist_with_TW.pdf',
    fontsize=fontsize,
)

In [None]:
## Correlation coefficient between ARR and trustworthiness.
## NOTE(review): presumably the `value` column holds the ARR; if `df` is a
## pandas DataFrame, `Series.corr` defaults to the Pearson coefficient — confirm.
df['trustworthiness'].corr(df['value'])

In [None]:
## Visualize the solution path.
## k0, k1, k2 are the indices of the solution path.
## Y is re-initialized first, with the same threshold and seed as the earlier
## initialization cell.
## NOTE(review): the method name mentions GMM although this notebook runs on
## MNIST — confirm this is the intended visualization routine.
sp.initialize_Y(threshold=0.1, seed=10)
sp.visualize_SolutionPath_For_GMM(k0=20, k1=26, k2=40)

In [None]:
# Scatter plot at the ARR threshold `epsilon` (0.01, as set in the ARR cell).
# Both calls reuse `df` and `epsilon` from the ARR cell.
fontsize = 20
sp.set_k(df=df, epsilon=epsilon)
sp.drawScatterPlot(
    df=df,
    epsilon=epsilon,
    legend=True,
    fontsize=fontsize,
)

In [None]:
## Generate a GIF that covers both the EE and embedding stages.
## Generating the GIF file takes several hours.
#sp.getSolutionPathWithEmbeddingStageForALL(random_seed_for_initialization=1,
#                                           total_iteration=50,
#                                           output_gif='mnist_clustering_200.gif')

In [None]:
## This step requires the previous step, `getSolutionPathWithEmbeddingStageForALL`, to have been run.
## Corresponds to Figure 2.
#sp.drawScatterPlotWithEmbedding(df=df,
#                                epsilon=epsilon,
#                                legend=True,
#                                final_k=200,
#                                fontsize=fontsize)

## Variation of momentum coefficients

In [None]:
## ARR with various momentum coefficients (`Figure 11`)
k_max = 50
epsilon = 0.01
momentum_coefficients = [0.1, 0.3, 0.5, 0.7, 0.9]

# Keep the momentum-sweep results under their own name instead of rebinding the
# notebook-global `df`: the ARR/trustworthiness frame in `df` is still read by
# the correlation and scatter-plot cells, and overwriting it would break them
# when they are re-run after this cell.
df_mm = sp.calcTermsByOptimizationForMM(
    k_max=k_max,
    momentum_coefficients=momentum_coefficients,
)

sp.visualizeTermsByOptimizationForMM(
    df=df_mm,
    epsilon=epsilon,
    momentum_coefficients=momentum_coefficients,
    title='ARR with MM for MNIST dataset',
    legend_title='momentum coefficient',
    saved_filename='arr_plot_mnist_mm.pdf',
)


In [None]:
## Experiments with various random initializations (`Figure 7`)
ks = range(0, 100, 5)
# Set the momentum explicitly before the experiment.
# NOTE(review): presumably needed because the momentum sweep in the preceding
# cell changes `sp.momentum` — confirm.
sp.momentum = 0.5
threshold = 0.10

# Keep the ARI results under their own name instead of rebinding the
# notebook-global `df`, so re-running earlier cells that read the
# ARR/trustworthiness frame in `df` is not affected by this cell.
df_ari = sp.calc_ARI_with_initial_values(
    ks=ks,
    trials=30,
    threshold=threshold,
)
sp.visualize_ARI(df=df_ari)

## Experiments with various initialization methods

In [None]:
# ARI experiments across initialization methods.
# `df` is read by the plotting cell that follows, so the name is kept.
# Note that `n_neighbors` is rebound here (the ARR cell sets it to 10).
initialization_methods = ['random', 'pca', 'se', 'mds']
n_neighbors = 300  ## Parameter for Spectral Embedding
ks = range(0, 100, 5)

df = sp.calc_ARI_with_various_initial_values(
    initialization_methods=initialization_methods,
    ks=ks,
    trials=10,
    n_neighbors=n_neighbors,
)

In [None]:
## Plot the results of the initialization-method experiments (`Figure 8`).
## Reads `df` produced by the preceding cell.
initialization_methods = ['random', 'pca', 'se', 'mds']
fontsize = 16
sp.drawARIWithInitialValues(
    df=df,
    initialization_methods=initialization_methods,
    saved_filename='ari_plot_mnist_initialize.pdf',
    fontsize=fontsize,
)