In [None]:
import utils

## Initial setting

In [None]:
# Experiment configuration. Each value below is forwarded unchanged to
# utils.SolutionPath under the keyword of the same name.
sample_data = 'kddcup'
labels = ['smurf', 'neptune', 'normal', 'back', 'satan']
sample_number = 100
perplexity = 30
alpha = 10
h = 10
total_iteration = 50
momentum = 0.5
R = 5
m = 1

# Build the object that every following cell drives through `sp`.
sp = utils.SolutionPath(
    labels=labels,
    sample_data=sample_data,
    perplexity=perplexity,
    alpha=alpha,
    h=h,
    total_iteration=total_iteration,
    momentum=momentum,
    sample_number=sample_number,
    R=R,
    m=m,
)

## Prepare dataset

In [None]:
## Load KDDCup data (`sp` was constructed with sample_data='kddcup').
## NOTE(review): presumably this populates `sp.X_sampled`, which the next cell reads -- confirm in utils.
sp.load_data()

In [None]:
# Calculate the probability matrix P from `sp.X_sampled`.
# The perplexity comes from the `perplexity` setting in the configuration cell
# (the same value handed to SolutionPath) rather than a repeated literal 30,
# so changing the setting in one place cannot leave this call out of sync.
sp.tsne_probabilities(
    X=sp.X_sampled,
    perplexity=perplexity,
    tol=1e-10,
    initial_beta_coefficient=1e-4,
)

In [None]:
## Heatmap of the adjacency matrix P (`Figure 12`)
sp.show_P_matrix()

## Solve the eigenproblem of a Laplacian matrix $L(\alpha P-H_n)$

In [None]:
## Solve the eigenvalue problem.
## The two cells that follow visualize eigenvalues and eigenvectors via `sp`.
sp.getSolution()

In [None]:
## Distribution of eigenvalues (`Figure 13`)
sp.visualize_eigenvalues()

In [None]:
## Distribution of eigenvectors (`Figure 14`)
## NOTE(review): presumably relies on the result of `sp.getSolution()` run in an earlier cell -- confirm.
sp.visualize_vectors()

In [None]:
## Set the initial Y, passing an explicit seed (seed=2).
## NOTE(review): later cells call initialize_Y again without `seed`; confirm
## that the default still gives reproducible figures.
sp.initialize_Y(threshold=0.1, seed=2)

In [None]:
## Average Residual Ratio (ARR) and its visualization (`Figure 9`)
k_max = 60
epsilon = 0.01
fontsize = 16
n_neighbors = 10

# Result `df` is reused below: a later cell correlates its 'value' and
# 'trustworthiness' columns, and another passes it to sp.drawScatterPlot.
df = sp.calcTermsByOptimization(
    k_max=k_max,
    initial_value=True,
    n_neighbors=n_neighbors,
)

# The title is a plain string literal: the former f-prefix had no placeholders
# to interpolate, so the text itself is unchanged.
# NOTE(review): `saved_filename` presumably names the file the figure is
# written to -- confirm in utils.
sp.visualizeTermsByOptimization(
    df=df,
    title='ARR / Trustworthiness for KDDCup1999 dataset',
    epsilon=epsilon,
    saved_filename='arr_plot_kddcup1999_with_TW.pdf',
    fontsize=fontsize,
)

In [None]:
## Correlation coefficient between ARR (`value` column) and trustworthiness.
## Uses the default method of `.corr` (Pearson, assuming `df` is a pandas DataFrame).
arr_series = df['value']
trust_series = df['trustworthiness']
arr_series.corr(trust_series)

In [None]:
## Scatter plot drawn from the ARR / trustworthiness results held in `df`.
## NOTE(review): the earlier caption here was a copy of the ARR cell's
## (`Figure 9`); confirm which figure this plot corresponds to.
sp.drawScatterPlot(df=df, epsilon=epsilon, legend=False, fontsize=fontsize)

## Visualize the solution

In [None]:
## Visualize the solution path with a 3 x 3 grid.
## Y is re-initialized first with threshold=0.2 (no explicit seed is passed here).
sp.initialize_Y(threshold=0.2)
sp.visualize_SolutionPath_For_GMM(k0=20, k1=25, k2=35)

In [None]:
## Comparison of the low-dimensional representation among GD/MM/NAG (`Figure 1`).
## Y is re-initialized first with threshold=0.1 (no explicit seed is passed here).
sp.initialize_Y(threshold=0.1)
sp.visualize_SolutionPath(k0=5, k1=22, k2=30, k3=70)

In [None]:
## Experiments with various random initializations (`Figure 7`)
ks = range(0, 100, 5)  # 0, 5, ..., 95

# NOTE(review): this rebinds `df`, replacing the ARR result used by earlier
# cells; re-running those cells after this one will see the ARI result instead.
df = sp.calc_ARI_with_initial_values(ks=ks, trials=30, threshold=0.1)
sp.visualize_ARI(df=df)