In [51]:
import numpy as np
from scipy.sparse import linalg as spla
from scipy.io import loadmat
import graphlearning as gl
from matplotlib import pyplot as plt

In [46]:
def get_dataset(k=30):
    """Register the Indian Pines scene with graphlearning and run a kNN search.

    Reads ``Indian_pines_corrected.mat`` and ``Indian_pines_gt.mat`` from the
    current working directory, flattens the image to one row per pixel and the
    ground truth to one entry per pixel, saves both under the dataset name
    ``'indian_pines'`` (overwriting any existing copy), and then runs an
    approximate nearest-neighbor search with the Annoy backend.

    Parameters
    ----------
    k : int, optional
        Number of nearest neighbors to search for. Defaults to 30, the value
        that was previously hard-coded.

    Returns
    -------
    tuple
        ``(I, J, D)`` exactly as returned by ``gl.knnsearch_annoy``.
    """
    img = loadmat('Indian_pines_corrected.mat')['indian_pines_corrected']
    gt = loadmat('Indian_pines_gt.mat')['indian_pines_gt']

    # Save dataset: one row per pixel. The number of features is read from the
    # last axis of the image instead of being hard-coded to 200
    # (assumes the last axis is the spectral/feature axis -- TODO confirm).
    X = img.reshape(-1, img.shape[-1])
    gl.save_dataset(X, 'indian_pines', overwrite=True)

    # Save labels: one entry per pixel, in the same order as the rows of X.
    labels = gt.reshape(-1)
    gl.save_labels(labels, 'indian_pines', overwrite=True)

    # To add a dataset to the simulation environment, we would also need to
    # save a label permutation (a number of random train/test splits).
    # That step is currently disabled; the call below would create a label
    # permutation with 100 trials at 1,2,3,4,5 labels per class. Any
    # identifying string can be passed as name='...' to create additional
    # label permutations for a dataset.
    # gl.create_label_permutations(labels, 100, [1,2,3,4,5], dataset='indian_pines', name=None, overwrite=True)

    # Run the kNN search on the k nearest neighbors. Choose as many as you are
    # likely to use in practice; per the original note, the code will
    # automatically subset if needed.
    # The Annoy (approximate) search is used here because the data is
    # high-dimensional; the kd-tree based alternative is kept for reference:
    # I, J, D = gl.knnsearch(X, k, dataset='indian_pines')
    I, J, D = gl.knnsearch_annoy(X, k, dataset='indian_pines')
    return I, J, D
    
# Executed at cell level: I, J, D become notebook globals, which loop() reads directly.
I, J, D = get_dataset()

kNN search with Annoy approximate nearest neighbor package...
Progress: |██████████████████████████████████████████████████| 100.0% Complete


<img src="high_level_overview.png"/>

In [54]:
def loop(tau=.1, min_acc=.01, max_trials=10):
    """Run Laplace semi-supervised learning on 'indian_pines' until it is accurate enough.

    Builds the weight matrix from the notebook-level kNN data ``I, J, D``,
    then repeatedly draws a random labeled set (1 label per class), runs
    ``gl.graph_ssl`` with ``algorithm='laplace'`` and measures the fraction of
    all points whose predicted label equals the stored label. The loop stops as
    soon as that fraction reaches ``min_acc`` or after ``max_trials`` draws,
    whichever comes first.

    Parameters
    ----------
    tau : float, optional
        Shift parameter; ``tau**2 * identity`` is added to the graph Laplacian.
    min_acc : float, optional
        Accuracy at which the loop stops (previously the hard-coded ``.01``).
    max_trials : int, optional
        Upper bound on the number of labeled-set draws. The previous
        ``while acc < .01`` loop reused the same inputs on every pass, so it
        could never terminate once the first pass fell below the threshold.

    Returns
    -------
    tuple
        ``(acc, pred_labels)`` from the last trial that was run.
    """
    # Imported locally so this function does not depend on graphlearning
    # happening to expose scipy.sparse as ``gl.sparse``.
    from scipy import sparse

    labels = gl.load_labels('indian_pines')

    # Weight matrix and shifted graph Laplacian do not depend on the labeled
    # set, so they are built once instead of on every pass of the loop.
    W = gl.weight_matrix(I, J, D, 10)
    L = gl.graph_laplacian(W, norm='none') + tau**2 * sparse.identity(W.shape[0])

    # NOTE(review): the 50 eigenpairs are not used by the returned values;
    # presumably scaffolding for a later spectral step -- confirm or remove.
    # The incomplete `def J_l()` stub that preceded this call was a syntax
    # error and has been dropped.
    w, v = spla.eigs(L, 50)

    pred_labels = []
    acc = 0
    for _ in range(max_trials):
        # Draw a fresh labeled set each trial so a retry can actually differ.
        train_ind = gl.randomize_labels(labels, 1)
        train_labels = labels[train_ind]

        # Run SSL
        pred_labels = gl.graph_ssl(W, train_ind, train_labels, algorithm='laplace')

        # Accuracy over all points (labeled points included).
        acc = np.mean(pred_labels == labels)
        print('Accuracy:', acc)

        if acc >= min_acc:
            break

    return acc, pred_labels

In [55]:
# Run the experiment; the returned (accuracy, predicted labels) pair is shown as the cell output.
loop()

[ 8580 10396  6873     9  6824 11315 16160 10550  8692  9447  8049   552
  2066 16994 17924  1253  2947]
Accuracy: 0.2182639714625446
(21025, 50)


(0.2182639714625446, array([ 3,  0, 10, ...,  5, 15, 15], dtype=uint8))