In [93]:
import pickle
import math
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from sklearn.datasets import make_moons
from sklearn.decomposition import PCA
from scipy.linalg import fractional_matrix_power

np.random.default_rng(123).shuffle(random_index)

def load_data(file):
    data_list = []
    label_list = []
    for i in range(1,6):
        with open(f"{file}/data_batch_{i}", 'rb') as f:
            _data = pickle.load(f, encoding="bytes")
            #import pdb; pdb.set_trace()
            data_list.append(_data[b'data'])
            label_list.append(_data[b'labels'])
    return (data_list, label_list)
            
data_cifar10 = load_data("./cifar10")

In [94]:
data, label = data_cifar10
data = np.concatenate(data)
label = np.concatenate(label)
data= (data - np.mean(data, axis=1)[:,np.newaxis])/(np.var(data, axis=1)[:,np.newaxis])

selected_labels = [2, 4, 8, 9]

selected_data = np.zeros(data.shape[0]).astype(bool)

for l in selected_labels:
    selected_data = np.bitwise_or(selected_data, label == l)
    

data = data[selected_data]
label[label==2] -= 2
label[label==4] -= 3
label[label==8] -= 6
label[label==9] -= 6
label = label[selected_data]

In [95]:

alpha = 0.99
sigma = 0.1
num_iter = 1000
num_label_per_cls = 10

num_data = data.shape[0]

In [96]:
label[100:200]

array([3, 0, 1, 3, 3, 3, 2, 0, 0, 0, 2, 2, 3, 1, 1, 0, 0, 3, 3, 2, 1, 3,
       3, 3, 2, 0, 1, 3, 1, 1, 1, 2, 0, 3, 0, 3, 1, 2, 3, 1, 1, 2, 1, 0,
       3, 1, 2, 1, 1, 0, 0, 0, 3, 3, 2, 0, 1, 2, 3, 1, 0, 0, 0, 3, 1, 2,
       1, 1, 3, 2, 2, 3, 3, 1, 1, 2, 1, 2, 2, 0, 2, 3, 0, 2, 1, 3, 0, 1,
       2, 2, 1, 1, 3, 2, 2, 0, 2, 1, 2, 3])

In [97]:
random_index = np.arange(num_data)
np.random.shuffle(random_index)
data = data[random_index]
label = label[random_index]

In [98]:
def pca_dim_reduce(X, output_dim=20):
    
    pca = PCA(n_components = output_dim)
    newData = pca.fit_transform(X)
    return newData



def generate_affinity_matrix(X, Sigma=0.01,metric="L2"):
    W = np.sum((X[np.newaxis,:,:] - X[:,np.newaxis,:])**2, axis=-1)
    #import pdb;pdb.set_trace()
    W = np.exp(- W / (2*Sigma**2))
    np.fill_diagonal(W, 0)
    D_n12 = np.sum(W, axis=1)**(-1/2)
    D_n1 = np.sum(W, axis=1)**(-1)
    D_n12 = np.diag(D_n12)
    D_n1 = np.diag(D_n1)
    return W, D_n12, D_n1

def walk_matrix(X, Sigma=0.01, m_type='norm'):
    if m_type == 'norm':
        W, D_n12, _ = generate_affinity_matrix(X, Sigma=Sigma)
        S = D_n12 @ W @ D_n12
        return S
    elif m_type == 'random':
        W, _, D_n1 = generate_affinity_matrix(X, Sigma=Sigma)
        S = W @ D_n1
        return  S
    else:
        raise NotImplemented()

def label_matrix(label_vec):
    #one-hot
    n_values = np.max(label_vec) + 1
    one_hot_y = np.eye(n_values)[label_vec]
    return one_hot_y

        
def lgc(S, Y, alpha=0.99,num_I=400):
    
    # Iteration 0/ F_0 = Y
    F = Y
    for t in range(num_I):
        F = S @ F * alpha + (1-alpha)*Y
    
    return F



In [99]:
from time import time
import pandas as pd
#data_exp = data

In [92]:
data_dict = {"Label Number":[], "Dim Number":[], "Time":[], "Acc":[]}
for dim in (8, 16, 32, 64, 128):
    data_exp = pca_dim_reduce(data, output_dim=dim)
    for labeled_num in (10, 20, 50, 100, 200, 500):
        sample_num = 5000


        _s = time()
        Y = np.zeros([sample_num, 4])
        data_exp = data_exp[:sample_num]
        ##data_exp = (data[:sample_num] - np.mean(data[:sample_num]))/(np.var(data[:sample_num]))
        target = label[:sample_num]
        matrix = label_matrix(label[:labeled_num])
        Y[:labeled_num] = matrix
        S = walk_matrix(data_exp, Sigma=sigma, m_type='norm')
        F = lgc(S, Y, alpha, num_iter)

        pred_y = np.argmax(F, axis=-1)
        data_dict["Label Number"].append(labeled_num)
        data_dict["Dim Number"].append(dim)
        print(f"For {labeled_num} Labels")
        print(f"with {dim} DIMS")
        acc = np.mean(pred_y[labeled_num:] == target[labeled_num:])
        data_dict["Acc"].append(acc)
        data_dict['Time'].append(time() - _s)
        print(f"Acc{data_dict['Acc'][-1]}")
        print(f"Time{data_dict['Time'][-1]}")

out_df = pd.DataFrame.from_dict(data_dict)
out_df.to_csv('label-dim-2489.csv', index=False)


KeyboardInterrupt: 

In [100]:
labeled_num = 500
data_dict = {"Alpha":[], "Sigma":[], "Time":[], "Acc":[]}
data_exp = pca_dim_reduce(data, output_dim=64)
sample_num = 5000
Y = np.zeros([sample_num, 4])
data_exp = data_exp[:sample_num]
##data_exp = (data[:sample_num] - np.mean(data[:sample_num]))/(np.var(data[:sample_num]))
target = label[:sample_num]
matrix = label_matrix(label[:labeled_num])
Y[:labeled_num] = matrix

for a_f in [0.999, 0.99, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.01, 0.001]:
    for s_a in [100, 10, 1, 0.1, 0.01, 0.001, 0.0001]:
        _s = time()
        S = walk_matrix(data_exp, Sigma=s_a, m_type='norm')
        F = lgc(S, Y, a_f, num_I=2000)
        pred_y = np.argmax(F, axis=-1)
        data_dict["Alpha"].append(a_f)
        data_dict["Sigma"].append(s_a)
        print(f"For {a_f} MIU")
        print(f"with {s_a} Sigma")
        #acc = np.mean(pred_y == target)
        acc = np.mean(pred_y[labeled_num:] == target[labeled_num:])
        data_dict["Acc"].append(acc)
        data_dict['Time'].append(time() - _s)
        print(f"Acc{data_dict['Acc'][-1]}")
        print(f"Time{data_dict['Time'][-1]}")

out_df = pd.DataFrame.from_dict(data_dict)
out_df.to_csv('miu-sigma-64-500-remove.csv', index=False)

For 0.999 MIU
with 100 Sigma
Acc0.25244444444444447
Time18.273845911026
For 0.999 MIU
with 10 Sigma
Acc0.25244444444444447
Time18.304597854614258
For 0.999 MIU
with 1 Sigma
Acc0.24155555555555555
Time18.292303800582886
For 0.999 MIU
with 0.1 Sigma
Acc0.49
Time18.310458421707153


  
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


For 0.999 MIU
with 0.01 Sigma
Acc0.25244444444444447
Time18.2416090965271
For 0.999 MIU
with 0.001 Sigma
Acc0.25244444444444447
Time18.273045539855957
For 0.999 MIU
with 0.0001 Sigma
Acc0.25244444444444447
Time18.243876218795776
For 0.99 MIU
with 100 Sigma
Acc0.25244444444444447
Time18.386842250823975
For 0.99 MIU
with 10 Sigma
Acc0.25244444444444447
Time18.286447048187256
For 0.99 MIU
with 1 Sigma
Acc0.24155555555555555
Time18.311567544937134
For 0.99 MIU
with 0.1 Sigma
Acc0.5257777777777778
Time18.2831609249115
For 0.99 MIU
with 0.01 Sigma
Acc0.25244444444444447
Time18.255253791809082
For 0.99 MIU
with 0.001 Sigma
Acc0.25244444444444447
Time18.244733572006226
For 0.99 MIU
with 0.0001 Sigma
Acc0.25244444444444447
Time18.250364065170288
For 0.9 MIU
with 100 Sigma
Acc0.25244444444444447
Time18.26825451850891
For 0.9 MIU
with 10 Sigma
Acc0.25244444444444447
Time18.302055835723877
For 0.9 MIU
with 1 Sigma
Acc0.24155555555555555
Time18.280114889144897
For 0.9 MIU
with 0.1 Sigma
Acc0.556
Ti

In [103]:
labeled_num = 500
data_dict = {"Alpha":[], "Sigma":[], "Time":[], "Acc":[]}
data_exp = pca_dim_reduce(data, output_dim=64)
sample_num = 5000
Y = np.zeros([sample_num, 4])
data_exp = data_exp[:sample_num]
##data_exp = (data[:sample_num] - np.mean(data[:sample_num]))/(np.var(data[:sample_num]))
target = label[:sample_num]
matrix = label_matrix(label[:labeled_num])
Y[:labeled_num] = matrix

for a_f in [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]:
    for s_a in [0.6,0.5,0.4, 0.3, 0.2, 0.1, 0.08, 0.05]:
        _s = time()
        S = walk_matrix(data_exp, Sigma=s_a, m_type='norm')
        F = lgc(S, Y, a_f, num_I=2000)
        pred_y = np.argmax(F, axis=-1)
        data_dict["Alpha"].append(a_f)
        data_dict["Sigma"].append(s_a)
        print(f"For {a_f} MIU")
        print(f"with {s_a} Sigma")
        #acc = np.mean(pred_y == target)
        acc = np.mean(pred_y[labeled_num:] == target[labeled_num:])
        data_dict["Acc"].append(acc)
        data_dict['Time'].append(time() - _s)
        print(f"Acc{data_dict['Acc'][-1]}")
        print(f"Time{data_dict['Time'][-1]}")

out_df = pd.DataFrame.from_dict(data_dict)
out_df.to_csv('miu-sigma-64-500-norm-fine.csv', index=False)

For 0.9 MIU
with 0.6 Sigma
Acc0.24333333333333335
Time18.642460346221924
For 0.9 MIU
with 0.5 Sigma
Acc0.25
Time18.56238865852356
For 0.9 MIU
with 0.4 Sigma
Acc0.2937777777777778
Time18.60278010368347
For 0.9 MIU
with 0.3 Sigma
Acc0.42288888888888887
Time18.529457807540894
For 0.9 MIU
with 0.2 Sigma
Acc0.5348888888888889
Time18.589747190475464
For 0.9 MIU
with 0.1 Sigma
Acc0.5575555555555556
Time18.52461338043213
For 0.9 MIU
with 0.08 Sigma
Acc0.5455555555555556
Time18.588767528533936


  
  from ipykernel import kernelapp as app


For 0.9 MIU
with 0.05 Sigma
Acc0.25244444444444447
Time18.56649112701416
For 0.8 MIU
with 0.6 Sigma
Acc0.262
Time18.58839750289917
For 0.8 MIU
with 0.5 Sigma
Acc0.3022222222222222
Time18.496426105499268
For 0.8 MIU
with 0.4 Sigma
Acc0.3824444444444444
Time18.564895629882812
For 0.8 MIU
with 0.3 Sigma
Acc0.49
Time18.510092735290527
For 0.8 MIU
with 0.2 Sigma
Acc0.5691111111111111
Time18.575661659240723
For 0.8 MIU
with 0.1 Sigma
Acc0.5555555555555556
Time18.511620044708252
For 0.8 MIU
with 0.08 Sigma
Acc0.546
Time18.573206186294556
For 0.8 MIU
with 0.05 Sigma
Acc0.25244444444444447
Time18.569940090179443
For 0.7 MIU
with 0.6 Sigma
Acc0.30777777777777776
Time18.585964918136597
For 0.7 MIU
with 0.5 Sigma
Acc0.362
Time18.523353338241577
For 0.7 MIU
with 0.4 Sigma
Acc0.43844444444444447
Time18.615071773529053
For 0.7 MIU
with 0.3 Sigma
Acc0.5188888888888888
Time18.528122186660767
For 0.7 MIU
with 0.2 Sigma
Acc0.5842222222222222
Time18.55620813369751
For 0.7 MIU
with 0.1 Sigma
Acc0.554666666