In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from LP import LP
from DeepLP import DeepLP
from DeepLP_Sparse import DeepLP_Sparse
from DeepLP_RBF import DeepLP_RBF
from DeepLP_WeightedRBF import DeepLP_WeightedRBF
from DeepLP_WeightedRBF_t import DeepLP_WeightedRBF_t
import pandas as pd

from utils import *

# --- Experiment configuration ---
# Dataset switches; each one gates its own loading cell below.
iris, cora = False, True

# Forwarded to random_unlabel() when the nodes are split.
label_prob, hard = 0.05, True

# Forwarded to rbf_kernel() on the Iris path; sigma is also handed to DeepLP_RBF.
percentile = 3
sigma = 3

# Vector of four ones (not referenced in the cells visible in this part of the notebook).
theta = np.ones(4, dtype=int)

### Iris Data

In [2]:
if iris:
    # Load the Iris dataset and keep only the samples whose class is 0 or 1.
    # NOTE(review): `datasets` is not imported explicitly in this notebook; it
    # presumably arrives through `from utils import *` -- confirm.
    # The dataset is bound to its own name so the boolean `iris` switch, which
    # later cells test again, is not replaced by the dataset object.
    iris_dataset = datasets.load_iris()
    all_data = iris_dataset["data"]
    all_labels = iris_dataset["target"]

    # Build the class mask once from the *unfiltered* labels and apply it to
    # both arrays so rows and labels stay aligned. The previous version
    # filtered `labels` first and then rebuilt the mask from the filtered
    # array, which only selects the right rows when the samples happen to be
    # sorted by class.
    binary_mask = all_labels < 2
    labels = all_labels[binary_mask]
    data = all_data[binary_mask]

In [3]:
if cora:
    # Both inputs are comma-separated numeric text files.
    nodes = np.loadtxt('cora/cora_selected.csv', delimiter=',')
    graph = np.loadtxt('cora/cora_graph.csv', delimiter=',')

    # Split the node table by column: first -> id_, second -> labels,
    # everything from the third column on -> data.
    id_, labels = nodes[:, 0], nodes[:, 1]
    data = nodes[:, 2:]

### Cora Data

### Data Preparation

In [4]:
# Split the nodes with random_unlabel(), then stack the two feature groups
# with the labeled rows first.
LX, Ly, UX, Uy, Uy_sol = random_unlabel(
    data, labels, label_prob=label_prob, hard=hard
)
features = np.vstack((LX, UX))

# Weights come from rbf_kernel(): bandwidth/percentile arguments on the Iris
# path, the loaded graph on the Cora path. The two checks are independent, so
# if both switches are set the Cora result wins.
if iris:
    weights = rbf_kernel(features, s=sigma, percentile=percentile)
if cora:
    weights = rbf_kernel(features, G=graph)

# Bookkeeping used by the following cells.
true_labels = np.hstack((Ly, Uy_sol))
num_labeled, num_unlabeled = len(Ly), len(Uy)
num_nodes = num_labeled + num_unlabeled
num_features = features.shape[1]

# Iris has no graph file: mark every strictly positive weight as an edge.
if iris:
    graph = (weights > 0).astype(int)

  if G == None:


In [5]:
# prepare features for NN
LY = np.tile(Ly,(Ly.shape[0],1))
np.fill_diagonal(LY, 0.5)
UY = np.tile(Uy,(Ly.shape[0],1))

masked_ = np.hstack((np.identity(LY.shape[0]),np.zeros((Ly.shape[0],Uy.shape[0]))))
true_labeled = np.array([1] * LY.shape[0] + [0] * Uy.shape[0]).reshape(1,num_nodes)

In [6]:
# Single-row evaluation batch.
# unlabeled_test: 0 over the labeled positions, 1 over the unlabeled ones.
unlabeled_test = np.hstack((np.zeros(LY.shape[0]), np.ones((Uy.shape[0])))).reshape(1, num_nodes)
test_data = {
    'X': np.hstack((Ly, Uy)).reshape(1, num_nodes),
    # One row of ground-truth labels. Previously this tiled true_labels once
    # per labeled node and sliced out row 1, which allocates a full
    # (num_labeled x num_nodes) matrix and yields an empty array when there is
    # only one labeled node. Reshaping directly gives the same row; .copy()
    # keeps it independent of true_labels, as the tiled version was.
    'y': np.reshape(true_labels, (1, len(true_labels))).copy(),
    'unlabeled': unlabeled_test,
    'labeled': (1 - unlabeled_test).reshape(1, num_nodes),
    'true_labeled': true_labeled,
    # First row of the mask matrix built in the feature-preparation cell.
    'masked': masked_[0:1]
}

In [7]:
# Multi-row batch with one row per labeled node.
# unlabeled_: identity over the labeled columns, ones over the unlabeled columns.
unlabeled_ = np.hstack((
    np.eye(LY.shape[0]),
    np.ones((Ly.shape[0], Uy.shape[0])),
))

# NOTE: this rebinds `data`, which held the feature matrix until now. Re-running
# the split cell after this one would hand this dict to random_unlabel().
data = {
    'X': np.hstack((LY, UY)),
    'y': true_labels.reshape(1, len(true_labels)),
    'unlabeled': unlabeled_,
    'labeled': 1 - unlabeled_,
    'true_labeled': true_labeled,
    'masked': masked_,
}

In [30]:
# Sum the weight matrix along axis 0 and display the sums smaller than 1e-3.
a = weights.sum(axis=0)
a[a < 1e-3]

array([ 0.00091188,  0.00091188,  0.00091188,  0.00091188,  0.00091188,
        0.00091188,  0.00091188,  0.00091188,  0.00091188,  0.00091188,
        0.00012341,  0.00033546,  0.00091188,  0.00033546,  0.00033546,
        0.00091188,  0.00091188])

# Label Propagation

In [36]:
# Run the iterative label-propagation solver and round its output to the nearest integer.
lp = LP(num_nodes)
# Closed-form variant, currently disabled:
# Uy_lp = np.rint(lp.closed(weights,Ly))
# NOTE(review): the trailing 5000 is presumably the iteration count (the sweep
# cell further down passes 1..1000 in the same position) -- confirm against LP.iter_.
Uy_lp_iter = np.rint(lp.iter_(weights,Ly,Uy,5000))

In [37]:
# Report both scores of the rounded iterative-LP output against Uy_sol.
for caption, score_fn in (
    ("[Accuracy] Iter Solution:", accuracy),
    ("[RMSE] Iter solution", rmse),
):
    print(caption, score_fn(Uy_sol, Uy_lp_iter))

[Accuracy] Iter Solution: 0.506235385814
[RMSE] Iter solution 7.59664848012


In [11]:
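# NOTE: disabled together with the closed-form solve (`Uy_lp` is no longer computed); the output recorded below is presumably left over from an earlier run.
# print("[Accuracy] Closed solution:", accuracy(Uy_sol, Uy_lp), " Iter Solution:", accuracy(Uy_sol, Uy_lp_iter))
# print("[RMSE] Closed solution", rmse(Uy_sol, Uy_lp), " Iter solution", rmse(Uy_sol, Uy_lp_iter))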

[Accuracy] Closed solution: 0.697194076383  Iter Solution: 0.697194076383
[RMSE] Closed solution 0.302805923617  Iter solution 0.302805923617


In [35]:
# Sweep the last argument of lp.iter_ and print three scores for each setting.
# Note that every pass rebinds Uy_lp_iter, so after the loop it holds the
# result of the final (1000) setting.
for iter_ in (1, 2, 3, 4, 5, 10, 100, 1000):
    print(iter_)
    Uy_lp_iter = np.rint(lp.iter_(weights, Ly, Uy, iter_))
    # Uy_lp_iter is already rounded, so it is passed to objective() as is.
    print("Objective:", objective(Ly, Uy_lp_iter, weights))
    for caption, score_fn in (("Accuracy:", accuracy), ("RMSE:", rmse)):
        print(caption, score_fn(Uy_sol, Uy_lp_iter))

1
Objective: 711.135740091
Accuracy: 0.458690568979
RMSE: 0.887763055339
2
Objective: 423.779284557
Accuracy: 0.187451286048
RMSE: 0.844115354638
3
Objective: 393.026518654
Accuracy: 0.457521434139
RMSE: 0.772408417771
4
Objective: 323.772794078
Accuracy: 0.295791114575
RMSE: 0.779812938426
5
Objective: 311.032565399
Accuracy: 0.458300857366
RMSE: 0.747856586126
10
Objective: 253.149580936
Accuracy: 0.394388152767
RMSE: 0.784099766173
100
Objective: 149.288375247
Accuracy: 0.496492595479
RMSE: 1.16173031956
1000
Objective: 188.02783309
Accuracy: 0.508573655495
RMSE: 3.32540919719


# DeepLP

In [11]:
# Build the DeepLP model on the precomputed weight matrix.
# NOTE(review): the meaning of the positional arguments 10 and 1 is not visible
# here -- check DeepLP.__init__ (and consider passing them by keyword).
dlp = DeepLP(10, num_nodes, weights, 1)

Tensor("while/matmul:0", shape=(?, 100), dtype=float32)


In [12]:
# Run DeepLP on the single-row evaluation batch.
pred_dlp = dlp.labelprop(test_data)
# Keep the columns from num_labeled onwards, flatten them to length
# num_unlabeled, and round to the nearest integer.
Uy_dlp = np.rint(pred_dlp[:, num_labeled:].reshape(num_unlabeled))

In [13]:
# Score the rounded DeepLP output against Uy_sol, then evaluate the objective
# on the weight matrix currently held by the model (dlp.W).
for caption, score_fn in (("Accuracy:", accuracy), ("RMSE:", rmse)):
    print(caption, score_fn(Uy_sol, Uy_dlp))
print("Objective:", objective(Ly, Uy_dlp, dlp.get_val(dlp.W)))

Accuracy: 0.621052631579
RMSE: 0.378947368421
Objective: 168.968111634


# DeepLP_RBF

In [38]:
# Build the RBF variant from the raw features, the graph and the sigma set in the config cell.
# NOTE(review): the leading 100 is an undocumented positional argument, and `lr`
# is presumably the learning rate -- confirm against DeepLP_RBF.__init__.
dlp_rbf = DeepLP_RBF(100, num_nodes, features, graph, sigma, lr=0.01)