In [7]:
import numpy as np
from sklearn import datasets
import tensorflow as tf
from sklearn.semi_supervised import LabelPropagation
from sklearn.model_selection import train_test_split
import scipy as sp
from scipy.spatial.distance import pdist, squareform
import networkx as nx
import matplotlib.pyplot as plt
from numpy.linalg import inv

In [8]:
def get_iris_data():
    """ Read the iris data and label/unlabel data points"""
    # load iris data
    iris   = datasets.load_iris()
    data   = iris["data"]
    labels = iris["target"]
    
    # get label 0 and 1, and corresponding data
    labels = labels[labels < 2]
    data = data[np.where(labels < 2)]
    
    # generate random numbers for unlabeling
    rng = np.random.RandomState(42)
    random_unlabeled_points = rng.rand(len(labels)) < 0.5
    masked_labels = np.copy(labels).astype(float)
    
    # keep labels for points that will be unlabeled
    unlabeled_y_sol = np.copy(labels[random_unlabeled_points])

    # unlabel points
    masked_labels[random_unlabeled_points] = 0.5
    
    unlabeled_indices = np.where(random_unlabeled_points)
    
    # separate labeled/unlabeled Y
    unlabeled_y = masked_labels[random_unlabeled_points]
    labeled_y = np.delete(masked_labels,unlabeled_indices)
    
    # separate labeled/unlabeled X
    unlabeled_X = data[unlabeled_indices]
    labeled_X = np.delete(data,unlabeled_indices,axis=0)
    
    return labeled_X, labeled_y, unlabeled_X, unlabeled_y, unlabeled_y_sol

In [9]:
def check_symmetric(a, tol=1e-8):
    return np.allclose(a, a.T, atol=tol)

In [10]:
def get_weights(X):
    
    def get_neighbors(arr,threshold=0.95):
        # index = arr.argsort()[:-5][::-1]
        index = np.where(arr > 0.95)
        arr[index] = 0
        return arr
    
    # use rdf kernel to estimate weights
    pairwise_dists = squareform(pdist(X, 'euclidean'))
    s = np.var(X)
    K = sp.exp(-pairwise_dists ** 2 / s ** 2)
    np.fill_diagonal(K, 0)
    K = np.apply_along_axis(get_neighbors, 0, K)

    return K

In [11]:
def show_graph_with_labels(adjacency_matrix):
    rows, cols = np.where(adjacency_matrix > 0)
    edges = zip(rows.tolist(), cols.tolist())
    gr = nx.Graph()
    gr.add_edges_from(edges)
    nx.draw_networkx(gr,node_size=50,with_labels=False)
    plt.show()

In [12]:
def label_propagation(X,Ly,Uy):
    W = get_weights(X)
    T = W / np.sum(W, axis=1)
    Tnorm = T / np.sum(T, axis=1)
    Tuu_norm = Tnorm[len(LX):,len(LX):]
    Tul_norm = Tnorm[len(LX):,:len(LX)]
    Uy_lp = inv((np.identity(len(Tuu_norm))-Tuu_norm)) @ Tul_norm @ Ly
    return Uy_lp

In [82]:
def label_propagation_iter(X,Ly,Uy,iter_):
    W = get_weights(X)
    T = W / np.sum(W, axis=1)

    Y = np.hstack((Ly,Uy))
    print(Y.shape)

    for i in range(iter_):
        Y = np.dot(T,Y)
        Y[:len(Ly)] = Ly

    return(Y[len(Ly):])

In [83]:
LX, Ly, UX, Uy, Uy_sol = get_iris_data()
features = np.vstack((LX,UX))
labels = np.hstack((Ly,Uy))
labels_sol = np.hstack((Ly,Uy_sol))
weights = get_weights(features)
num_labeled = len(Ly)
num_unlabeled = len(Uy)
num_nodes = num_labeled+num_unlabeled


In [84]:
np.mean(np.rint(Uy) == Uy_sol)

0.52830188679245282

In [85]:
Uy_lp = label_propagation(features,Ly,Uy)

In [86]:
np.mean(np.rint(Uy_lp) == Uy_sol)

0.73584905660377353

In [87]:
Uy_lp_iter = label_propagation_iter(features,Ly,Uy,1)

(100,)


In [88]:
np.mean(np.rint(Uy_lp_iter) == Uy_sol)

0.73584905660377353

In [91]:
np.mean(np.rint(Uy_lp_iter) == np.rint(Uy_lp))

0.8867924528301887

In [24]:
# create input matrix 
LY = np.tile(Ly,(Ly.shape[0],1))
UY = np.tile(Uy,(Ly.shape[0],1))

# mask diagonal elements for labeled data
np.fill_diagonal(LY, 0.5)

label_input = np.hstack((LY, UY))
label_input_test = np.hstack((Ly,Uy))

unlabeled_ = np.hstack((np.identity(LY.shape[0]),np.ones((Ly.shape[0],Uy.shape[0]))))
unlabeled_test = np.hstack((np.zeros(LY.shape[0]),np.ones((Uy.shape[0]))))

labeled_ = 1 - unlabeled_
labeled_test = 1 - unlabeled_test

masked_ = np.hstack((np.identity(LY.shape[0]),np.zeros((Ly.shape[0],Uy.shape[0]))))

# TODO: shaffle input

In [26]:
## NN without features ##

RANDOM_SEED = 42
tf.set_random_seed(RANDOM_SEED)


def init_weights(weights_np):
    """ Weight initialization """
    weights = tf.convert_to_tensor(weights_np, np.float32)
    return tf.Variable(weights)

In [29]:
def forwardprop(X, w, T, one_hot, reverse_one_hot):
    normalized_w = w / tf.reduce_sum(w, axis = 1)
    trueX = X
    for i in range(T):
        h = tf.tensordot(X, normalized_w, axes = 1)
        h = tf.multiply(h, unlabeled) + tf.multiply(trueX, labeled)
        X = h
    return X

In [48]:
# Layer's sizes
T = 1
x_size = num_nodes # Number of input nodes: number of labeled nodes
h_size = num_nodes # Number of hidden nodes: number of labeled nodes
y_size = num_nodes # Number of outcomes: number of labeled nodes

# Symbols
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])
unlabeled = tf.placeholder("float", shape=[None, y_size])
labeled = tf.placeholder("float", shape=[None, y_size])
masked = tf.placeholder("float", shape=[None, y_size])

# Weight initializations
w = init_weights(weights)

# Forward propagation
yhat    = forwardprop(X, w, T, unlabeled, labeled)

In [49]:
# Backward propagation
a = yhat
cost = tf.reduce_mean(tf.multiply(masked, tf.multiply(y, tf.log(yhat))))
updates = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

In [64]:
label_output = np.tile(labels,(label_input.shape[0],1))
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
pred = sess.run(yhat, feed_dict={X:label_input_test.reshape(1,100),
                            y:label_output[1:2],
                            unlabeled:unlabeled_test.reshape(1,100), 
                            labeled:labeled_test.reshape(1,100),
                            masked:masked_})
sess.close()

In [65]:
pred[:,47:]

array([[ 0.63205785,  0.65269452,  0.48617205,  0.62007231,  0.51780617,
         0.48384222,  0.41829225,  0.42737427,  0.46051186,  0.47398618,
         0.55859816,  0.61287117,  0.51327324,  0.63316816,  0.62441987,
         0.63727266,  0.65139943,  0.59741127,  0.45018771,  0.53588921,
         0.65993875,  0.63687885,  0.64576942,  0.43879244,  0.54238153,
         0.50175887,  0.54327172,  0.66183281,  0.55509508,  0.51465738,
         0.55217838,  0.50951147,  0.53151327,  0.50645667,  0.52554333,
         0.50762373,  0.56985164,  0.60600609,  0.44995159,  0.60664725,
         0.60118467,  0.50075853,  0.49210307,  0.50340104,  0.63130033,
         0.58319086,  0.53866142,  0.51396751,  0.5143382 ,  0.5164274 ,
         0.49103931,  0.51721019,  0.49282163]], dtype=float32)

In [66]:
Uy_sol

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1])

In [67]:
np.mean(np.rint(pred[:,47:]) == Uy_sol)

0.54716981132075471

In [174]:
# Run SGD
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

label_output = np.reshape(labels,(1,len(labels)))

for epoch in range(1):
    # Train with each example
    for i in range(len(LY)):
        
        sess.run(yhat, feed_dict={X:label_input[i: i + 1],
                                y:label_output,
                                one_hot:one_hot_[i: i + 1], 
                                reverse_one_hot:reverse_one_hot_[i: i + 1]})
sess.close()

In [164]:
label_output.shape

(1, 100)

In [139]:
label_output = np.reshape(labels,(1,len(labels))),one_ho

In [29]:
## NN with features ##

def forwardprop_with_features(X, T, Features, Theta):
    
    w = tf.sigmoid(tf.transpose(Features) * Theta * Features)
    
    normalized_w = w / tf.reduce_sum(w, axis = 1)
    for i in range(T):
        h = X * normalized_w
        #normalized_h = h / tf.reshape(tf.reduce_sum(h,axis=1),[47,1])
        X = h
    return X

In [6]:
# Layer's sizes
T = 2
num_features = features.shape[1]
x_size = num_nodes # Number of input nodes: number of labeled nodes
h_size = num_nodes # Number of hidden nodes: number of labeled nodes
y_size = num_nodes # Number of outcomes: number of labeled nodes

# Symbols
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])
one_hot = tf.placeholder("float", shape=[None, y_size])
Features = tf.constant(features)

# Weight initializations # TODO num_features
Theta = tf.Variable(tf.cast(tf.diag(np.random.uniform(size=num_features)),tf.float32))
print(Theta)
# Forward propagation
yhat    = forwardprop_with_features(X, T, Features, Theta)

NameError: name 'features' is not defined

array([[ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.4,  1.5,  0.2],
       [ 4.4,  2.9,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 4.8,  3.4,  1.6,  0.2],
       [ 4.8,  3. ,  1.4,  0.1],
       [ 5.1,  3.5,  1.4,  0.3],
       [ 5.4,  3.4,  1.7,  0.2],
       [ 5. ,  3. ,  1.6,  0.2],
       [ 5.2,  3.5,  1.5,  0.2],
       [ 5.2,  3.4,  1.4,  0.2],
       [ 4.8,  3.1,  1.6,  0.2],
       [ 5.5,  4.2,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5. ,  3.2,  1.2,  0.2],
       [ 4.4,  3. ,  1.3,  0.2],
       [ 5. ,  3.5,  1.6,  0.6],
       [ 4.8,  3. ,  1.4,  0.3],
       [ 4.6,  3.2,  1.4,  0.2],
       [ 5.3,  3.7,  1.5,  0.2],
       [ 7. ,  3.2,  4.7,  1.4],
       [ 6.4,  3.2,  4.5,  1.5],
       [ 6.9,  3.1,  4.9,  1.5],
       [ 5.5,  2.3,  4. ,  1.3],
       [ 6.5,  2.8,  4.6,  1.5],
       [ 5.7,  2.8,  4.5,  1.3],
       [ 6. ,  2.2,  4. ,  1. ],
       [ 6.7,  3.1,  4.4,  1.4],
       [ 5

In [13]:
num_nodes

100

In [14]:
# Backward propagation
cost = tf.reduce_mean(tf.multiply(one_hot,tf.multiply(y, tf.log(yhat))))
#cost = tf.reduce_mean(tf.multiply(one_hot,tf.multiply(y, tf.log(yhat))))
updates = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

In [15]:
# Run SGD
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
for epoch in range(1):
    # Train with each example
    for i in range(len(LY)):
        X_ = np.hstack((LY_input[i: i + 1][0][:num_labeled],Uy))
        X_ = np.reshape(X_,(1,len(X_)))
        y_ = np.hstack((Ly,Uy))
        y_ = np.reshape(X_,(1,len(y_)))
        one_hot_ = np.hstack((LY_input[i: i + 1][0][num_labeled:],np.zeros(len(Uy))))
        one_hot_ = np.reshape(one_hot_,(1,len(one_hot_)))
        print(sess.run(updates, feed_dict={X:X_ , y:y_ , one_hot:one_hot_}))
sess.close()

None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
