In [1]:
import numpy as np

import scipy as sp
import scipy.sparse
import scipy.sparse.linalg

import sklearn
import sklearn.preprocessing

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import pickle
import networkx as nx

In [2]:
# twitter dataset

with open("ovchinnikov-rutwitterdataset/A.pkl", "rb") as file:
    A = np.load(file, encoding="latin1")

with open("ovchinnikov-rutwitterdataset/labeled_nodes.pkl", "rb") as file:
    labels = np.load(file, encoding="latin1")

with open("ovchinnikov-rutwitterdataset/i2t.pkl", "rb") as file:
    i2t = pickle.load(file)

with open("ovchinnikov-rutwitterdataset/t2i.pkl", "rb") as file:
    t2i = pickle.load(file)

In [3]:
# preprocessing

F = sklearn.preprocessing.normalize(A, axis=1, norm='l1')
B = sklearn.preprocessing.normalize(A.T, axis=1, norm='l1')
d = np.array(labels.todense())[:, 0]



In [4]:
a1 = 0.7
a2 = 0.7
a3 = 0.7

In [5]:
# we are looking for solution x == T(x)

def T(F, B, d, x):
    return a1 * F.dot(x.clip(0)) + a2 * B.dot(x.clip(-np.inf, 0)) + a3 * d

In [6]:
def h(x):
    ans = np.zeros_like(x)
    ans[x > 0.0] = 1.0
    return ans

In [7]:
def obj(x):
    return np.linalg.norm(x - T(F, B, d, x)) ** 2 / 2

In [8]:
def Jac(x):
    return sp.sparse.eye(F.shape[0], format='csr') - a1 * F.multiply(sp.sparse.csc_matrix(h(x).reshape(-1, 1))) - a2 * B.multiply(sp.sparse.csc_matrix(h(-x).reshape(-1, 1)))

In [11]:
def der(x):
    l = x - T(F, B, d, x)
    return Jac(x).T.dot(l)

In [12]:
def Hess(x):
    return Jac(x).T.dot(Jac(x))

In [12]:
def LineSearch(x, dx):
    alpha = obj(x) / np.linalg.norm(dx) ** 2
    while True:
        if obj(x + alpha * dx) < obj(x):
            return x + alpha * dx
        else:
            alpha /= 2

In [None]:
x_prev = d.copy()
x_next = x_prev.copy()

for k in range(100):
    d = sp.sparse.linalg.spsolve(Hess(x_prev), -der(x_prev))
    #x_next = LineSearch(x_prev, -dx)
    x_next = x_prev + d
    if obj(x_next) < 1.0:
        break
    print(k, obj(x_next))
    x_prev = x_next.copy()

In [13]:
der(d)

array([ 0.,  0.,  0., ...,  0.,  0.,  0.])

In [14]:
Hess(d)

<326130x326130 sparse matrix of type '<class 'numpy.float64'>'
	with 7982796 stored elements in Compressed Sparse Column format>

In [21]:
# RepRank

def RepRank(F, B, d, maxiter=200, x0=None, tol=1e-8, callback=None):
    if x0 is None:
        x_prev = d.copy()
    else:
        x_prev = x0.copy()

    for k in range(maxiter):
        x_next = T(F, B, d, x_prev)
        n = np.linalg.norm(x_next - x_prev)
        if callback is not None:
            callback(x_next, n)
        if n < tol:
            break
        x_prev = x_next

    ans = x_next.copy()
    return(k + 1, ans.reshape(-1,))

In [22]:
k, ans = RepRank(F, B, d)

In [34]:
np.linalg.norm(der(ans))

3.7241004357451673e-09