In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import os

import scipy
import networkx as nx

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

AttributeError: type object 'h5py.h5.H5PYConfig' has no attribute '__reduce_cython__'

In [None]:
# Experiment configuration shared by the cells below.
seed = 42  # passed to both generate_graph and random_split
no_users = 80  # number of agents: graph nodes and data shards
lambda_logistic = 1e-3  # l2 coefficient handed to loss / gradient / hessian

In [None]:
def random_split(X, y, n, seed):
    """Shuffle the samples and deal them out to ``n`` agents.

    A single permutation drawn from ``np.random.default_rng(seed)`` is applied
    to both ``X`` and ``y`` so rows and labels stay aligned, then each is cut
    into ``n`` consecutive chunks with ``np.array_split`` (chunk sizes differ by
    at most one when the sample count is not divisible by ``n``).

    Returns:
        (X_split, y_split): two lists of length ``n`` holding the per-agent
        feature arrays and label arrays.
    """
    shuffled = np.random.default_rng(seed).permutation(y.size)
    X_shuffled, y_shuffled = X[shuffled], y[shuffled]
    # array_split returns Python lists of arrays, not a stacked ndarray.
    return np.array_split(X_shuffled, n), np.array_split(y_shuffled, n)

In [None]:
def loss(w, A, b, l2):
    """l2-regularised logistic loss averaged over the samples.

    Computes ``mean_i log(1 + exp(-b_i * <A_i, w>)) + (l2 / 2) * ||w||^2``.

    Args:
        w: parameter vector, one entry per column of ``A``.
        A: sample matrix, one row per sample.
        b: per-sample labels (presumably +/-1 -- confirm against the data).
        l2: regularisation coefficient.

    Returns:
        The scalar objective value.
    """
    margins = b * np.dot(A, w)
    # log(1 + exp(-z)) written as logaddexp(0, -z): it stays finite for very
    # negative margins, where dividing by exp(z) (which underflows to 0)
    # would produce inf.
    per_sample = np.logaddexp(0, -margins)
    data_term = np.sum(per_sample) / len(b)
    reg = (np.linalg.norm(w) ** 2) * l2 / 2
    return data_term + reg

In [None]:
def gradient(w, A, b, l2):
    """Gradient of the l2-regularised, sample-averaged logistic loss.

    Returns ``-(1/m) * A.T @ (b * s) + l2 * w`` where
    ``s_i = 1 / (1 + exp(b_i * <A_i, w>))`` and ``m`` is the number of rows
    of ``A``.

    Args:
        w: parameter vector, one entry per column of ``A``.
        A: sample matrix, one row per sample.
        b: per-sample labels (presumably +/-1 -- confirm against the data).
        l2: regularisation coefficient.
    """
    m = A.shape[0]
    margins = b * (A @ w)
    # 1 / (1 + exp(z)) evaluated as exp(-log(1 + exp(z))): logaddexp never
    # forms exp(z) directly, so large margins no longer overflow.
    scale = np.exp(-np.logaddexp(0, margins))
    return -(A.T @ (b * scale)) / m + l2 * w

In [None]:
def hessian(w, A, b, l2):
    """Hessian of the l2-regularised, sample-averaged logistic loss.

    Returns ``(1/m) * A.T @ diag(s * (1 - s)) @ A + l2 * I`` with
    ``s = expit(b * (A @ w))`` and ``m`` the number of rows of ``A``.

    Args:
        w: parameter vector, one entry per column of ``A``.
        A: sample matrix, one row per sample.
        b: per-sample labels (presumably +/-1 -- confirm against the data).
        l2: regularisation coefficient.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to expose ``scipy.special`` on older SciPy releases.
    from scipy.special import expit

    n_samples, n_features = A.shape
    sigma = expit(b * (A @ w))
    curvature = sigma * (1 - sigma)
    # Broadcasting scales column i of A.T (i.e. sample i) by curvature[i].
    return (A.T * curvature) @ A / n_samples + l2 * np.eye(n_features)

In [None]:
def generate_graph(n, seed, p=0.4, max_attempts=1000):
    """Generate a random connected binomial (G(n, p)) graph.

    Graphs are drawn until one is connected. An integer ``seed`` is turned
    into a single ``random.Random`` stream that is reused across attempts, so
    every retry draws a *different* graph; passing the same integer to each
    call would regenerate the identical graph and never terminate if the
    first draw is disconnected. The first draw is the same one an integer
    seed produces directly, so results for seeds that already worked are
    unchanged.

    Args:
        n: number of nodes.
        seed: integer seed, or any random-state value accepted by networkx
            (passed through untouched when it is not an ``int``).
        p: edge probability (defaults to the previously hard-coded 0.4).
        max_attempts: upper bound on the number of draws.

    Returns:
        A connected ``networkx.Graph``.

    Raises:
        RuntimeError: if no connected graph is found in ``max_attempts`` draws.
    """
    import random

    rng = random.Random(seed) if isinstance(seed, int) else seed
    for _ in range(max_attempts):
        g = nx.generators.random_graphs.binomial_graph(n, p, seed=rng)
        if nx.algorithms.components.is_connected(g):
            return g
    raise RuntimeError(
        f"no connected graph found after {max_attempts} attempts (n={n}, p={p})"
    )

In [None]:
def degrees(A):
    """Column sums of adjacency matrix ``A``, returned with shape (n, 1).

    For a symmetric 0/1 adjacency matrix these are the node degrees.
    """
    n_nodes = A.shape[0]
    column_totals = np.sum(A, axis=0)
    return column_totals.reshape(n_nodes, 1)

In [None]:
def metropolis_weights(Adj):
    """Build the Metropolis mixing matrix for a graph.

    For every pair of distinct linked nodes ``W[i, j] = 1 / (1 + max(deg_i,
    deg_j))``; the diagonal is then set so that each row sums to one.

    Args:
        Adj: square adjacency matrix. Dense ndarrays, ``np.matrix`` and
            SciPy sparse matrices/arrays are all accepted; anything exposing
            ``toarray()`` is densified first. (Row-indexing a dense array or
            a sparse *array* yields a 1-D object, which the previous
            ``np.nonzero(...)[1]`` lookup could not handle.)

    Returns:
        Dense ``(N, N)`` float ndarray of mixing weights.
    """
    dense = Adj.toarray() if hasattr(Adj, "toarray") else Adj
    dense = np.asarray(dense)
    N = dense.shape[0]

    # Degrees are column sums, the same quantity degrees() computes.
    deg = dense.sum(axis=0)

    linked = dense != 0
    # Self-loops are not neighbours for mixing purposes; leaving them in
    # would make the row sum differ from one.
    np.fill_diagonal(linked, False)

    pair_max = np.maximum(deg[:, None], deg[None, :])
    W = np.where(linked, 1.0 / (1.0 + pair_max), 0.0)
    W[np.diag_indices(N)] = 1.0 - W.sum(axis=1)
    return W

In [None]:
# Communication topology: one node per agent, plus the mixing matrix derived
# from the graph's adjacency matrix.
G = generate_graph(no_users, seed)
adjacency_matrix = nx.adjacency_matrix(G)
comm_matrix = metropolis_weights(adjacency_matrix)

# Edge count first, then node count, one per line.
print(G.number_of_edges(), G.number_of_nodes(), sep="\n")

In [None]:
# Array read from 'x_opt.npy' in the working directory; a later cell indexes it
# next to theta_avg. Presumably the reference optimal parameters -- TODO confirm.
theta_opt = np.load('x_opt.npy')

In [None]:
# Read the full dataset from the working directory; labels are flattened to 1-D.
X = np.load('X.npy')
y = np.load('y.npy').ravel()

# Parameter dimension = number of columns of X.
num_feature = X.shape[1]  # the "+ 1" for a bias term is commented out

In [None]:
# Shard the data. NOTE: X and y are rebound from arrays to lists with one
# array per agent, so this cell is not meant to be re-run on its own.
X, y = random_split(X, y, no_users, seed)


def _per_agent_zeros(*shape):
    """Return a new list with one independent zero array of ``shape`` per agent."""
    return [np.zeros(shape) for _ in range(no_users)]


# Local parameter estimates.
theta = _per_agent_zeros(num_feature)

# Direction buffers used by the iteration cell.
d = _per_agent_zeros(num_feature)
d_old = _per_agent_zeros(num_feature)
d_new = _per_agent_zeros(num_feature)
d_f = _per_agent_zeros(num_feature)

# Local gradients and Hessians.
grad = _per_agent_zeros(num_feature)
Hess = _per_agent_zeros(num_feature, num_feature)

In [None]:
# Optimal objective function, i.e., f(x*).
# Hard-coded reference value subtracted from the running loss to form the
# optimality gap; presumably only valid for the dataset / l2 setting it was
# computed with -- TODO confirm it matches X.npy, y.npy and lambda_logistic.
obj0 = 0.333347206075705 # a9a dataset

In [None]:
# Hyper-parameters of the iteration.
n_iters = 200
gamma = 0.01  # step size of the two inner direction updates
alpha = 0.08  # multiple of the identity added to every local Hessian (was 1.1)

# Per-iteration records.
losses_Extg = []
re_Extg = np.zeros(shape=[n_iters])
theta0 = np.zeros(num_feature)
op_gap_Extg = np.zeros(shape=[n_iters])

identity = np.eye(num_feature)

for k in range(n_iters):
    print(k)

    # 1) Local computation: every agent updates its own direction from its
    #    own data only (a predictor step followed by a corrector step).
    for i in range(no_users):
        grad[i] = gradient(theta[i], X[i], y[i], lambda_logistic)
        Hess[i] = hessian(theta[i], X[i], y[i], lambda_logistic)
        H_reg = Hess[i] + alpha * identity
        d[i] = d_old[i] - gamma * (H_reg @ d_old[i] - grad[i])
        d_new[i] = d_old[i] - gamma * (H_reg @ d[i] - grad[i])

    # 2) Communication: mix directions with the neighbours. This must happen
    #    only after *all* agents finished step 1; doing it inside the loop
    #    above would mix in stale d_new[j] from the previous iteration for
    #    every neighbour j > i.
    for i in range(no_users):
        mixed = comm_matrix[i, i] * d_new[i]
        for j in G.neighbors(i):
            mixed = mixed + comm_matrix[i, j] * d_new[j]
        d_f[i] = mixed

    # 3) Apply the mixed direction and carry it over to the next iteration.
    for i in range(no_users):
        theta[i] = theta[i] - d_f[i]
        d_old[i] = d_f[i]

    # Performance check: loss of the network average, averaged over the
    # agents' shards, recorded once per iteration.
    theta_avg = sum(theta) / no_users
    loss_Extg = np.mean(
        [loss(theta_avg, X[u], y[u], lambda_logistic) for u in range(no_users)]
    )
    losses_Extg.append(loss_Extg)

    op_gap_Extg[k] = np.abs(losses_Extg[-1] - obj0)

# Optimality Gap

In [None]:
import matplotlib.pyplot as plt

# Persist the curve (written to 'op_gap_Extg.npy') independently of rendering.
np.save('op_gap_Extg', op_gap_Extg)

# Optimality gap per iteration on a logarithmic y-axis.
fig, ax = plt.subplots()
ax.semilogy(op_gap_Extg)
ax.set_xlabel('iteration k')
ax.set_ylabel('|f(theta_avg) - obj0|')
ax.set_title('Optimality gap (Extg)')
plt.show()

In [None]:
# Spot check: entry 10 of the array loaded from 'x_opt.npy', shown for
# comparison with theta_avg[10] in the following cell.
theta_opt[10]

In [None]:
# Spot check: entry 10 of theta_avg as left by the last loop iteration
# (compare with theta_opt[10] displayed in the previous cell).
theta_avg[10]