In [1]:
import sys
import os
spr_path = "/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/code/SpringRank/python"
sys.path.append(os.path.abspath(spr_path))
import SpringRank_tools as SR
import csv
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import metrics


In [2]:
import networkx as nx
import numpy as np
import SpringRank_tools as sr
import tools as tl

In [3]:
input_data_dir = '/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/github/SpringRank/data/input/'
output_data_dir = '/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/github/SpringRank/data/output/'

In [4]:
def getGraph(indj):
    G = tl.build_graph_from_adjacency(indj)
    return G;

def graphProp(G):
    n=len(G.nodes)
    e=len(G.edges)
    print('no. of nodes= {0}, no. of edges= {1}'.format(n,e))

In [14]:
def formatChessFile(source,destination):
    with open(source,'r') as chess, open(destination, 'w') as out:
        chess = csv.reader(chess, delimiter=' ')
        writer = csv.writer(out, delimiter=' ')
        for row in chess:
            if '%' not in row:
                line=[]
                result = row[2].split('\t')
                if(result[0] == '1'):
                    line = [row[0],row[1],1]
                elif(result[0] == '-1'):
                    line = [row[1],row[0],1]
                if line:
                    writer.writerow(line)

def getChessGraphData(to_format):
    source = input_data_dir+'new_chess.data'
    destination = input_data_dir+'new_chess.data'
    if to_format: formatChessFile(source,destination)
    return getGraph(destination)

In [1347]:
def run(G,alpha,l0,l1):
    nodes=list(G.nodes()) #  determines the order of the entries of matrix A
    A=nx.to_numpy_matrix(G,nodelist=nodes,weight='weight')
    '''
    Extracts SpringRank
    '''
    rank=sr.SpringRank(A,alpha=alpha,l0=l0,l1=l1)
    rank=tl.shift_rank(rank)   # (optional) shifts so that the min is in zero and the others are positive
    
    unordered_tuples=[(nodes[i],rank[i]) for i in range(G.number_of_nodes())]
    #ordered_x= sorted(rank, key=lambda tup: int(tup[0]),reverse=False)
    return rank,unordered_tuples

def save(sorted_tuples,alpha,l0,l1,G,file):
    '''
    Prints results
    '''
    print('SpringRank scores:')
    outfile=output_data_dir+'/'+file+'_SpringRank_'+'a'+str(alpha)+'_l0_'+str(l0)+'_l1_'+str(l1)+'.dat'
    outf=open(outfile,'w')

    for i in range(G.number_of_nodes()):
        outf.write("{} {}\n".format(sorted_tuples[i][0],sorted_tuples[i][1]))
        # print nodes[i],rank[i]
        #print(X[i][0],X[i][1])
    print('Results saved in:', outfile)
    outf.close()
    


In [1258]:
def getEqn39(rank,A,start,end,step):
    x = np.arange(start,end,step)
    y=[]
    for xi in x:
        y.append(tl.eqs39(xi,rank,A))
    return x,y

def eqn39SimplePlot(rank,A,save):
    x,y = getEqn39(rank,A,0.1,20,1)
    plt.plot(x,y)
    plt.title('Eqn 39 :'+save)
    if save:
        plt.savefig(output_data_dir+save+'.svg')

def prediction(beta,G,_scores):
    preds = {}
    for i in range(len(_scores)):
        for j in range(len(_scores)):
            if(i!=j):
                if not (G.has_edge(i,j) or G.has_edge(j,i)):
                    preds[(i,j)] = 0
                else:
                    preds[(i,j)] = 1/(1+np.exp(-2*beta*(_scores[i]-_scores[j])))
    return preds

def getAccuracy(preds,G,Adj,num_edges):
    loss=0
    total=0
    size,_ = Adj.shape
    for i in range(size):
        for j in range(size):
            if(i!=j and G.has_edge(i,j)):
                loss+=(Adj[(i,j)]*preds[(i,j)])
    for i in range(size):
        for j in range(size):
            if(i!=j and G.has_edge(i,j)):    
                total+=Adj[(i,j)]
    total = 1-(0.5*(total-loss)/num_edges)
    return total

def testsetAccuracy(preds,Adj_orij,Adj_test,num_edges):
    loss=0;
    size,_ = Adj_orij.shape
    for i in range(size):
        for j in range(size):
            if (i,j) in preds:
                loss+=Adj_test[(i,j)]-((Adj_test[(i,j)]+Adj_test[(j,i)])*preds[(i,j)])
    loss=1-(loss*1.0/(2.0*num_edges))
    return loss

def testsetPrediction(beta,ranks,test_set):
    preds = {}
    for i,j,_ in (test_set):
        preds[(i,j)] = 1/(1+np.exp(-2*beta*(ranks[i]-ranks[j])))
    return preds

def crosssetPrediction(beta,ranks):
    preds = {}
    for i in range(len(ranks)):
        for j in range(len(ranks)):
            preds[(i,j)] = 1/(1+np.exp(-2*beta*(ranks[i]-ranks[j])))
    return preds

def crosssetAccuracy(preds,Adj,num_edges):
    loss=0;
    size,_ = Adj.shape
    for i in range(size):
        for j in range(size):
            loss+=Adj[(i,j)]-((Adj[(i,j)]+Adj[(j,i)])*preds[(i,j)])
    #print(loss)
    loss=1-(loss*1.0/(2.0*num_edges))
    return loss


In [1376]:
# generative model 
#new
def expectedEqn(beta,c,si,sj):
    return c*np.exp(-beta*0.5*(si-sj-1)*(si-sj-1))

def getC(beta,num_nodes,scores):
    c = 10*num_nodes
    total = 0;
    for i in range(0,num_nodes):
        for j in range(0,num_nodes):
            total+=np.exp(-beta*0.5*(scores[i]-scores[j]-1)*(scores[i]-scores[j]-1))
    return c*1.0/(total*1.0)
#old
def eqn(si,sj,beta,c):
    return c*np.exp(-beta*0.5*(si-sj-1)*(si-sj-1))

def poisson(mean):
    return np.random.poisson(mean)

def createNetwork(scores,beta,c,num_nodes):
    G = nx.DiGraph()
    for i in range(0,num_nodes):
        for j in range(0,num_nodes):
            if(i!=j):
                G.add_node(i)
                G.add_node(j)
                mean = expectedEqn(beta,c,scores[i],scores[j])
                weight = np.random.poisson(mean)
                if weight>0:
                    G.add_edge(i,j,weight=weight)
    
    return G
def generateNetwork(beta):
    number_of_nodes=100
    mu, sigma = 0.5,1 # mean and standard deviation
    scores = np.random.normal(mu, sigma, number_of_nodes)
    c=getC(beta,scores,number_of_nodes)
    
    G = createNetwork(scores,beta,c)
    A = nx.to_numpy_matrix(G,nodelist=list(G.nodes),weight='weight')
    return G,A,scores

def getTrainingandTestSet(G):
    removed_edges = []
    edges_to_remove = list(G.edges)
    choice_idx = np.random.choice(len(edges_to_remove), int(len(edges_to_remove)*0.2), replace=False)
    print(len(choice_idx))
    for i in range(len(choice_idx)):
        a,b=edges_to_remove[choice_idx[i]]
        w=G.edges[(a,b)]['weight'] 
        #remove single edges
        G.remove_edge(a,b)
        #if w>1:
        #    G.add_edge(a,b,weight=w-1)
        #    removed_edges.append((a,b,w-1))
        #else:
        removed_edges.append((a,b,w))
        G.add_node(a)
        G.add_node(b)
    return G,removed_edges



In [1377]:
G_1,A_1,scores1 = generateNetwork(beta=1) # Network A
G_2,A_2,scores2 = generateNetwork(beta=1) # Network B


TypeError: only integer scalar arrays can be converted to a scalar index

In [1370]:
graphProp(G_1)
graphProp(G_2)

no. of nodes= 100, no. of edges= 950
no. of nodes= 100, no. of edges= 959


# Test 1

In [1307]:
G_1_copy = G_1.copy()
G_1_train,edges1_test = getTrainingandTestSet(G_1_copy) 

182


In [1308]:
len(edges1_test)

182

In [1309]:
graphProp(G_1)
graphProp(G_1_train)


no. of nodes= 100, no. of edges= 910
no. of nodes= 100, no. of edges= 728


In [1310]:
alpha=0.
l0=0.
l1=1.    
rank1_train,tuples1_train = run(G_1_train,alpha,l0,l1)
A_1_train = nx.to_numpy_matrix(G_1_train,nodelist=list(G_1_train.nodes),weight='weight')
temp1_train=tl.get_optimal_temperature(rank1_train,A_1_train)
beta1_train = 1/temp1_train
print((beta1_train))


0.5951843622768868


In [1324]:
preds1_train = prediction(0.01,G_1_train,rank1_train)
sigma1_train = getAccuracy(preds1_train,G_1_train,A_1_train,G_1_train.number_of_edges())

In [1325]:
sigma1_train

0.73211061875621786

In [1326]:
# Create a test graph from removed edges
G_1_test = nx.DiGraph()
for(a,b,w) in edges1_test:
    G_1_test.add_edge(a,b,weight=w)

for a in G_1.nodes():
    G_1_test.add_node(a)

In [1327]:
graphProp(G_1_test)

no. of nodes= 100, no. of edges= 182


In [1332]:
# Predict test set using trained parameters
alpha=0.
l0=0.
l1=1.    
A_1_test = nx.to_numpy_matrix(G_1_test,nodelist=list(G_1_test.nodes),weight='weight')
preds1_test = prediction(beta1_train,G_1_test,rank1_train)
sigma1_test = getAccuracy(preds1_test,G_1_test,A_1_test,G_1_test.number_of_edges())

In [1361]:
sigma1_test

0.77865629198217001

In [1356]:
preds2 = prediction(beta1_train,G_2,rank1_train)
sigma2 = getAccuracy(preds2,G_2,A_2,G_2.number_of_edges())

In [1357]:
sigma2

0.72589618427342828

In [1359]:
1-sigma2

0.27410381572657172

In [1358]:
preds2

{(50, 96): 0,
 (57, 50): 0,
 (40, 22): 0,
 (39, 70): 0,
 (43, 3): 0,
 (63, 76): 0,
 (46, 12): 0,
 (29, 44): 0,
 (89, 11): 0,
 (33, 41): 0,
 (16, 47): 0,
 (90, 42): 0,
 (73, 82): 0,
 (36, 34): 0,
 (2, 78): 0,
 (95, 30): 0,
 (97, 52): 0.54266650702425745,
 (80, 12): 0,
 (79, 76): 0,
 (9, 0): 0,
 (83, 9): 0,
 (12, 59): 0,
 (86, 22): 0,
 (69, 38): 0,
 (52, 98): 0,
 (15, 30): 0,
 (73, 35): 0,
 (99, 86): 0.61221966082059709,
 (19, 91): 0,
 (76, 88): 0,
 (99, 30): 0,
 (42, 88): 0,
 (5, 84): 0,
 (62, 31): 0,
 (45, 61): 0,
 (49, 58): 0,
 (52, 17): 0,
 (59, 97): 0,
 (25, 49): 0,
 (8, 87): 0,
 (82, 66): 0,
 (45, 78): 0,
 (28, 10): 0,
 (85, 87): 0,
 (32, 77): 0,
 (31, 15): 0,
 (14, 77): 0,
 (1, 40): 0,
 (75, 97): 0,
 (21, 37): 0,
 (4, 35): 0,
 (78, 46): 0,
 (8, 38): 0,
 (7, 22): 0,
 (65, 11): 0,
 (48, 77): 0.80322270335454971,
 (11, 83): 0,
 (85, 56): 0,
 (68, 0): 0,
 (72, 67): 0,
 (54, 87): 0,
 (1, 89): 0,
 (58, 26): 0,
 (41, 34): 0,
 (95, 64): 0,
 (78, 89): 0,
 (61, 63): 0,
 (44, 25): 0.58286060

In [1320]:
test = G_1.copy()


# Train 100%

In [1321]:
alpha=0.
l0=0.
l1=1.    
rank1,tuples1 = run(G_1,alpha,l0,l1)
A_1 = nx.to_numpy_matrix(G_1,nodelist=list(G_1.nodes),weight='weight')
temp1=tl.get_optimal_temperature(rank1,A_1)
beta1 = 1/temp1
print((beta1_train))


0.5951843622768868


In [1322]:
preds1 = prediction(beta1,G_1,rank1)
sigma1 = getAccuracy(preds1,G_1,A_1,G_1.number_of_edges())

In [1323]:
sigma1

0.80065484968266432

In [1362]:
G_small = nx.MultiDiGraph()
G_small.add_edge(0,1)
G_small.add_edge(0,1)
G_small.add_edge(1,0)
alpha=0.
l0=0.
l1=1.    
rank1_small,tuples1_small = run(G_small,alpha,l0,l1)
A_1_small = nx.to_numpy_matrix(G_small,nodelist=list(G_small.nodes),weight='weight')
temp1_small=tl.get_optimal_temperature(rank1_small,A_1_small)
beta1_small = 1/temp1_small
print((beta1_small))


1.9235933878519509


In [1365]:
preds2 = prediction(beta1_small,G_small,rank1_small)
sigma2 = getAccuracy(preds2,G_small,A_2,G_small.number_of_edges())

In [1366]:
preds2

{(0, 1): 0.50033333328395069, (1, 0): 0.49966666671604931}

In [1351]:
sigma2

0.84523984387496798

In [1352]:
beta1_small

1.9235933878519509

In [1353]:
rank1_small

array([ 0.66666667,  0.        ])