In [1]:
import sys
import os
spr_path = "/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/code/SpringRank/python"
sys.path.append(os.path.abspath(spr_path))
import SpringRank_tools as SR
import csv
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import metrics


In [2]:
import networkx as nx
import numpy as np
import SpringRank_tools as sr
import tools as tl

In [3]:
input_data_dir = '/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/github/SpringRank/data/input/'
output_data_dir = '/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/github/SpringRank/data/output/'

In [4]:
def getGraph(indj):
    G = tl.build_graph_from_adjacency(indj)
    return G;

def graphProp(G):
    n=len(G.nodes)
    e=len(G.edges)
    print('no. of nodes= {0}, no. of edges= {1}'.format(n,e))

In [14]:
def formatChessFile(source,destination):
    with open(source,'r') as chess, open(destination, 'w') as out:
        chess = csv.reader(chess, delimiter=' ')
        writer = csv.writer(out, delimiter=' ')
        for row in chess:
            if '%' not in row:
                line=[]
                result = row[2].split('\t')
                if(result[0] == '1'):
                    line = [row[0],row[1],1]
                elif(result[0] == '-1'):
                    line = [row[1],row[0],1]
                if line:
                    writer.writerow(line)

def getChessGraphData(to_format):
    source = input_data_dir+'new_chess.data'
    destination = input_data_dir+'new_chess.data'
    if to_format: formatChessFile(source,destination)
    return getGraph(destination)

In [6]:
def run(G,alpha,l0,l1):
    nodes=list(G.nodes())			#  determines the order of the entries of matrix A
    A=nx.to_numpy_matrix(G,nodelist=nodes)
    '''
    Extracts SpringRank
    '''
    rank=sr.SpringRank(A,alpha=alpha,l0=l0,l1=l1)
    rank=tl.shift_rank(rank)   # (optional) shifts so that the min is in zero and the others are positive
    '''
    Order results so that the first node is the highest-ranked one
    '''
    sorted_tuples=[(nodes[i],rank[i]) for i in range(G.number_of_nodes())]
    #ordered_x= sorted(rank, key=lambda tup: int(tup[0]),reverse=False)
    return rank,sorted_tuples

def save(sorted_tuples,alpha,l0,l1,G,file):
    '''
    Prints results
    '''
    print('SpringRank scores:')
    outfile=output_data_dir+'/'+file+'_SpringRank_'+'a'+str(alpha)+'_l0_'+str(l0)+'_l1_'+str(l1)+'.dat'
    outf=open(outfile,'w')

    for i in range(G.number_of_nodes()):
        outf.write("{} {}\n".format(sorted_tuples[i][0],sorted_tuples[i][1]))
        # print nodes[i],rank[i]
        #print(X[i][0],X[i][1])
    print('Results saved in:', outfile)
    outf.close()
    


In [7]:
def getEqn39(rank,A,start,end,step):
    x = np.arange(start,end,step)
    y=[]
    for xi in x:
        y.append(tl.eqs39(xi,rank,A))
    return x,y

def eqn39SimplePlot(rank,A,save):
    x,y = getEqn39(rank,A,0.1,20,0.1);
    plt.plot(x,y)
    plt.title('Eqn 39 :'+save)
    if save:
        plt.savefig(output_data_dir+save+'.svg')

def prediction(beta,_scores):
    preds = {}
    for i in range(len(_scores)):
        for j in range(len(_scores)):
            preds[(i,j)] = 1/(1+np.exp(-2*beta*(_scores[i]-_scores[j])))
    return preds

def getAccuracy(preds,Adj,num_edges):
    loss=0;
    size,_ = Adj.shape
    for i in range(size):
        for j in range(size):
            loss+=Adj[(i,j)]-((Adj[(i,j)]+Adj[(j,i)])*preds[(i,j)])
    print(loss)
    loss=1-(loss*1.0/(2.0*num_edges))
    return loss

In [8]:
def getGroundTruth(preds,G):
    y_true = []
    y_target = []
    for i in range(len(G.nodes)):
        for j in range(len(G.nodes)):
            if(G.has_edge(str(i),str(j)) or G.has_edge(i,j)):
                y_true.append(1)
            else: y_true.append(0)
            y_target.append(preds[(i,j)])
    return y_true,y_target
                    
def plotROC(fpr,tpr,roc_auc,title):
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange',
             lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic : '+title)
    plt.legend(loc="lower right")
    plt.show()

In [36]:
# generative model 
def getC(beta,scores,n):
    c=10*n
    den = 0;
    for i in range(len(scores)):
        for j in range(len(scores)):
            den+=np.exp(-beta*0.5*(scores[i]-scores[j]-1)*(scores[i]-scores[j]-1))
    c = c/den
    return c

def eqn(si,sj,beta,c):
    return c*np.exp(-beta*0.5*(si-sj-1)*(si-sj-1))

def poisson(mean):
    return np.random.poisson(lam=mean,size=1)

def createNetwork(scores,beta,c):
    G = nx.DiGraph()
    for i in range(len(scores)):
        for j in range(len(scores)):
            if i!=j:
                mean = eqn(scores[i],scores[j],beta,c)
                w = poisson(mean)
                #print(weight)
                if(G.has_edge(i,j)):
                    w+=G.edges[(i,j)]['weight'][0]                    
                G.add_edge(i,j,weight=w)
    return G

def generateNetwork():
    beta=1
    number_of_nodes=100
    mu, sigma = 0.5, 0.1 # mean and standard deviation
    scores = np.random.normal(mu, sigma, number_of_nodes)
    c=getC(beta,scores,number_of_nodes)

    G = createNetwork(scores,beta,c)
    A = nx.to_numpy_matrix(G,nodelist=list(G.nodes))
    return G,A,scores

def getTrainingandTestSet(G):
    removed_edges = []
    edges_to_remove = list(G.edges)
    choice_idx = np.random.choice(G.number_of_edges(), int((G.number_of_edges())*0.2), replace=False)
    for i in range(len(choice_idx)):
        a,b=edges_to_remove[choice_idx[i]]
        G.remove_edge(a,b)
        removed_edges.append((a,b))
    return G,removed_edges



In [25]:
G_1,A_1,scores1 = generateNetwork()
G_2,A_2,scores2 = generateNetwork()


In [26]:
graphProp(G_1)
graphProp(G_2)

no. of nodes= 100, no. of edges= 9900
no. of nodes= 100, no. of edges= 9900


# Test 1

In [34]:
G_1_copy = G_1.copy()

In [37]:
G_1_train,edges1_test = getTrainingandTestSet(G_1_copy)

In [38]:
graphProp(G_1)
graphProp(G_1_train)


no. of nodes= 100, no. of edges= 9900
no. of nodes= 100, no. of edges= 7920


In [40]:
alpha=0.
l0=0.
l1=1.    
rank1_train,tuples1_train = run(G_1_train,alpha,l0,l1)
A_1_train = nx.to_numpy_matrix(G_1_train,nodelist=list(G_1_train.nodes))
beta1_train=tl.get_optimal_temperature(rank1_train,A_1_train)
print('Beta value for Synthetic network 1'.format(beta1_train))
preds1_train = prediction(beta1_train,rank1_train)
sigma1_train = getAccuracy(preds1_train,A_1_train,G_1_train.number_of_edges())

Beta value for Synthetic network 1
-2.99760216649e-15
