In [82]:
import sys
import os
spr_path = "/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/code/SpringRank/python"
sys.path.append(os.path.abspath(spr_path))
import SpringRank_tools as SR
import csv
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import metrics
import time
import pandas as pd
import networkx as nx
import numpy as np
import SpringRank_tools as sr
import tools as tl

In [83]:
input_data_dir = '/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/github/SpringRank/data/input/'
output_data_dir = '/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/github/SpringRank/data/output/'

In [84]:
def graphProp(A,description):
    n=(A.shape[0])
    e=(A.sum())
    print(description)
    print(' no. of nodes= {0}, no. of edges= {1}'.format(n,e))

In [85]:
# SR 
def run(A,alpha,l0,l1):
    '''
    Extracts SpringRank
    '''
    rank=sr.SpringRank(A,alpha=alpha,l0=l0,l1=l1)
    rank=tl.shift_rank(rank)   # (optional) shifts so that the min is in zero and the others are positive
    
    unordered_tuples=[(i,rank[i]) for i in range(A.shape[0])]
    return rank,unordered_tuples

def save(sorted_tuples,alpha,l0,l1,G,file):
    '''
    Prints results
    '''
    print('SpringRank scores:')
    outfile=output_data_dir+'/'+file+'_SpringRank_'+'a'+str(alpha)+'_l0_'+str(l0)+'_l1_'+str(l1)+'.dat'
    outf=open(outfile,'w')

    for i in range(G.number_of_nodes()):
        outf.write("{} {}\n".format(sorted_tuples[i][0],sorted_tuples[i][1]))
    print('Results saved in:', outfile)
    outf.close()
    
def getEqn39(rank,A,start,end,step):
    x = np.arange(start,end,step)
    y=[]
    for xi in x:
        y.append(tl.eqs39(xi,rank,A))
    return x,y

def eqn39SimplePlot(rank,A,save):
    x,y = getEqn39(rank,A,0.1,20,1)
    plt.plot(x,y)
    plt.title('Eqn 39 :'+save)
    if save:
        plt.savefig(output_data_dir+save+'.svg')

   


In [86]:
def prediction(beta,Adj,scores):
    preds={}
    size=Adj.shape[0]
    for i in range(0,size):
        for j in range(0,size):
            if(Adj[(i,j)]>0 or Adj[(j,i)]>0):
                preds[(i,j)] = 1/(1+np.exp(-beta*2*(scores[i]-scores[j])))
            else: preds[(i,j)] = 0
    return preds

def getAccuracy(preds,Adj,num_edges):
    total=0
    size = Adj.shape[0]
    for i in range(0,size):
        for j in range(0,size):
            if(i!=j and (Adj[(i,j)]>0 or Adj[(j,i)]>0)):
                total += abs(Adj[(i,j)]-((Adj[(i,j)]+Adj[j,i])*preds[(i,j)]))
    total = 1-((total)/(2*num_edges))
    return total

 


# Create Synthetic network

In [87]:
def _expectedEqn(beta,c,si,sj):
    return c*np.exp(-beta*0.5*(si-sj-1)*(si-sj-1))

def _getC(beta,num_nodes,scores):
    c = 20*num_nodes
    total = 0;
    for i in range(0,num_nodes):
        for j in range(0,num_nodes):
            total+=np.exp(-beta*0.5*(scores[i]-scores[j]-1)*(scores[i]-scores[j]-1))
    return c*1.0/(total)

def _createNetwork(scores,beta,c,num_nodes):
    A = np.zeros((num_nodes,num_nodes),dtype=np.int)
    for i in range(0,num_nodes):
        for j in range(0,num_nodes):
            if(i!=j):
                mean = _expectedEqn(beta,c,scores[i],scores[j])
                weight = np.random.poisson(mean)
                A[i,j] = weight
    return A

# generate a 100 node network from normal distribution with given beta 
def generateNetwork(beta):
    number_of_nodes=100
    mu, sigma = 0.5,1 # mean and standard deviation
    #np.random.seed(int(time.time()))
    scores = np.random.normal(mu, sigma, number_of_nodes+1)
    c=_getC(beta,number_of_nodes,scores)
    A = _createNetwork(scores,beta,c,number_of_nodes)
    A = np.matrix(A,copy=False)
    return A,scores

def _getEdgeList(A):
    edge_list = []
    shape,_=A.shape
    for i in range(0,shape):
        for j in range(0,shape):
            edge_list.append((i,j))
    return edge_list
        
    
#Split given adjacency into train (80%) and test (20%)
#remove interaction, regardless of weight 
def getTrainingandTestSet(A_orig):
    A_train = A_orig.copy()
    A_test  = np.zeros((A_orig.shape),dtype=np.int)
    edge_list = _getEdgeList(A_orig)
    np.random.shuffle(edge_list)
    choice_idx = np.random.choice(len(edge_list), int(len(edge_list)*0.2), replace=False)
    for i in choice_idx:
        (k,l) = edge_list[i]
        A_test[(k,l)] = A_orig[(k,l)]
        A_train[(k,l)] = 0
    A_train = np.matrix(A_train,copy=False)
    A_test = np.matrix(A_test,copy=False)    
    return A_train,A_test


    

In [88]:
# Find beta and ranks for given adjacency ; should be np.matrix type
def trainBetaAndRanks(A,alpha,l0,l1):
    ranks,tuples = run(A,alpha,l0,l1)
    temp=tl.get_optimal_temperature(ranks,A)
    beta = 1/temp
    print("Optimal Beta: "+str(beta))
    return beta,ranks

def getPredictionsAndAccuracy(beta,A,ranks):
    preds = prediction(beta,A,ranks)
    acc = getAccuracy(preds,A,A.sum())
    print("Accuracy : "+str(acc))
    return preds,acc



# Create networks 1,2,3 ; Create training and test sets ; Run experiments

In [89]:
def experimentBeta(beta):
    A1,scores1 = generateNetwork(beta=beta) # Adjacency Network A
    A2,scores2 = generateNetwork(beta=beta) # Adjacency Network B
    A3,scores3 = generateNetwork(beta=beta) # Adjacency Network C
    A1_train,A1_test = getTrainingandTestSet(A1)
    A3_train,A3_test = getTrainingandTestSet(A3)
    A13 = np.add(A1,A3)
    A13_train,A13_test = getTrainingandTestSet(A13)
    graphProp(A1,"Network A")
    graphProp(A1_train,"NetworkA Training")
    graphProp(A1_test,"NetworkA Test")
    graphProp(A2,"Network B")
    graphProp(A3,"NetworkC")
    graphProp(A13,"Network A+C")
    
    # =================== Experiment 1  (A --> B) ===================
    A1_beta_train,A1_ranks_train,A1_preds_train,A1_acc_train = trainingExperiment(A1_train,0,0,1)
    A1_preds_test,A1_acc_test = getPredictionsAndAccuracy(A1_beta_train,A1_test,A1_ranks_train)
    A12_preds,A12_acc = getPredictionsAndAccuracy(A1_beta_train,A2,A1_ranks_train)
    
    # =================== Experiment 2 (C --> B) ===================
    A3_beta_train,A3_ranks_train,A3_preds_train,A3_acc_train = trainingExperiment(A3_train,0,0,1)
    A3_preds_test,A3_acc_test = getPredictionsAndAccuracy(A3_beta_train,A3_test,A3_ranks_train)
    A32_preds,A32_acc = getPredictionsAndAccuracy(A3_beta_train,A2,A3_ranks_train)

    # =================== Experiment 3 (A+C --> B) ===================
    A13_beta_train,A13_ranks_train,A13_preds_train,A13_acc_train = trainingExperiment(A13_train,0,0,1)
    A13_preds_test,A13_acc_test = getPredictionsAndAccuracy(A13_beta_train,A13_test,A13_ranks_train)
    A132_preds,A132_acc = getPredictionsAndAccuracy(A13_beta_train,A2,A13_ranks_train)

    experiment_names = ["Experiment 1 (A-->B)","Experiment 2 (C-->B)","Experiment 3 (A+C-->B)"]
    experiments = [[A1_acc_train,A1_acc_test,A12_acc],[A3_acc_train,A3_acc_test,A32_acc],[A13_acc_train,A13_acc_test,A132_acc]]
    data = np.array([
        ['','Training data (80%)','Test data (20%)','Network B (100%)'],
        [experiment_names[0],A1_acc_train,A1_acc_test,A12_acc],
        [experiment_names[1],A3_acc_train,A3_acc_test,A32_acc],
        [experiment_names[2],A13_acc_train,A13_acc_test,A132_acc]])
    dataframe  = pd.DataFrame(data=data[1:,1:],
                         index=data[1:,0],
                         columns=data[0,1:])
    return dataframe


# Run experiment and Results

In [90]:
# Run experiments with beta values
beta_exp = [0.5,1,1.5,2,2.5]
df = {}
for i in beta_exp:
    df["beta="+str(i)] = experimentBeta(i)

Network A
 no. of nodes= 100, no. of edges= 2010
NetworkA Training
 no. of nodes= 100, no. of edges= 1609
NetworkA Test
 no. of nodes= 100, no. of edges= 401
Network B
 no. of nodes= 100, no. of edges= 1945
NetworkC
 no. of nodes= 100, no. of edges= 1998
Network A+C
 no. of nodes= 100, no. of edges= 4008
Optimal Beta: 0.8346770478808474
Accuracy : 0.642798161205
Accuracy : 0.571079092621
Accuracy : 0.543114209046
Optimal Beta: 0.8308165715437807
Accuracy : 0.639192549883
Accuracy : 0.586462409281
Accuracy : 0.54622890729
Optimal Beta: 1.0753593551670053
Accuracy : 0.644925473168
Accuracy : 0.555412252365
Accuracy : 0.549808759879
Network A
 no. of nodes= 100, no. of edges= 2000
NetworkA Training
 no. of nodes= 100, no. of edges= 1621
NetworkA Test
 no. of nodes= 100, no. of edges= 379
Network B
 no. of nodes= 100, no. of edges= 1930
NetworkC
 no. of nodes= 100, no. of edges= 1992
Network A+C
 no. of nodes= 100, no. of edges= 3992
Optimal Beta: 0.6856866752694567
Accuracy : 0.6662844534

In [91]:
pd.concat(df)

Unnamed: 0,Unnamed: 1,Training data (80%),Test data (20%),Network B (100%)
beta=0.5,Experiment 1 (A-->B),0.642798161205,0.571079092621,0.543114209046
beta=0.5,Experiment 2 (C-->B),0.639192549883,0.586462409281,0.54622890729
beta=0.5,Experiment 3 (A+C-->B),0.644925473168,0.555412252365,0.549808759879
beta=1,Experiment 1 (A-->B),0.666284453442,0.619964457266,0.531134099689
beta=1,Experiment 2 (C-->B),0.661521973724,0.619193868688,0.52523492359
beta=1,Experiment 3 (A+C-->B),0.649600933851,0.567190371921,0.528370153921
beta=1.5,Experiment 1 (A-->B),0.653553551976,0.613849527839,0.520881434671
beta=1.5,Experiment 2 (C-->B),0.653520886797,0.620613202785,0.520971026466
beta=1.5,Experiment 3 (A+C-->B),0.656982337577,0.598999782782,0.516612373739
beta=2,Experiment 1 (A-->B),0.647072250069,0.628661086923,0.508939631558


# Same as above but done step by step

In [92]:
A1,scores1 = generateNetwork(beta=1) # Adjacency Network A
A2,scores2 = generateNetwork(beta=1) # Adjacency Network B
A3,scores3 = generateNetwork(beta=1) # Adjacency Network C
A1_train,A1_test = getTrainingandTestSet(A1)
A3_train,A3_test = getTrainingandTestSet(A3)


In [93]:
graphProp(A1,"Network A")
graphProp(A1_train,"NetworkA Training")
graphProp(A1_test,"NetworkA Test")
graphProp(A2,"Network B")
graphProp(A3,"NetworkC")
graphProp(A13,"Network A+C")




Network A
 no. of nodes= 100, no. of edges= 1934
NetworkA Training
 no. of nodes= 100, no. of edges= 1559
NetworkA Test
 no. of nodes= 100, no. of edges= 375
Network B
 no. of nodes= 100, no. of edges= 2005
NetworkC
 no. of nodes= 100, no. of edges= 1899
Network A+C
 no. of nodes= 100, no. of edges= 3953


In [95]:
# Helper function
def trainingExperiment(A,alpha,l0,l1):
    beta,ranks = trainBetaAndRanks(A,alpha,l0,l1)
    preds,acc =  getPredictionsAndAccuracy(beta,A,ranks)
    return beta,ranks,preds,acc


# =================== Experiment 1  (A --> B) ===================


## Learn A_80%

In [102]:
A1_beta_train,A1_ranks_train,A1_preds_train,A1_acc_train = trainingExperiment(A1_train,0,0,1)

Optimal Beta: 0.5806825704042873
Accuracy : 0.650598180165


## Predict A_20% using A_80% parameters


In [97]:
A1_preds_test,A1_acc_test = getPredictionsAndAccuracy(A1_beta_train,A1_test,A1_ranks_train)


Accuracy : 0.607578013814


## Predict B_100% using A_80%

In [98]:
A12_preds,A12_acc = getPredictionsAndAccuracy(A1_beta_train,A2,A1_ranks_train)


Accuracy : 0.565225059181


# =================== Experiment 2 (C --> B) ===================

## Learn C_80%

In [99]:
A3_beta_train,A3_ranks_train,A3_preds_train,A3_acc_train = trainingExperiment(A3_train,0,0,1)

Optimal Beta: 0.5804397833656891
Accuracy : 0.661925649906


## Predict C_20% using C_80% parameters


In [100]:
A3_preds_test,A3_acc_test = getPredictionsAndAccuracy(A3_beta_train,A3_test,A3_ranks_train)


Accuracy : 0.62495057311


## Predict B_100% using C_80%

In [104]:
A32_preds,A32_acc = getPredictionsAndAccuracy(A3_beta_train,A2,A3_ranks_train)


Accuracy : 0.515975461812


# =================== Experiment 3 (A+C --> B) ===================

## Learn A_C_80%

In [105]:
A13_beta_train,A13_ranks_train,A13_preds_train,A13_acc_train = trainingExperiment(A13_train,0,0,1)

Optimal Beta: 0.9487078859705351
Accuracy : 0.655807623539


## Predict A_C_20% using A_C_80% parameters


In [107]:
A13_preds_test,A13_acc_test = getPredictionsAndAccuracy(A13_beta_train,A13_test,A13_ranks_train)


Accuracy : 0.559290888441


## Predict B_100% using A_C_80%

In [108]:
A132_preds,A132_acc = getPredictionsAndAccuracy(A13_beta_train,A2,A13_ranks_train)


Accuracy : 0.546357060927


In [109]:
experiment_names = ["Experiment 1 (A-->B)","Experiment 2 (C-->B)","Experiment 3 (A+C-->B)"]
experiments = [[A1_acc_train,A1_acc_test,A12_acc],[A3_acc_train,A3_acc_test,A32_acc],[A13_acc_train,A13_acc_test,A132_acc]]

In [110]:
data = np.array([
        ['','Training data (80%)','Test data (20%)','Network B (100%)'],
        [experiment_names[0],A1_acc_train,A1_acc_test,A12_acc],
        [experiment_names[1],A3_acc_train,A3_acc_test,A32_acc],
        [experiment_names[2],A13_acc_train,A13_acc_test,A132_acc]])
dataframe  = pd.DataFrame(data=data[1:,1:],
                         index=data[1:,0],
                         columns=data[0,1:])
print(dataframe)

                       Training data (80%) Test data (20%) Network B (100%)
Experiment 1 (A-->B)        0.650598180165  0.607578013814   0.565225059181
Experiment 2 (C-->B)        0.661925649906   0.62495057311   0.515975461812
Experiment 3 (A+C-->B)      0.655807623539  0.559290888441   0.546357060927


# Trying a small graph. Things we discussed during the meeting. Nothing interesting beyond this

In [195]:
#create  a toy graph
G_small = nx.MultiDiGraph()
G_small.add_edge(0,1)
G_small.add_edge(0,1)
G_small.add_edge(1,0)

0

In [196]:
G_small.edges

OutMultiEdgeView([(0, 1, 0), (0, 1, 1), (1, 0, 0)])

In [197]:
# learn ranks and get optimal temperature 
A1_small = nx.to_numpy_matrix(G_small,nodelist=list(G_small.nodes))
A1_beta_small,A1_ranks_small = trainBetaAndRanks(A1_small,0,0,1)
A1_preds_small,A1_acc_small = getPredictionsAndAccuracy(A1_beta_small,A1_small,A1_ranks_small)

Optimal Beta: 1.9235933878519509
Accuracy : 0.476211459833


In [199]:
(A1_small+A1_small).sum()

6.0

In [552]:
A1_preds_small

{(0, 0): 0,
 (0, 1): 0.92856093675019202,
 (1, 0): 0.071439063249808032,
 (1, 1): 0}

In [553]:
beta_a = np.log(2)*3/4

In [554]:
1/(1+np.exp(-4/3*beta_a))

0.66666666666666663

In [555]:
rank1_small

array([ 0.66666667,  0.        ])

In [265]:
G_small.edges

OutMultiEdgeView([(0, 1, 0), (0, 1, 1), (1, 0, 0)])

In [266]:
beta_a

0.51986038541995894