In [73]:
import sys
import os
spr_path = "/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/code/SpringRank/python"
sys.path.append(os.path.abspath(spr_path))
import SpringRank_tools as SR
import csv
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import metrics
import time
import pandas as pd

In [2]:
import networkx as nx
import numpy as np
import SpringRank_tools as sr
import tools as tl

In [3]:
# Base directories for the SpringRank input datasets and for generated output files.
# NOTE(review): these are absolute, machine-specific paths; they must be adjusted before
# the notebook can run anywhere else.
input_data_dir = '/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/github/SpringRank/data/input/'
output_data_dir = '/Users/Dixit/Documents/Studies/CU_Boulder/sem3/Independent_study/github/SpringRank/data/output/'

In [481]:
def getGraph(indj):
    '''
    Build a graph from the adjacency input `indj`.

    Thin wrapper: forwards `indj` to tl.build_graph_from_adjacency and returns
    whatever that helper returns.
    '''
    return tl.build_graph_from_adjacency(indj)

def getMultiDiGraphFromFile(indj):
    '''
    Read a space-delimited edge list and return it as a networkx MultiDiGraph.

    Every non-empty row adds one directed edge int(row[0]) -> int(row[1]);
    a row that appears several times therefore yields parallel edges.
    Columns after the second are ignored.

    indj: path of the edge-list file.
    Raises ValueError when an endpoint is not an integer and IndexError when a
    non-empty row has fewer than two fields.
    '''
    G = nx.MultiDiGraph()
    with open(indj, 'r') as edge_file:
        for row in csv.reader(edge_file, delimiter=' '):
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing empty
                # line); indexing such a row used to raise IndexError.
                continue
            G.add_edge(int(row[0]), int(row[1]))
    return G


def graphProp(A,description):
    '''
    Print `description` followed by a one-line size summary of matrix A.

    The node count is A.shape[0]; the edge count is A.sum(), i.e. the sum of
    all entries of A.
    '''
    summary = ' no. of nodes= {0}, no. of edges= {1}'.format(A.shape[0], A.sum())
    print(description)
    print(summary)

In [5]:
def formatChessFile(source,destination):
    '''
    Convert a raw chess results file into a space-delimited "winner loser 1" edge list.

    Each space-delimited row of `source` is handled as follows:
      * rows containing a field equal to '%' and rows with fewer than three
        fields (including blank lines) are skipped;
      * the third field is split on tabs and its first part is the result:
        '1'  -> writes [row[0], row[1], 1]
        '-1' -> writes [row[1], row[0], 1]
        anything else -> the row is dropped.

    source: path of the raw file.
    destination: path of the formatted file. It may be the same path as
        `source`: the input is read completely before the output is opened,
        so formatting in place no longer truncates the data before reading it.
    '''
    formatted = []
    with open(source,'r') as chess:
        for row in csv.reader(chess, delimiter=' '):
            if '%' in row or len(row) < 3:
                continue
            result = row[2].split('\t')
            if result[0] == '1':
                formatted.append([row[0],row[1],1])
            elif result[0] == '-1':
                formatted.append([row[1],row[0],1])

    # newline='' is what the csv module asks for on files handed to csv.writer;
    # it stops platforms with newline translation from doubling the '\r'.
    with open(destination,'w',newline='') as out:
        csv.writer(out, delimiter=' ').writerows(formatted)

def getChessGraphData(to_format):
    '''
    Return the chess graph built by getGraph from input_data_dir + 'new_chess.data'.

    to_format: when truthy, the file is first rewritten with formatChessFile.

    Source and destination are the same file. formatChessFile opens its
    destination for writing, so formatting straight onto the source would
    truncate it; the formatted output is therefore written to a scratch file
    that then replaces the original.
    NOTE(review): the raw input was presumably meant to have a different file
    name than the formatted output - confirm the intended source file.
    '''
    source = input_data_dir+'new_chess.data'
    destination = input_data_dir+'new_chess.data'
    if to_format:
        scratch = destination+'.tmp'
        formatChessFile(source,scratch)
        os.replace(scratch,destination)
    return getGraph(destination)

In [471]:
def run(A,alpha,l0,l1):
    '''
    Compute SpringRank scores for the adjacency matrix A.

    A is passed to sr.SpringRank together with alpha, l0 and l1, and the result
    is post-processed with tl.shift_rank (per the original note: shifts the
    scores so the minimum is zero and the others are positive).

    Returns (rank, pairs) where pairs is the list [(i, rank[i]), ...] for
    i in range(A.shape[0]), in node-index order.
    '''
    shifted = tl.shift_rank(sr.SpringRank(A,alpha=alpha,l0=l0,l1=l1))
    indexed_scores = [(node, shifted[node]) for node in range(A.shape[0])]
    return shifted, indexed_scores

def save(sorted_tuples,alpha,l0,l1,G,file):
    '''
    Write SpringRank results to a .dat file under output_data_dir.

    sorted_tuples: sequence of (node, score) pairs; the first
        G.number_of_nodes() entries are written, one "node score" line each.
    alpha, l0, l1: parameter values, used only to build the output file name.
    G: object exposing number_of_nodes().
    file: base name of the output file.

    Raises IndexError if sorted_tuples has fewer entries than G has nodes.
    The output file is managed by a `with` block so it is closed even when a
    write fails part-way.
    '''
    print('SpringRank scores:')
    outfile=output_data_dir+'/'+file+'_SpringRank_'+'a'+str(alpha)+'_l0_'+str(l0)+'_l1_'+str(l1)+'.dat'
    with open(outfile,'w') as outf:
        for i in range(G.number_of_nodes()):
            outf.write("{} {}\n".format(sorted_tuples[i][0],sorted_tuples[i][1]))
    print('Results saved in:', outfile)
    


In [286]:
def getEqn39(rank,A,start,end,step):
    '''
    Evaluate tl.eqs39 on an evenly spaced grid.

    The grid is np.arange(start,end,step). Returns (x, y) where x is that grid
    and y is a list with y[k] = tl.eqs39(x[k], rank, A).
    '''
    grid = np.arange(start,end,step)
    values = [tl.eqs39(point,rank,A) for point in grid]
    return grid,values

def eqn39SimplePlot(rank,A,save):
    '''
    Plot the getEqn39 curve for x in np.arange(0.1, 20, 1) on the current pyplot figure.

    rank, A: forwarded to getEqn39.
    save: label appended to the title 'Eqn 39 :'. When truthy it is also used as
        the file name (under output_data_dir, with '.svg' appended) for saving
        the figure. A falsy value (None, False, '') now just leaves the title
        without a label instead of failing on the string concatenation.
    '''
    x,y = getEqn39(rank,A,0.1,20,1)
    plt.plot(x,y)
    plt.title('Eqn 39 :'+(save if save else ''))
    if save:
        plt.savefig(output_data_dir+save+'.svg')


def prediction(beta,Adj,scores):
    '''
    Build a dict of pairwise predictions keyed by (i, j) for every index pair of Adj.

    For a pair with an interaction in either direction (Adj[i,j] > 0 or
    Adj[j,i] > 0) the value is 1/(1+exp(-2*beta*(scores[i]-scores[j])));
    for every other pair the value is 0.
    '''
    n = Adj.shape[0]
    preds = {}
    for i in range(n):
        for j in range(n):
            interacting = Adj[(i,j)]>0 or Adj[(j,i)]>0
            if not interacting:
                preds[(i,j)] = 0
                continue
            preds[(i,j)] = 1/(1+np.exp(-beta*2*(scores[i]-scores[j])))
    return preds

# TODO discuss about this part again
def getAccuracy(preds,Adj,num_edges):
    '''
    Score predictions against the adjacency matrix Adj.

    For every ordered pair i != j with an interaction in either direction, adds
    |Adj[i,j] - (Adj[i,j] + Adj[j,i]) * preds[(i,j)]| to an error total, then
    returns 1 - total / (2 * num_edges).

    preds: dict keyed by (i, j), as produced by prediction().
    num_edges: normalisation constant; callers in this notebook pass Adj.sum().
    '''
    n = Adj.shape[0]
    error = 0
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            if not (Adj[(i,j)]>0 or Adj[(j,i)]>0):
                continue
            pair_weight = Adj[(i,j)]+Adj[j,i]
            error += abs(Adj[(i,j)]-(pair_weight*preds[(i,j)]))
    # Each interacting pair is visited in both orders, (i,j) and (j,i), and the
    # accumulated error is normalised by twice num_edges.
    return 1-(error/(2*num_edges))

 


In [208]:
def expectedEqn(beta,c,si,sj):
    '''
    Return c * exp(-beta/2 * (si - sj - 1)**2).

    createNetwork uses this value as the Poisson mean of the edge weight i -> j.
    '''
    gap = si-sj-1
    return c*np.exp(-beta*0.5*gap*gap)

def getC(beta,num_nodes,scores):
    '''
    Return the scale constant c used by expectedEqn/createNetwork.

    c = (20 * num_nodes) / sum over all ordered pairs (i, j), including i == j,
    of exp(-beta/2 * (scores[i] - scores[j] - 1)**2), using the first
    num_nodes entries of `scores`.
    NOTE(review): the factor 20 presumably targets an average of about 20
    edges per node - confirm.
    '''
    target = 20*num_nodes
    normaliser = 0
    for i in range(num_nodes):
        si = scores[i]
        for j in range(num_nodes):
            gap = si-scores[j]-1
            normaliser += np.exp(-beta*0.5*gap*gap)
    return target*1.0/normaliser

def createNetwork(scores,beta,c,num_nodes):
    '''
    Sample a random weighted directed network as a (num_nodes x num_nodes) integer array.

    For every ordered pair i != j the weight A[i,j] is drawn from a Poisson
    distribution whose mean is expectedEqn(beta, c, scores[i], scores[j]).
    The diagonal stays zero. Draws come from the global np.random state, in
    row-major order.
    '''
    # The np.int alias was removed in NumPy 1.24; the builtin int selects the
    # same default integer dtype.
    A = np.zeros((num_nodes,num_nodes),dtype=int)
    for i in range(num_nodes):
        for j in range(num_nodes):
            if i != j:
                A[i,j] = np.random.poisson(expectedEqn(beta,c,scores[i],scores[j]))
    return A

def generateNetwork(beta, number_of_nodes=100, mu=0.5, sigma=1):
    '''
    Generate a synthetic ranked network.

    Scores are drawn from a normal distribution (mean `mu`, standard deviation
    `sigma`), the scale constant comes from getC, and edge weights are sampled
    by createNetwork.

    beta: passed to getC and createNetwork.
    number_of_nodes, mu, sigma: default to the previously hard-coded 100, 0.5, 1.

    Returns (A, scores): A is an np.matrix of shape
    (number_of_nodes, number_of_nodes) and scores has one entry per node.
    The earlier version drew number_of_nodes+1 scores, so the returned score
    vector was one element longer than the network; that off-by-one is fixed.
    '''
    #np.random.seed(int(time.time()))
    scores = np.random.normal(mu, sigma, number_of_nodes)
    c = getC(beta,number_of_nodes,scores)
    A = createNetwork(scores,beta,c,number_of_nodes)
    A = np.matrix(A,copy=False)
    return A,scores

def getEdgeList(A):
    '''
    Return every index pair (i, j) of the 2-D matrix A in row-major order.

    Despite the name, the list is not restricted to existing edges: it contains
    all pairs with 0 <= i, j < A.shape[0], including the diagonal and pairs
    whose entry is zero.
    '''
    size,_ = A.shape
    return [(row,col) for row in range(size) for col in range(size)]
        
    
# remove interaction, regardless of weight
def getTrainingandTestSet(A_orig):
    '''
    Split an adjacency matrix into training and test matrices.

    20% of all index pairs returned by getEdgeList(A_orig) are chosen at random
    without replacement. For each chosen pair (k, l) the entry is copied into
    the test matrix and zeroed in the training matrix, whatever its weight;
    the reverse entry (l, k) is left alone unless it is chosen too.

    Returns (A_train, A_test), both np.matrix with A_orig's shape. A_orig is
    not modified. Randomness comes from the global np.random state.
    '''
    A_train = A_orig.copy()
    # The np.int alias was removed in NumPy 1.24; the builtin int is equivalent.
    A_test  = np.zeros(A_orig.shape,dtype=int)
    edge_list = getEdgeList(A_orig)
    np.random.shuffle(edge_list)
    choice_idx = np.random.choice(len(edge_list), int(len(edge_list)*0.2), replace=False)
    for i in choice_idx:
        (k,l) = edge_list[i]
        A_test[(k,l)] = A_orig[(k,l)]
        A_train[(k,l)] = 0
    A_train = np.matrix(A_train,copy=False)
    A_test = np.matrix(A_test,copy=False)
    return A_train,A_test

def trainBetaAndRanks(A,alpha,l0,l1):
    '''
    Fit ranks on A with run() and derive the inverse temperature beta.

    beta is 1 / tl.get_optimal_temperature(ranks, A); it is printed as
    "Optimal Beta: ..." and returned together with the ranks as (beta, ranks).
    '''
    ranks = run(A,alpha,l0,l1)[0]
    beta = 1/tl.get_optimal_temperature(ranks,A)
    print("Optimal Beta: "+str(beta))
    return beta,ranks

def getPredictionsAndAccuracy(beta,A,ranks):
    '''
    Predict on matrix A with the given beta and ranks, and score the result.

    Calls prediction(beta, A, ranks), then getAccuracy with A.sum() as the
    edge count. Prints "Accuracy : ..." and returns (preds, acc).
    '''
    pair_predictions = prediction(beta,A,ranks)
    accuracy = getAccuracy(pair_predictions,A,A.sum())
    print("Accuracy : "+str(accuracy))
    return pair_predictions,accuracy


    

# Create networks A, B, C (A1, A2, A3); create training and test sets

In [230]:
def experimentBeta(beta):
    '''
    Run the three transfer experiments for one generating beta and tabulate the accuracies.

    Three independent networks A, B, C are generated with generateNetwork(beta).
    A, C and their element-wise sum A+C are each split with
    getTrainingandTestSet. For each of the three, ranks and beta are fitted on
    the training split (alpha=0, l0=0, l1=1) and the accuracy is measured on the
    training split, on the held-out split, and on the whole of network B.

    Returns a pandas DataFrame with one row per experiment and the columns
    'Training data (80%)', 'Test data (20%)', 'Network B (100%)'.
    The accuracies are stored as numbers: the previous version routed them
    through a mixed string/float np.array, which turned every value into a string.

    Relies on trainingExperiment, which is defined in a later cell of this
    notebook and must exist before this function is called.
    '''
    def _transferAccuracies(A_train,A_test,A_target):
        # Fit on the training split, then reuse the learned beta and ranks to
        # score the held-out split and the independent target network.
        beta_train,ranks_train,_,acc_train = trainingExperiment(A_train,0,0,1)
        _,acc_test = getPredictionsAndAccuracy(beta_train,A_test,ranks_train)
        _,acc_target = getPredictionsAndAccuracy(beta_train,A_target,ranks_train)
        return [acc_train,acc_test,acc_target]

    # Generation and splitting keep their original order so that a seeded run
    # consumes the random stream exactly as before.
    A1,_ = generateNetwork(beta=beta) # Adjacency Network A
    A2,_ = generateNetwork(beta=beta) # Adjacency Network B
    A3,_ = generateNetwork(beta=beta) # Adjacency Network C
    A1_train,A1_test = getTrainingandTestSet(A1)
    A3_train,A3_test = getTrainingandTestSet(A3)
    A13 = np.add(A1,A3)
    A13_train,A13_test = getTrainingandTestSet(A13)
    graphProp(A1,"Network A")
    graphProp(A1_train,"NetworkA Training")
    graphProp(A1_test,"NetworkA Test")
    graphProp(A2,"Network B")
    graphProp(A3,"NetworkC")
    graphProp(A13,"Network A+C")

    experiment_names = ["Experiment 1 (A-->B)","Experiment 2 (C-->B)","Experiment 3 (A+C-->B)"]
    splits = [(A1_train,A1_test),(A3_train,A3_test),(A13_train,A13_test)]
    accuracy_rows = [_transferAccuracies(train,test,A2) for train,test in splits]

    dataframe = pd.DataFrame(data=accuracy_rows,
                             index=experiment_names,
                             columns=['Training data (80%)','Test data (20%)','Network B (100%)'])
    return dataframe


In [241]:
# Stack the per-beta result tables into one frame indexed by (beta label, experiment).
# NOTE(review): `df` is filled by the loop cell that appears after this one (In [237]);
# on a fresh top-to-bottom run this cell has to be executed after that loop.
pd.concat(df)

Unnamed: 0,Unnamed: 1,Training data (80%),Test data (20%),Network B (100%)
beta=0.5,Experiment 1 (A-->B),0.619572709406,0.561186382232,0.509306692194
beta=0.5,Experiment 2 (C-->B),0.627319042419,0.562736216926,0.510058986469
beta=0.5,Experiment 3 (A+C-->B),0.60658844133,0.550319922606,0.51994887969
beta=1,Experiment 1 (A-->B),0.648261487327,0.6113420291,0.498731518271
beta=1,Experiment 2 (C-->B),0.650509688738,0.620589153257,0.519473440145
beta=1,Experiment 3 (A+C-->B),0.636235777718,0.593632354839,0.515945244624
beta=1.5,Experiment 1 (A-->B),0.645819293918,0.622961123849,0.506464645197
beta=1.5,Experiment 2 (C-->B),0.647398756451,0.626477766232,0.525046636834
beta=1.5,Experiment 3 (A+C-->B),0.630528822319,0.58299278813,0.516075513562
beta=2,Experiment 1 (A-->B),0.641965760676,0.632345599375,0.530074206843


In [237]:
# Run experimentBeta once per generating beta and keep each result table
# under the key "beta=<value>".
beta_exp = [0.5,1,1.5,2,2.5]
df = {}
for i in beta_exp:
    df["beta="+str(i)] = experimentBeta(i)

Network A
 no. of nodes= 100, no. of edges= 2007
NetworkA Training
 no. of nodes= 100, no. of edges= 1612
NetworkA Test
 no. of nodes= 100, no. of edges= 395
Network B
 no. of nodes= 100, no. of edges= 1987
NetworkC
 no. of nodes= 100, no. of edges= 1978
Network A+C
 no. of nodes= 100, no. of edges= 3985
Optimal Beta: 0.866707148328163
Accuracy : 0.619572709406
Accuracy : 0.561186382232
Accuracy : 0.509306692194
Optimal Beta: 0.8388936392200721
Accuracy : 0.627319042419
Accuracy : 0.562736216926
Accuracy : 0.510058986469
Optimal Beta: 1.0507163405013917
Accuracy : 0.60658844133
Accuracy : 0.550319922606
Accuracy : 0.51994887969
Network A
 no. of nodes= 100, no. of edges= 1962
NetworkA Training
 no. of nodes= 100, no. of edges= 1560
NetworkA Test
 no. of nodes= 100, no. of edges= 402
Network B
 no. of nodes= 100, no. of edges= 2028
NetworkC
 no. of nodes= 100, no. of edges= 2125
Network A+C
 no. of nodes= 100, no. of edges= 4087
Optimal Beta: 0.6026123583275417
Accuracy : 0.648261487327

In [243]:
# The cells from here to the results table repeat the steps of experimentBeta one at a time, at beta=1.
A1,scores1 = generateNetwork(beta=1) # Adjacency Network A


In [244]:
# Network B is not split below; it is used whole as the prediction target of each experiment.
A2,scores2 = generateNetwork(beta=1) # Adjacency Network B


In [245]:
# Network C is split below and also summed with A to form the A+C network.
A3,scores3 = generateNetwork(beta=1) # Adjacency Network C

In [246]:
# Split network A into a training matrix and a held-out test matrix.
A1_train,A1_test = getTrainingandTestSet(A1)

In [247]:
# Split network C into a training matrix and a held-out test matrix.
A3_train,A3_test = getTrainingandTestSet(A3)

In [248]:
# Combined network A+C: element-wise sum of the two adjacency matrices, split like the others.
A13 = np.add(A1,A3)
A13_train,A13_test = getTrainingandTestSet(A13)

In [249]:
# Print the node count and total edge weight (A.sum()) of each network and of A's two splits.
graphProp(A1,"Network A")
graphProp(A1_train,"NetworkA Training")
graphProp(A1_test,"NetworkA Test")
graphProp(A2,"Network B")
graphProp(A3,"NetworkC")
graphProp(A13,"Network A+C")




Network A
 no. of nodes= 100, no. of edges= 1978
NetworkA Training
 no. of nodes= 100, no. of edges= 1582
NetworkA Test
 no. of nodes= 100, no. of edges= 396
Network B
 no. of nodes= 100, no. of edges= 1939
NetworkC
 no. of nodes= 100, no. of edges= 1959
Network A+C
 no. of nodes= 100, no. of edges= 3937


In [261]:
def trainingExperiment(A,alpha,l0,l1):
    '''
    Fit ranks and beta on A, then score the fitted model on A itself.

    Returns (beta, ranks, preds, acc): beta and ranks come from
    trainBetaAndRanks; preds and acc from getPredictionsAndAccuracy on the same matrix.
    '''
    fitted_beta,fitted_ranks = trainBetaAndRanks(A,alpha,l0,l1)
    scored = getPredictionsAndAccuracy(fitted_beta,A,fitted_ranks)
    return fitted_beta,fitted_ranks,scored[0],scored[1]


# =================== Experiment 1  (A --> B) ===================


## Learn A_80%

In [287]:
# Fit ranks and beta on A's training split (alpha=0, l0=0, l1=1) and score that same split.
A1_beta_train,A1_ranks_train,A1_preds_train,A1_acc_train = trainingExperiment(A1_train,0,0,1)

Optimal Beta: 0.6172135468885139
Accuracy : 0.665131898504


## Predict A_20% using A_80% parameters


In [288]:
# Score A's held-out entries with the beta and ranks learned on A's training split.
A1_preds_test,A1_acc_test = getPredictionsAndAccuracy(A1_beta_train,A1_test,A1_ranks_train)


Accuracy : 0.61949475333


## Predict B_100% using A_80%

In [289]:
# Score the whole of network B with the beta and ranks learned on A's training split.
A12_preds,A12_acc = getPredictionsAndAccuracy(A1_beta_train,A2,A1_ranks_train)


Accuracy : 0.526545241197


# =================== Experiment 2 (C --> B) ===================

## Learn C_80%

In [269]:
# Fit ranks and beta on C's training split (alpha=0, l0=0, l1=1) and score that same split.
A3_beta_train,A3_ranks_train,A3_preds_train,A3_acc_train = trainingExperiment(A3_train,0,0,1)

Optimal Beta: 0.6165092052255828
Accuracy : 0.659602602139


## Predict C_20% using C_80% parameters


In [270]:
# Score C's held-out entries with the beta and ranks learned on C's training split.
A3_preds_test,A3_acc_test = getPredictionsAndAccuracy(A3_beta_train,A3_test,A3_ranks_train)


Accuracy : 0.612442368759


## Predict B_100% using C_80%

In [271]:
# Score the whole of network B with the beta and ranks learned on C's training split.
A32_preds,A32_acc = getPredictionsAndAccuracy(A3_beta_train,A2,A3_ranks_train)


Accuracy : 0.500275628171


# =================== Experiment 3 (A+C --> B) ===================

## Learn A_C_80%

In [272]:
# Fit ranks and beta on the A+C training split (alpha=0, l0=0, l1=1) and score that same split.
A13_beta_train,A13_ranks_train,A13_preds_train,A13_acc_train = trainingExperiment(A13_train,0,0,1)

Optimal Beta: 1.0048446412569256
Accuracy : 0.610476014078


## Predict A_C_20% using A_C_80% parameters


In [273]:
# Size check of the A+C network; the empty description prints a blank line before the summary.
graphProp(A13,"")


 no. of nodes= 100, no. of edges= 3937


In [274]:
# Score the held-out A+C entries with the beta and ranks learned on the A+C training split.
A13_preds_test,A13_acc_test = getPredictionsAndAccuracy(A13_beta_train,A13_test,A13_ranks_train)


Accuracy : 0.548814450619


## Predict B_100% using A_C_80%

In [275]:
# Score the whole of network B with the beta and ranks learned on the A+C training split.
A132_preds,A132_acc = getPredictionsAndAccuracy(A13_beta_train,A2,A13_ranks_train)


Accuracy : 0.505911385029


In [276]:
# Row labels for the results table, and the matching accuracy triples
# (training split, held-out split, network B) of each experiment.
experiment_names = ["Experiment 1 (A-->B)","Experiment 2 (C-->B)","Experiment 3 (A+C-->B)"]
experiments = [[A1_acc_train,A1_acc_test,A12_acc],[A3_acc_train,A3_acc_test,A32_acc],[A13_acc_train,A13_acc_test,A132_acc]]

In [277]:
# Results table: one row per experiment, one column per evaluation set.
# The accuracies are kept as numbers; packing them into one np.array together
# with the string labels (as before) converted every value to a string.
data = [
        [A1_acc_train,A1_acc_test,A12_acc],
        [A3_acc_train,A3_acc_test,A32_acc],
        [A13_acc_train,A13_acc_test,A132_acc]]
dataframe  = pd.DataFrame(data=data,
                         index=experiment_names,
                         columns=['Training data (80%)','Test data (20%)','Network B (100%)'])
print(dataframe)

                       Training data (80%) Test data (20%) Network B (100%)
Experiment 1 (A-->B)        0.654668461396   0.61308097098     0.5062469062
Experiment 2 (C-->B)        0.659602602139  0.612442368759   0.500275628171
Experiment 3 (A+C-->B)      0.610476014078  0.548814450619   0.505911385029


# Train 100%

# Trying a small graph. Things we discussed during the meeting

In [195]:
# Toy multigraph on nodes 0 and 1: two parallel edges 0->1 and one edge 1->0.
G_small = nx.MultiDiGraph()
G_small.add_edge(0,1)
G_small.add_edge(0,1)
G_small.add_edge(1,0)

0

In [196]:
# Show the toy graph's edges; the third element of each triple tells parallel edges apart.
G_small.edges

OutMultiEdgeView([(0, 1, 0), (0, 1, 1), (1, 0, 0)])

In [197]:
# Learn ranks and the optimal temperature on the toy graph, then score it.
# nx.to_numpy_matrix was removed in networkx 3.0; nx.to_numpy_array builds the
# adjacency array and np.matrix keeps the matrix type the cell used before.
A1_small = np.matrix(nx.to_numpy_array(G_small,nodelist=list(G_small.nodes)))
A1_beta_small,A1_ranks_small = trainBetaAndRanks(A1_small,0,0,1)
A1_preds_small,A1_acc_small = getPredictionsAndAccuracy(A1_beta_small,A1_small,A1_ranks_small)

Optimal Beta: 1.9235933878519509
Accuracy : 0.476211459833


In [199]:
# Twice the total edge weight of the toy graph, i.e. the 2*num_edges term getAccuracy divides by.
(A1_small+A1_small).sum()

6.0

In [552]:
# Inspect the pairwise predictions for the toy graph (dict keyed by (i, j)).
A1_preds_small

{(0, 0): 0,
 (0, 1): 0.92856093675019202,
 (1, 0): 0.071439063249808032,
 (1, 1): 0}

In [553]:
# NOTE(review): presumably the analytic beta for the toy graph - with a rank gap of 2/3,
# solving 1/(1+exp(-2*beta*(2/3))) = 2/3 gives beta = 3*ln(2)/4. Confirm.
beta_a = np.log(2)*3/4

In [554]:
# Check: with beta_a the predicted probability comes out at 2/3, the fraction of 0->1 edges in the toy graph.
1/(1+np.exp(-4/3*beta_a))

0.66666666666666663

In [555]:
# Show the ranks learned on the toy graph. `rank1_small` is not defined anywhere in the
# notebook (NameError on a fresh kernel); the ranks are stored in A1_ranks_small.
A1_ranks_small

array([ 0.66666667,  0.        ])

In [265]:
# Re-display the toy graph's edges for reference next to beta_a.
G_small.edges

OutMultiEdgeView([(0, 1, 0), (0, 1, 1), (1, 0, 0)])

In [266]:
# Numeric value of beta_a = 3*ln(2)/4.
beta_a

0.51986038541995894

# Chess matches (work in progress: everything beyond this point is still TODO)


In [79]:
# Load the formatted chess edge list as a MultiDiGraph (one edge per row of the file).
G_chess = getMultiDiGraphFromFile(input_data_dir+"formatted_chess_edgelist.data")



In [27]:
# NOTE(review): getTrainingandTestSet as defined above reads A_orig.shape and indexes
# A_orig[(k,l)], i.e. it expects an adjacency matrix, while G_chess is a MultiDiGraph.
# This cell presumably predates the current definition - confirm and pass a matrix instead.
G_chess_train,G_chess_test = getTrainingandTestSet(G_chess)
A_chess_test = nx.to_numpy_matrix(G_chess_test,nodelist=list(G_chess_test.nodes),weight='weight')

In [785]:
# NOTE(review): graphProp as defined above takes (A, description) and reads A.shape;
# these one-argument calls on graph objects match an earlier version of the helper.
graphProp(G_chess)
graphProp(G_chess_train)

no. of nodes= 6832, no. of edges= 36387
no. of nodes= 6832, no. of edges= 29110


In [786]:
# Fit SpringRank on the chess training data with alpha=0, l0=0, l1=1.
# NOTE(review): run() forwards its first argument to sr.SpringRank and reads A.shape[0];
# here it receives G_chess_train rather than the matrix A_chess_train built below - confirm.
alpha,l0,l1=0,0,1
rank_chess_train,tuples_chess = run(G_chess_train,alpha,l0,l1)

A_chess_train = nx.to_numpy_matrix(G_chess_train,nodelist=list(G_chess_train.nodes),weight='weight')

#get opt beta
#temp_chess_train=tl.get_optimal_temperature(rank_chess_train,A_chess_train)
#beta_chess_train = 1/temp_chess_train
# NOTE(review): beta_chess_train is only assigned in the commented-out lines above, so this
# print depends on a value left over in the kernel from an earlier run.
print((beta_chess_train))


0.6180489072195094


In [80]:
# Adjacency matrix of the full chess graph.
# nx.to_numpy_matrix was removed in networkx 3.0; nx.to_numpy_array builds the
# adjacency array and np.matrix keeps the matrix type used before.
A_chess = np.matrix(nx.to_numpy_array(G_chess))


In [81]:
# Count the parallel edges from node 3106 to node 2307 in the chess multigraph.
G_chess.number_of_edges(3106,2307)

5

In [None]:
# Display the chess adjacency matrix (the graph has 6832 nodes per the graphProp output above).
A_chess