In [1]:
import pandas as pd
import graph as helper
import networkx as nx
import numpy as np
import multiprocessing as mp
import itertools
from datetime import datetime
import math

In [27]:
# Each triad type is represented by four digits as each digit may be 0 or 1. 
# Let w be the common neighbor of two nodes in a triad, u and v:
# The first digit represent the direction of the edge between u,w: 0 if u points to w, else 1. 
# The second digit represents the sign of the edge between u,w: 0 if it`s positive, else 1.
# The third digit represents the direction of the edge between v,w: 0 if v points to w, else 1. 
# The fourth digit represents the sign of the edge between v,w: 0 if it`s positive, else 1.

TRIADS_TYPES = [(0,0,0,0),
               (0,0,0,1),
               (0,0,1,0), 
               (0,0,1,1),
               (0,1,0,0),
               (0,1,0,1),
               (0,1,1,0),
               (0,1,1,1),
               (1,0,0,0),
               (1,0,0,1),
               (1,0,1,0),
               (1,0,1,1),
               (1,1,0,0),
               (1,1,0,1),
               (1,1,1,0),
               (1,1,1,1)]

# currently redundent
REVERSE_TRIADS = {(0,0,0,0): (0,0,0,0),
               (0,0,0,1): (0, 1, 0, 0),
               (0,0,1,0): (1, 0, 0, 0),
               (0,0,1,1): (1, 1, 0, 0),
               (0,1,0,0): (0, 0, 0, 1),
               (0,1,0,1): (0, 1, 0, 1),
               (0,1,1,0): (1, 0, 0, 1),
               (0,1,1,1): (1, 1, 0, 1),
               (1,0,0,0): (0, 0, 1, 0),
               (1,0,0,1): (0, 1, 1, 0),
               (1,0,1,0): (1, 0, 1, 0),
               (1,0,1,1): (1, 1, 1, 0),
               (1,1,0,0): (0, 0, 1, 1),
               (1,1,0,1): (0, 1, 1, 1),
               (1,1,1,0): (1, 0, 1, 1),
               (1,1,1,1): (1, 1, 1, 1)}

NUMBER_OF_CORES = 6

def build_triads_df(graph):
    triads_data = {str(triad) : [0] for triad in TRIADS_TYPES}
    triads_index = []
    for (u,v) in itertools.combinations(graph.nodes(), 2):
        triads_index.extend([(u,v),(v,u)])
                            
    triads_df = pd.DataFrame(triads_data, triads_index)
    return triads_df, len(triads_index)
    
def get_triad_status(u, w, v, triad, graph_df):
    first_edge = 0
    second_edge = 0
    
    sign1 = -1 if triad[1] else 1
    sign2 = -1 if triad[3] else 1
    
    if (not triad[0]):
        first_edge = graph_df[(graph_df.ToNodeId == int(w)) & (graph_df.FromNodeId == int(u)) & (graph_df.Sign == sign1)].shape[0]
    else:
        first_edge = graph_df[(graph_df.ToNodeId == int(u)) & (graph_df.FromNodeId == int(w)) & (graph_df.Sign == sign1)].shape[0]
        
    if (not triad[2]):
        second_edge = graph_df[(graph_df.ToNodeId == int(w)) & (graph_df.FromNodeId == int(v)) & (graph_df.Sign == sign2)].shape[0]
    else:
        second_edge = graph_df[(graph_df.ToNodeId == int(v)) & (graph_df.FromNodeId == int(w)) & (graph_df.Sign == sign2)].shape[0]
        
    return 1 if (first_edge and second_edge) else 0

def process_frame(triads_df, undirected_graph, graph_df):
    for (u,v), row in triads_df.iterrows():
            triads_dict_for_pair = {triad : 0 for triad in TRIADS_TYPES}
            for w in sorted(nx.common_neighbors(undirected_graph, u, v)):
                for triad in TRIADS_TYPES:
                    triad_status = get_triad_status(u, w, v, triad, graph_df)
                    if triad_status:
                        print(u,w,v,triad,triad_status)
                        triads_dict_for_pair[triad] += 1
            for triad in triads_dict_for_pair.keys():
                triads_df.at[(u,v), str(triad)] += triads_dict_for_pair[triad]
            
            
def compute_triads(tsv_file):
    graph_df = pd.read_csv(tsv_file, sep='\t')
    graph = helper.build_graph(tsv_file)
    undirected_graph = graph.to_undirected()
                            
    triads_df, rows_count = build_triads_df(graph)
    triads_df.to_csv("tr1", sep = "\t")
    cs = math.floor(rows_count/NUMBER_OF_CORES)
    
    reader = pd.read_table("tr1", chunksize=cs)
    pool = mp.Pool(NUMBER_OF_CORES) 

    for df in reader:
        # process each data frame
        f = pool.apply_async(process_frame,[df, undirected_graph, graph_df])

    return triads_df

                    



In [28]:
time_of_start_computation = datetime.now()
td = compute_triads("./datasets/wiki-demo-5.tsv").to_csv("SEE IT IS SIMILIAR FOR REAL", sep = "\t")
time_of_end_computation = datetime.now()
triads_time = time_of_end_computation - time_of_start_computation

In [29]:
triads_time

datetime.timedelta(microseconds=122701)

In [10]:
time_of_start_computation = datetime.now()
compute_triads("./datasets/wiki-demo-5.tsv").to_csv("output5", sep = "\t")
time_of_end_computation = datetime.now()
triads_time = time_of_end_computation - time_of_start_computation

30 3 54 (1, 0, 1, 0) 1
30 3 214 (1, 0, 1, 0) 1
30 3 28 (1, 0, 1, 0) 1
30 3 286 (1, 0, 1, 0) 1
54 3 214 (1, 0, 1, 0) 1
54 3 28 (1, 0, 1, 0) 1
54 3 286 (1, 0, 1, 0) 1
214 3 28 (1, 0, 1, 0) 1
214 3 286 (1, 0, 1, 0) 1
28 3 286 (1, 0, 1, 0) 1


In [11]:
triads_time

datetime.timedelta(microseconds=289937)

In [46]:
print(6/2)

3.0


In [50]:
print(math.floor(6/2))

3
