In [2]:
import pandas as pd
import graph as helper
import networkx as nx
import itertools
from datetime import datetime

In [3]:
def compute_degrees(tsv_file):
    """Count the signed in- and out-degree of every node id in a TSV edge list.

    Parameters
    ----------
    tsv_file : str, path-like or file-like
        Tab-separated edge list, read with ``pandas.read_csv``. It must have
        the columns ``FromNodeId``, ``ToNodeId`` and ``Sign``; edges with
        ``Sign == 1`` are counted as positive and ``Sign == -1`` as negative.

    Returns
    -------
    pandas.DataFrame
        One row per integer node id from 0 up to and including the largest id
        found in ``FromNodeId``/``ToNodeId``, with the integer columns
        ``positive_in_degree``, ``positive_out_degree``,
        ``negative_in_degree`` and ``negative_out_degree``. Ids in that range
        that never appear in the edge list get all-zero rows. An edge list
        with no rows yields an empty frame with the same columns.
    """
    degree_columns = ['positive_in_degree', 'positive_out_degree',
                      'negative_in_degree', 'negative_out_degree']
    df = pd.read_csv(tsv_file, sep='\t')
    if df.empty:
        return pd.DataFrame(0, index=pd.RangeIndex(0), columns=degree_columns)

    # Only the two node columns determine the id range (the Sign column must
    # not take part), and the +1 keeps the node with the largest id in the result.
    max_node_id = int(df[['FromNodeId', 'ToNodeId']].max().max())
    node_ids = pd.RangeIndex(max_node_id + 1)

    positive_edges = df[df.Sign == 1]
    negative_edges = df[df.Sign == -1]

    # value_counts tallies every node in one pass over the edges instead of
    # re-filtering the whole edge list once per node and per degree kind.
    counts = {
        'positive_in_degree': positive_edges.ToNodeId.value_counts(),
        'positive_out_degree': positive_edges.FromNodeId.value_counts(),
        'negative_in_degree': negative_edges.ToNodeId.value_counts(),
        'negative_out_degree': negative_edges.FromNodeId.value_counts(),
    }
    degrees_df = pd.DataFrame(
        {name: tally.reindex(node_ids, fill_value=0) for name, tally in counts.items()},
        index=node_ids,
    )
    return degrees_df.astype('int64')

## TODO: take the node ids from the graph itself instead of assuming they form a contiguous integer range. ##

In [4]:
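# Each triad type is represented by four binary digits (each digit is 0 or 1).
# As read by get_triad_status: the first digit is the direction of the edge between u and w
# (0: u -> w, 1: w -> u) and the second digit is the sign required of that edge (0: positive, 1: negative);
# the third and fourth digits encode the same for the edge between v and w (0: v -> w, 1: w -> v).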
# All 16 triad types: every 4-tuple of binary digits, listed in ascending
# binary order from (0, 0, 0, 0) to (1, 1, 1, 1).
TRIADS_TYPES = list(itertools.product((0, 1), repeat=4))

def triad_to_number(triad):
    """Return the position of ``triad`` within ``TRIADS_TYPES``.

    The lookup uses ``list.index``, so a value that is not one of the listed
    triad types raises ``ValueError``.
    """
    position = TRIADS_TYPES.index(triad)
    return position

def get_triad_status(u, w, v, triad, graph_df):
    """Tell whether u, v and their shared neighbor w form the given triad type.

    Parameters
    ----------
    u, w, v : node ids
        ``w`` is the node both edges share; the first edge joins ``u`` and
        ``w``, the second joins ``v`` and ``w``.
    triad : sequence of four 0/1 flags
        ``triad[0]``: direction of the first edge (0: u -> w, 1: w -> u).
        ``triad[1]``: sign of the first edge (0: ``Sign == 1``, 1: ``Sign == -1``).
        ``triad[2]``: direction of the second edge (0: v -> w, 1: w -> v).
        ``triad[3]``: sign of the second edge, encoded like ``triad[1]``.
    graph_df : pandas.DataFrame
        Edge list with the columns ``FromNodeId``, ``ToNodeId`` and ``Sign``.

    Returns
    -------
    int
        1 when both described edges are present in ``graph_df``, otherwise 0.
    """
    def edge_exists(source, target, negative):
        # Pick the sign value first and compare afterwards. Writing
        # `Sign == -1 if negative else 1` parses as `(Sign == -1) if negative else 1`,
        # which silently drops the sign filter for positive edges.
        wanted_sign = -1 if negative else 1
        matches = (
            (graph_df.FromNodeId == source)
            & (graph_df.ToNodeId == target)
            & (graph_df.Sign == wanted_sign)
        )
        return bool(matches.any())

    if triad[0]:
        first_edge = edge_exists(w, u, triad[1])
    else:
        first_edge = edge_exists(u, w, triad[1])
    if not first_edge:
        # The second lookup cannot change the answer once the first edge is missing.
        return 0

    if triad[2]:
        second_edge = edge_exists(w, v, triad[3])
    else:
        second_edge = edge_exists(v, w, triad[3])

    return 1 if second_edge else 0
          
def compute_triads(tsv_file):
    """Count, for every ordered node pair, the triads of each type it takes part in.

    The edge list is read from ``tsv_file`` as a tab-separated DataFrame, and
    the graph is built from the same file by ``helper.build_graph``
    (presumably a directed networkx graph; confirm against the helper module).

    Returns a DataFrame with one row per ordered pair ``(u, v)`` of distinct
    graph nodes and one column per entry of ``TRIADS_TYPES``, labelled
    ``str(triad)``. Each cell is the number of common neighbors ``w`` of ``u``
    and ``v`` in the undirected view of the graph for which
    ``get_triad_status(u, w, v, triad, ...)`` returns 1.
    """
    edges_df = pd.read_csv(tsv_file, sep='\t')
    directed_graph = helper.build_graph(tsv_file)
    neighbor_graph = directed_graph.to_undirected()

    # Pair each triad with its column label once instead of re-stringifying it in the loop.
    labelled_triads = [(triad, str(triad)) for triad in TRIADS_TYPES]

    # Every ordered pair of distinct nodes gets a row, so the table has n * (n - 1) rows.
    ordered_pairs = list(itertools.permutations(directed_graph.nodes(), 2))
    zero_columns = {label: [0] for _, label in labelled_triads}
    counts_df = pd.DataFrame(zero_columns, ordered_pairs)

    for pair in ordered_pairs:
        u, v = pair
        for w in sorted(nx.common_neighbors(neighbor_graph, u, v)):
            for triad, label in labelled_triads:
                so_far = counts_df.at[pair, label]
                counts_df.at[pair, label] = so_far + get_triad_status(u, w, v, triad, edges_df)

    return counts_df

In [None]:
# Time the triad computation on the wiki dataset using wall-clock timestamps.
time_of_start_computation = datetime.now()
triads_df = compute_triads("./datasets/wiki.tsv")
time_of_end_computation = datetime.now()
# Elapsed time as a datetime.timedelta.
triads_time = time_of_end_computation - time_of_start_computation

In [None]:
# Display the per-pair triad counts returned by compute_triads.
triads_df

In [None]:
# Display how long the triad computation took.
triads_time

In [None]:
# Time the degree computation on the wiki dataset using wall-clock timestamps.
time_of_start_computation_degrees = datetime.now()
degrees_df = compute_degrees("./datasets/wiki.tsv")
time_of_end_computation_degrees = datetime.now()
# Elapsed time as a datetime.timedelta.
degrees_time = time_of_end_computation_degrees - time_of_start_computation_degrees

In [None]:
# Display the per-node signed degree table returned by compute_degrees.
degrees_df 

In [None]:
# Display how long the degree computation took.
degrees_time