In [1]:
## Import Utils
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from datetime import datetime, date

In [2]:
## Load Data
# Read the a2q file into a list of raw text lines (line terminators are kept).
with open('sx-stackoverflow-a2q.txt') as handle:
    a2q = list(handle)

In [3]:
## Load Data
# Read the c2q file into a list of raw text lines (line terminators are kept).
with open('sx-stackoverflow-c2q.txt') as handle:
    c2q = list(handle)

In [4]:
## Load Data
# Read the c2a file into a list of raw text lines (line terminators are kept).
with open('sx-stackoverflow-c2a.txt') as handle:
    c2a = list(handle)

In [5]:
## Init Graph
# Empty directed graph.
# NOTE(review): `G` is not referenced by any of the cells visible in this section
# (each later cell builds its own G_a2q / G_c2q / G_c2a) — confirm it is still needed.
G = nx.DiGraph()

### Functionality 1

In [7]:
## Graph 1
# Build a weighted directed graph from the a2q records that fall inside the
# time window [t_start, t_end).

## Init
G_a2q = nx.DiGraph()

# Half-open time window; these two names are also read by the Graph 2 and
# Graph 3 cells.
t_start = datetime(2010, 10, 1)
t_end = datetime(2010, 11, 1)

# Weight contributed by every single a2q record to its edge.
W_A2Q = 0.6

## Import data
# Each record is read as "<source> <target> <unix timestamp>".
# NOTE: datetime.fromtimestamp() without a tz argument converts to the local
# time zone of the machine, so records close to the window boundaries can be
# classified differently on machines in different time zones.
for record in a2q:
    fields = record.split()  # any whitespace; also drops the trailing newline
    if not fields:
        # Blank line (e.g. a trailing empty line at end of file): nothing to import.
        continue
    src, dst = fields[0], fields[1]
    t = datetime.fromtimestamp(int(fields[2]))
    if not (t_start <= t < t_end):
        continue
    t_truncated = date(t.year, t.month, t.day)  # keep the day only
    if G_a2q.has_edge(src, dst):
        # Repeated interaction between the same ordered pair: accumulate on the
        # existing edge instead of creating a parallel one.
        edge_data = G_a2q[src][dst]
        edge_data['weight'] += W_A2Q
        edge_data['timestamp'].append(t_truncated)
    else:
        G_a2q.add_edge(src, dst, weight=W_A2Q, timestamp=[t_truncated])

In [8]:
## Graph 2
# Build a weighted directed graph from the c2q records that fall inside the
# time window [t_start, t_end) defined in the Graph 1 cell.

## Init
G_c2q = nx.DiGraph()

# Weight contributed by every single c2q record to its edge.
W_C2Q = 0.3

## Import data
# Each record is read as "<source> <target> <unix timestamp>".
# NOTE: datetime.fromtimestamp() without a tz argument converts to the local
# time zone of the machine, so records close to the window boundaries can be
# classified differently on machines in different time zones.
for record in c2q:
    fields = record.split()  # any whitespace; also drops the trailing newline
    if not fields:
        # Blank line (e.g. a trailing empty line at end of file): nothing to import.
        continue
    src, dst = fields[0], fields[1]
    t = datetime.fromtimestamp(int(fields[2]))
    if not (t_start <= t < t_end):
        continue
    t_truncated = date(t.year, t.month, t.day)  # keep the day only
    if G_c2q.has_edge(src, dst):
        # Repeated interaction between the same ordered pair: accumulate on the
        # existing edge instead of creating a parallel one.
        edge_data = G_c2q[src][dst]
        edge_data['weight'] += W_C2Q
        edge_data['timestamp'].append(t_truncated)
    else:
        G_c2q.add_edge(src, dst, weight=W_C2Q, timestamp=[t_truncated])

In [9]:
## Graph 3
# Build a weighted directed graph from the c2a records that fall inside the
# time window [t_start, t_end) defined in the Graph 1 cell.

## Init
G_c2a = nx.DiGraph()

# Weight contributed by every single c2a record to its edge.
W_C2A = 0.1

## Import data
# Each record is read as "<source> <target> <unix timestamp>".
# NOTE: datetime.fromtimestamp() without a tz argument converts to the local
# time zone of the machine, so records close to the window boundaries can be
# classified differently on machines in different time zones.
for record in c2a:
    fields = record.split()  # any whitespace; also drops the trailing newline
    if not fields:
        # Blank line (e.g. a trailing empty line at end of file): nothing to import.
        continue
    src, dst = fields[0], fields[1]
    t = datetime.fromtimestamp(int(fields[2]))
    if not (t_start <= t < t_end):
        continue
    t_truncated = date(t.year, t.month, t.day)  # keep the day only
    if G_c2a.has_edge(src, dst):
        # Repeated interaction between the same ordered pair: accumulate on the
        # existing edge instead of creating a parallel one.
        edge_data = G_c2a[src][dst]
        edge_data['weight'] += W_C2A
        edge_data['timestamp'].append(t_truncated)
    else:
        G_c2a.add_edge(src, dst, weight=W_C2A, timestamp=[t_truncated])

#### Is the graph directed or not?

##### Given a graph G=(V,E): if G is directed, the sum of the lengths of all its adjacency lists is |E| (in an undirected graph every edge is listed from both of its endpoints, so the sum is 2|E| instead).

In [76]:
# One token list per adjacency-list line of G_a2q; the first token of each
# list is the node itself, the remaining tokens are its listed neighbours.
l = [adj_line.split(' ') for adj_line in nx.generate_adjlist(G_a2q)]

In [115]:
def is_directed(Graph):
    """Tell whether ``Graph`` is directed by comparing adjacency sizes with |E|.

    In a directed graph every edge (u, v) is stored exactly once, in the
    adjacency of ``u``, so the adjacency sizes add up to |E|.  In an undirected
    graph every edge between two distinct nodes is stored under both
    endpoints, so the total exceeds |E|.

    The adjacency is read from the ``Graph`` argument itself, so the answer
    refers to the graph that was actually passed in rather than to
    module-level state computed for one particular graph.

    Parameters
    ----------
    Graph : graph object exposing ``adjacency()`` (an iterable of
        ``(node, neighbours)`` pairs) and ``number_of_edges()``.

    Returns
    -------
    bool
        True when the summed adjacency sizes equal the number of edges.
        A graph without edges (or with self-loops only) satisfies the
        equality regardless of its type, so True is returned in that case.
    """
    adjacency_total = 0
    for _node, neighbours in Graph.adjacency():
        adjacency_total += len(neighbours)
    return adjacency_total == Graph.number_of_edges()


In [116]:
# Run the hand-written adjacency-list test on the a2q graph.
is_directed(G_a2q)

True

In [117]:
# Check: compare the hand-written result above with networkx's own predicate.
nx.is_directed(G_a2q)

True

#### Number of users

##### The number of users is simply the number of nodes.

In [118]:
def n_users(Graph):
    """Return the number of users, i.e. how many nodes ``Graph.nodes`` yields."""
    return sum(1 for _ in Graph.nodes)

#### Number of answers/comments

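##### The number of answers/comments is taken to be the number of edges. Repeated interactions between the same ordered pair of users are merged into a single weighted edge, so this counts distinct user pairs rather than individual answers/comments.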

In [119]:
def n_answers(Graph):
    """Return the number of answers/comments, i.e. how many edges ``Graph.edges`` yields."""
    return sum(1 for _ in Graph.edges)

#### Average number of links per user

In [120]:
def average(n_edges,n_nodes):
    """Return the average number of links per user, ``n_edges / n_nodes``.

    Parameters
    ----------
    n_edges : int
        Number of edges (links) in the graph.
    n_nodes : int
        Number of nodes (users) in the graph.

    Returns
    -------
    float
        ``n_edges / n_nodes``, or ``0.0`` for a graph without nodes (instead
        of raising ``ZeroDivisionError``).
    """
    if n_nodes == 0:
        # Empty graph: there are no users to average over.
        return 0.0
    return n_edges / n_nodes

#### Density degree of the graph

##### The density degree for a directed graph is : D = |E| / (|V| * (|V| - 1))

In [121]:
def density_degre(n_edges, n_nodes):
    """Return the density of a directed graph, D = |E| / (|V| * (|V| - 1)).

    Parameters
    ----------
    n_edges : int
        Number of edges |E|.
    n_nodes : int
        Number of nodes |V|.

    Returns
    -------
    float
        The density, or ``0.0`` when the graph has fewer than two nodes: the
        denominator is zero there, and no edge between distinct nodes can
        exist, so the function reports 0 instead of raising
        ``ZeroDivisionError``.
    """
    if n_nodes <= 1:
        return 0.0
    return n_edges / (n_nodes * (n_nodes - 1))

#### Is the graph sparse or dense?

##### A sparse graph is a graph whose density D is $0 \leq D < \frac {1} {2}.$

In [122]:
def graph_mode(density):
    """Classify a graph as sparse or dense from its density.

    Parameters
    ----------
    density : float
        Density D of the graph.

    Returns
    -------
    str
        The "sparse" sentence when ``0 <= density < 1/2``; the "dense"
        sentence otherwise.  The boundary value D = 1/2 is classified as
        dense, and the dense sentence states the inclusive bound accordingly.
    """
    # Chained comparison: a logical range test instead of bitwise `&` on booleans.
    if 0 <= density < 1/2:
        return "The graph is sparse because its density D is  0 <= D < 1/2"
    return "The graph is dense because its density D is  1/2 <= D <= 1"

In [123]:
def functionality_1(Graph):
    """Print the overall features of ``Graph``.

    Reports, in order: whether the graph is directed, the number of users
    (nodes), the number of answers (edges), the average number of links per
    user, the density, and the sparse/dense verdict.  Nothing is returned.
    """
    directed_flag = is_directed(Graph)
    user_count = n_users(Graph)
    link_count = n_answers(Graph)
    links_per_user = average(link_count, user_count)
    density = density_degre(link_count, user_count)

    report_rows = (
        "Functionality 1 - Get the overall features of the graph",
        f"Is the graph directed?: {directed_flag}",
        f"Number of users: {user_count}",
        f"Number of answers: {link_count}",
        f"Average number of links per user: {links_per_user}",
        f"Graph density: {density}",
    )
    for row in report_rows:
        print(row)
    # The verdict is computed last, after the numeric rows have been printed.
    print(graph_mode(density))

In [124]:
# Print the overall-features report for the a2q graph.
functionality_1(G_a2q)

Functionality 1 - Get the overall features of the graph
Is the graph directed?: True
Number of users: 48647
Number of answers: 123536
Average number of links per user: 2.539437169815199
Graph density: 5.2202383953772136e-05
The graph is sparse because its density D is  0 <= D < 1/2
