# Homework

In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from datetime import datetime

## Graph

In [2]:
def from_time_int_to_dates(time_interval):
    """Convert a pair of ISO-8601 date strings into integer Unix timestamps.

    Parameters
    ----------
    time_interval : sequence of two str
        ``(start, end)`` in a format accepted by ``datetime.fromisoformat``,
        e.g. ``("2008-11-01", "2008-11-02")``.

    Returns
    -------
    tuple of (int, int)
        ``(start_d, end_d)`` as whole seconds since the Unix epoch.

    Raises
    ------
    ValueError
        If either string is not a valid ISO-8601 date/datetime.

    Notes
    -----
    Values without an explicit UTC offset are interpreted as UTC. Previously
    naive values were interpreted in the machine's local time zone, so the
    same interval produced different bounds on differently configured
    machines. Values that carry their own offset are honoured as given.
    """
    # Local import: the notebook's import cell only brings in `datetime`.
    from datetime import timezone

    def _to_epoch_seconds(iso_string):
        # Parse one ISO string and return whole seconds since the epoch.
        moment = datetime.fromisoformat(iso_string)
        if moment.tzinfo is None:
            # Pin naive values to UTC so the result is host-independent.
            moment = moment.replace(tzinfo=timezone.utc)
        # Truncate to int so the bounds compare directly with the integer
        # timestamps parsed from the edge-list files.
        return int(moment.timestamp())

    start_d = _to_epoch_seconds(time_interval[0])
    end_d = _to_epoch_seconds(time_interval[1])
    return start_d, end_d

In [3]:
def get_full_graph(file):
    """Build the unweighted directed graph of every interaction in one edge-list file.

    Parameters
    ----------
    file : int
        Key selecting the input file: 1 -> "sx-stackoverflow-a2q.txt",
        2 -> "sx-stackoverflow-c2q.txt", 3 -> "sx-stackoverflow-c2a.txt".
        The file is opened relative to the current working directory.

    Returns
    -------
    networkx.DiGraph
        One edge per distinct (first field, second field) pair. Node labels
        are the string tokens read from the file; timestamps are ignored.

    Raises
    ------
    KeyError
        If ``file`` is not one of 1, 2, 3.
    IndexError
        If a non-blank line has fewer than two whitespace-separated fields.
    """
    # Initialize the directed graph
    G = nx.DiGraph()

    # Create mapping of files
    map_files = {1: "sx-stackoverflow-a2q.txt", 2:"sx-stackoverflow-c2q.txt", 3:"sx-stackoverflow-c2a.txt"}

    with open(map_files[file], "r", encoding="UTF-8") as f:
        # Iterate over the handle instead of f.readlines() so the whole file
        # is never held in memory; the progress bar therefore shows a running
        # count and rate rather than a percentage.
        for line in tqdm(f):

            # Split on any whitespace so the trailing newline never ends up
            # inside a field.
            elems = line.split()
            if not elems:
                # Blank line (e.g. a trailing empty line): nothing to add.
                continue

            # add_edge on an edge that already exists leaves the graph
            # unchanged, so no membership test is needed first.
            G.add_edge(elems[0], elems[1])

    return G

In [None]:
# Full (all timestamps) graph for file key 1, "sx-stackoverflow-a2q.txt".
graph1_full = get_full_graph(file=1)

 78%|███████▊  | 13913140/17823525 [19:07<04:44, 13727.55it/s] 

In [None]:
# Full (all timestamps) graph for file key 2, "sx-stackoverflow-c2q.txt".
graph2_full = get_full_graph(file=2)

In [None]:
# Full (all timestamps) graph for file key 3, "sx-stackoverflow-c2a.txt".
graph3_full = get_full_graph(file=3)

In [None]:
# Show the one-line summary of each full graph, one per output line.
print(graph1_full, graph2_full, graph3_full, sep="\n")

#### Write the 3 full graphs into files

In [None]:
# Save graph1_full in GML format to the file "graph1_full" (path is relative
# to the current working directory) so it can be reloaded without re-parsing.
nx.write_gml(graph1_full, "graph1_full")

In [None]:
# Save graph2_full in GML format to the file "graph2_full" (path is relative
# to the current working directory) so it can be reloaded without re-parsing.
nx.write_gml(graph2_full, "graph2_full")

In [None]:
# Save graph3_full in GML format to the file "graph3_full" (path is relative
# to the current working directory) so it can be reloaded without re-parsing.
nx.write_gml(graph3_full, "graph3_full")

#### Read the 3 full graphs from files

In [None]:
# Load the saved GML files and bind the results to the names used for the
# full graphs; without the assignments the parsed graphs are thrown away and
# this cell restores nothing on a fresh kernel.
graph1_full = nx.read_gml("graph1_full")
graph2_full = nx.read_gml("graph2_full")
graph3_full = nx.read_gml("graph3_full")

In [4]:
def get_graph(time_interval, file = 3):
    """Build a weighted directed graph from the interactions inside a time interval.

    Parameters
    ----------
    time_interval : sequence of two str
        ``(start, end)`` date strings; converted to integer timestamps by
        ``from_time_int_to_dates``. Both bounds are inclusive.
    file : int, default 3
        Key selecting the input file and the per-interaction weight:
        1 -> "sx-stackoverflow-a2q.txt" (weight 1.0),
        2 -> "sx-stackoverflow-c2q.txt" (weight 2/3),
        3 -> "sx-stackoverflow-c2a.txt" (weight 1/2).
        The file is opened relative to the current working directory.

    Returns
    -------
    networkx.DiGraph
        Edge (first field, second field) for every line whose third field
        falls in the interval. The edge attribute ``'weight'`` is the
        per-interaction weight multiplied by the number of matching lines
        for that ordered pair. Node labels are strings.

    Raises
    ------
    KeyError
        If ``file`` is not one of 1, 2, 3.
    IndexError, ValueError
        If a non-blank line has fewer than three fields or a non-integer
        third field.
    """
    # Initialize the directed graph
    G = nx.DiGraph()

    # Create mapping of files and mapping of weights
    map_files = {1: "sx-stackoverflow-a2q.txt", 2:"sx-stackoverflow-c2q.txt", 3:"sx-stackoverflow-c2a.txt"}
    map_weights = {1: 1.0, 2: 2/3, 3: 1/2}

    # Get the start and end dates
    start, end = from_time_int_to_dates(time_interval)

    with open(map_files[file], "r", encoding="UTF-8") as f:
        # Loop-invariant: every line of one file contributes the same weight.
        edge_weight = float(map_weights[file])

        # Iterate over the handle instead of f.readlines() so the whole file
        # is never held in memory; the progress bar therefore shows a running
        # count and rate rather than a percentage.
        for line in tqdm(f):

            # Split on any whitespace; skip blank lines instead of crashing.
            elems = line.split()
            if not elems:
                continue

            # Keep only interactions inside the (inclusive) time interval
            if start <= int(elems[2]) <= end:
                source, target = elems[0], elems[1]
                # Repeated interaction --> accumulate weight, else new edge
                if G.has_edge(source, target):
                    G[source][target]['weight'] += edge_weight
                else:
                    G.add_edge(source, target, weight = edge_weight)

    return G

In [5]:
# Interval graph for file key 1, "sx-stackoverflow-a2q.txt".
graph1 = get_graph(
    time_interval=("2008-11-01", "2008-11-02"),
    file=1,
)

100%|██████████| 17823525/17823525 [00:10<00:00, 1645799.10it/s]


In [6]:
# Interval graph for file key 2, "sx-stackoverflow-c2q.txt".
graph2 = get_graph(
    time_interval=("2008-11-01", "2008-11-02"),
    file=2,
)

100%|██████████| 20268151/20268151 [00:12<00:00, 1616983.05it/s]


In [7]:
# Interval graph for file key 3, "sx-stackoverflow-c2a.txt".
graph3 = get_graph(
    time_interval=("2008-11-01", "2008-11-02"),
    file=3,
)

100%|██████████| 25405374/25405374 [00:16<00:00, 1586888.43it/s]


## Merged

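This function merges two graphs produced by **get_graph()** for the same time interval: an edge present in both graphs gets the sum of the two weights, and an edge present in only one graph is kept with its own weight.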

In [8]:
def merged_graph(graph_1, graph_2):
    """Return the weighted union of two directed graphs.

    Parameters
    ----------
    graph_1, graph_2 : networkx.DiGraph
        Graphs whose edges all carry a numeric ``'weight'`` attribute.

    Returns
    -------
    networkx.DiGraph
        A new graph holding every edge of ``graph_1`` and ``graph_2``. An
        edge present in both gets the sum of the two weights; an edge present
        in only one keeps its own weight.

    Raises
    ------
    KeyError
        If an edge of ``graph_2``, or a shared edge of ``graph_1``, has no
        ``'weight'`` attribute.

    Notes
    -----
    Neither input is modified. The merge is applied to a copy of ``graph_1``,
    so calling the function again with the same arguments (for example by
    re-running a notebook cell) returns the same result rather than adding
    the weights of ``graph_2`` a second time.
    """
    # Work on a copy so the caller's graph_1 keeps its original weights.
    merged = graph_1.copy()

    # Iterate over the edges from the second graph
    for source, target, attrs in graph_2.edges(data = True):
        weight = float(attrs['weight'])
        if merged.has_edge(source, target):
            # Edge in both graphs: only sum the weights
            merged[source][target]['weight'] += weight
        else:
            # Edge only in graph 2: add it with its own weight
            merged.add_edge(source, target, weight = weight)

    return merged

In [9]:
# Combine the three interval graphs: graph1 with graph2 first, then that
# result with graph3.
merged = merged_graph(merged_graph(graph1, graph2), graph3)

In [10]:
# Convert the merged graph to a nested-dict adjacency structure
# (node -> neighbour -> edge data), the input format expected by directed().
dict_merged = nx.to_dict_of_dicts(merged)

In [16]:
def directed(dict_graph):
    """Tell whether an adjacency mapping contains a one-way edge.

    Parameters
    ----------
    dict_graph : dict
        Mapping ``node -> {neighbour: ...}``; only the keys of the inner
        mappings are used.

    Returns
    -------
    bool
        ``True`` if some edge ``node -> neighbour`` has no matching
        ``neighbour -> node`` entry, ``False`` if every edge is reciprocated
        (including the empty graph).

    Notes
    -----
    A neighbour that does not appear as a key of ``dict_graph`` has no
    outgoing edges, so any edge pointing to it counts as one-way.
    """
    for node, neighbours in dict_graph.items():
        for neighbour in neighbours:
            # Look the node up directly in the neighbour's mapping (constant
            # time) and stop at the first one-way edge: the answer is settled.
            if node not in dict_graph.get(neighbour, ()):
                return True
    return False

In [17]:
# Report whether the merged graph has at least one edge without a reverse edge.
print(directed(dict_merged))

True
