In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# dataset files stored on Drive can be opened from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Make the homework folder on the mounted Drive the working directory, so the
# dataset files can be opened by bare file name (e.g. "a2q.txt").
import os
os.chdir("/content/drive/MyDrive/ADM/HW5/")

In [3]:
import networkx as nx
from tqdm import tqdm
import datetime

## Explore the dataset and find the maximum and minimum timestamps

In [4]:
def read_data(file,max,min):
  """Update the running maximum and minimum timestamps with those found in `file`.

  Every usable line is expected to hold at least three whitespace-separated
  fields, the third one being an integer timestamp. Lines with fewer than
  three fields (blank or truncated lines) are skipped.

  Parameters
  ----------
  file : iterable of str
      Lines to scan, e.g. an open text file.
  max : int
      Largest timestamp seen so far.
  min : int
      Smallest timestamp seen so far.

  Returns
  -------
  tuple
      ``(max, min)`` after taking the timestamps of `file` into account.

  Raises
  ------
  ValueError
      If the third field of a line is not an integer.
  """
  #The parameter names shadow the builtins max() and min(); they are kept only
  #for backward compatibility and aliased to clearer local names
  latest,earliest=max,min

  for line in tqdm(file):

    #split() without arguments accepts any run of whitespace as separator
    #and drops the trailing newline
    fields=line.split()

    #Skip blank or truncated lines instead of failing with an IndexError
    if len(fields)<3:
      continue

    interaction_time=int(fields[2])
    if interaction_time>latest:
      latest=interaction_time
    if interaction_time<earliest:
      earliest=interaction_time
  return(latest,earliest)

In [5]:
#Running maximum and minimum seen timestamps (1647265600 corresponds to March 2022).
#Dedicated names are used so that the builtins max() and min() are not
#overwritten in the notebook namespace
max_timestamp=0
min_timestamp=1647265600

#Open the 3 dataset files and read maximum and minimum timestamps on top of them.
#The with-block closes every file even if the scan raises an exception
with open("a2q.txt") as a2q, open("c2a.txt") as c2a, open("c2q.txt") as c2q:
  max_timestamp,min_timestamp=read_data(a2q,max_timestamp,min_timestamp)
  max_timestamp,min_timestamp=read_data(c2a,max_timestamp,min_timestamp)
  max_timestamp,min_timestamp=read_data(c2q,max_timestamp,min_timestamp)

#Show the two extremes as calendar dates
print("\nMaximum seen timestamp is:",datetime.datetime.fromtimestamp(max_timestamp).strftime("%Y-%m-%d"),sep=" ")
print("\nMinimum seen timestamp is:",datetime.datetime.fromtimestamp(min_timestamp).strftime("%Y-%m-%d"),sep=" ")


17823525it [00:16, 1093123.42it/s]
25405374it [00:23, 1075943.82it/s]
20268151it [00:18, 1070025.82it/s]


Maximum seen timestamp is: 2016-03-06

Minimum seen timestamp is: 2008-08-01





# Read data

In [6]:
def create_graph_layer(G,file_type,file_name,include_self_loops,time_interval):
  """Add the interactions read from one dataset file to G as one layer of edges.

  Every usable line of `file_type` is expected to hold at least three
  whitespace-separated fields: source node, destination node and unix
  timestamp, all integers. Lines with fewer fields are skipped.

  An interaction is kept when its date lies strictly between
  ``time_interval[0]`` and ``time_interval[1]`` and, unless
  `include_self_loops` is true, its two nodes differ.

  Each kept (source, destination) pair is stored as one edge with key
  `file_name` carrying two attributes:
    * `file_name`: 1 for the first interaction, then 1/(1/w+1) at every
      further one, i.e. 1/n after n interactions (up to float rounding);
    * "time": the list of dates ("%Y-%m-%d") of the interactions.

  Parameters
  ----------
  G : graph supporting keyed edges (a networkx MultiDiGraph in this notebook)
      Graph to update in place.
  file_type : iterable of str
      Lines of the dataset file, e.g. the open file object.
  file_name : str
      Name of the layer; used both as edge key and as weight attribute name,
      so it must not be "key" or "time".
  include_self_loops : bool
      Whether interactions of a node with itself are kept.
  time_interval : sequence of two str
      Exclusive lower and upper date bounds in "%Y-%m-%d" format.

  Returns
  -------
  The same graph object G.
  """
  lower_bound,upper_bound=time_interval[0],time_interval[1]

  for line in tqdm(file_type):

    #split() without arguments accepts any run of whitespace as separator
    fields=line.split()

    #Skip blank or truncated lines instead of failing on them
    if len(fields)<3:
      continue

    #Extract source node and destination node
    source_node=int(fields[0])
    destination_node=int(fields[1])

    #Disregard self loops if required, before paying for the date conversion
    if source_node==destination_node and not include_self_loops:
      continue

    #Convert unix timestamp into a "%Y-%m-%d" string; dates in this format
    #compare chronologically as plain strings.
    #NOTE: fromtimestamp() without tz uses the local timezone of the runtime
    interaction_time=datetime.datetime.fromtimestamp(int(fields[2])).strftime("%Y-%m-%d")

    #Keep only interactions strictly inside the requested time interval
    if not (lower_bound < interaction_time < upper_bound):
      continue

    #If G has already an edge of this layer, update its weight and append the interaction time
    if G.has_edge(source_node,destination_node) and file_name in G[source_node][destination_node]:
      edge_attributes=G[source_node][destination_node][file_name]
      edge_attributes[file_name]=1/(1/edge_attributes[file_name]+1)
      edge_attributes["time"].append(interaction_time)

    #If that edge is not yet existing, create it with weight 1; the weight
    #attribute is named after the layer, whatever the layer name is
    else:
      G.add_edge(source_node,destination_node,key=file_name,**{file_name:1},time=[interaction_time])
  return(G)

  

In [7]:
#Time window applied to every layer (exclusive bounds, "%Y-%m-%d" format)
time_interval=["2015-01-01", "2016-03-01"]

#Create multidirected graph
G = nx.MultiDiGraph()

#Open the 3 dataset files and add one layer per file to the graph.
#The with-block closes every file even if building a layer raises an exception
with open("a2q.txt") as a2q, open("c2a.txt") as c2a, open("c2q.txt") as c2q:
  G=create_graph_layer(G,c2a,"c2a",include_self_loops=False,time_interval=time_interval)
  G=create_graph_layer(G,a2q,"a2q",include_self_loops=False,time_interval=time_interval)
  G=create_graph_layer(G,c2q,"c2q",include_self_loops=False,time_interval=time_interval)

25405374it [02:55, 144456.13it/s]
17823525it [02:14, 132874.93it/s]
20268151it [02:38, 128008.75it/s]


In [8]:
#For the construction of the functionality 1, the graph will be considered unweighted
def functionality_1(G,graph_type):
  """Summarise one layer of the graph, treating it as unweighted.

  An edge belongs to the layer when its attribute dictionary contains
  `graph_type`.

  Parameters
  ----------
  G : graph exposing ``edges(data=True)`` and ``G[node][neighbour]`` access
      keyed by layer name (a networkx MultiDiGraph in this notebook).
  graph_type : str
      Name of the layer to analyse.

  Returns
  -------
  tuple
      ``(undirected, n_users, total_interactions, average_interactions, graph_density)``
      * undirected: the string "True" when every edge of the layer also
        exists in the opposite direction in the same layer (or the layer is
        empty), otherwise the string "False";
      * n_users: number of distinct nodes touched by the layer;
      * total_interactions: number of edges of the layer;
      * average_interactions: total_interactions / n_users, 0 when the layer is empty;
      * graph_density: total_interactions / (n_users*(n_users-1)), 0 when
        the layer has fewer than two nodes.
  """
  #A set is used since the same user must not be counted twice
  unique_nodes=set()
  total_interactions=0
  average_interactions=0
  undirected="True"
  graph_density=0

  #Iterate the edge view directly: the graph is not modified in the loop,
  #so no copy of the whole edge list is needed
  for source_node,destination_node,attributes in G.edges(data=True):

    #Work only on the layer requested by the user
    if graph_type not in attributes:
      continue

    #The layer stays "undirected" only while every edge has its reverse in the
    #same layer; once one is missing the check is not repeated for later edges
    if undirected=="True":
      reverse_neighbours=G[destination_node]
      if not (source_node in reverse_neighbours and graph_type in reverse_neighbours[source_node]):
        undirected="False"

    #Record both endpoints and count the interaction
    unique_nodes.update((source_node,destination_node))
    total_interactions+=1

  #Compute the number of users as the cardinality of the unique_nodes set
  n_users=len(unique_nodes)

  #The average needs at least one user
  if n_users>0:
    average_interactions=total_interactions/n_users

  #The density needs more than one user: with a single user (a layer made only
  #of a self loop) the denominator n_users*(n_users-1) would be zero
  if n_users>1:
    graph_density=total_interactions/(n_users*(n_users-1))

  return(undirected,n_users,total_interactions,average_interactions,graph_density)


In [9]:
#Compute outputs from functionality 1 on the "c2a" layer of the graph G;
#the five returned values are unpacked into notebook-level variables
undirected,n_users,total_interactions,average_interactions,graph_density=functionality_1(G,graph_type="c2a")

In [15]:
#Print output from functionality 1, one labelled value per line
#(the average is rounded to 5 decimals for display only)
print("Undirected:",undirected,"\nNumber of users:",n_users,
      "\nTotal interactions:",total_interactions,
      "\nAverage interactions per user",round(average_interactions,5),"\nGraph density",graph_density,sep=" ")

Unirected: False 
Number of users: 778496 
Total interactions: 2408848 
Average interactions per user 3.09423 
Graph density 3.9746343683280205e-06
