# Creation of complex networks

In [36]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt


folder='../data/'
#In this case, only the regular period is used. The same should be done for the carnival period.
df=pd.read_csv(folder+'data_filter_no_carnival.csv')
df_titolo=pd.read_csv(folder+'kind_of_tickets_segmentation_or.csv')
#Parse the timestamps BEFORE sorting: sorting the raw strings is only chronological when they happen to be ISO-formatted.
df['data_validazione']=pd.to_datetime(df['data_validazione'])
df['FERMATA']=df['FERMATA'].astype(int)
#The rows must be ordered by 'seriale' and then by validation time, because the links are built by comparing consecutive rows.
df.sort_values(['seriale','data_validazione'],ignore_index=True,inplace=True)
#Residents (Worker, Retired and Student ticket types) and tourists are kept in separate data frames.
resident_types=['Worker','Retired','Student']
df_residents=df[df['titolo'].isin(df_titolo[df_titolo['type'].isin(resident_types)]['titolo'].to_list())]
df_tourist=df[df['titolo'].isin(df_titolo[df_titolo['type']=='Tourist']['titolo'].to_list())]

The following section describes how the network is constructed.

In [37]:
#The nodes are taken from the complete data set, so that every stop is represented whichever subset is converted later.
stops=set(df.FERMATA.to_list())
#Stop identifier -> row/column position of that stop in the adjacency matrix.
dic_stops={stop_id:position for position,stop_id in enumerate(stops)}
def to_matrix(df, stop_index=None):
    """
    Convert a data frame of validations into network links, stored as an adjacency matrix.

    Two consecutive rows produce a link (previous stop -> current stop) when they share
    the same identifier and the same calendar day. The rows are therefore expected to be
    sorted by identifier and then by validation time.

    Columns are read by position, as in the original implementation: column 1 is the
    validation timestamp, column 2 the identifier ('seriale') and column 10 the stop.
    NOTE(review): these positions depend on the layout of the input CSV - confirm if it changes.

    :param df: data frame with stops sorted by identifier and date
    :type df: DataFrame
    :param stop_index: mapping from stop identifier to matrix position (0..n-1);
        defaults to the notebook-level ``dic_stops``
    :type stop_index: dict
    :return: square adjacency matrix; entry [a, b] counts the links from stop a to stop b
    :rtype: np.ndarray
    :raises KeyError: if a stop taking part in a link is missing from ``stop_index``
    """
    if stop_index is None:
        stop_index=dic_stops
    size=len(stop_index)
    matrix=np.zeros([size,size])
    #Pull the three columns out once; a df.iloc[i, j] lookup per row is extremely slow on large frames.
    identifiers=df.iloc[:,2].to_numpy()
    days=pd.to_datetime(df.iloc[:,1]).dt.normalize().to_numpy()
    stop_ids=df.iloc[:,10].to_numpy()
    #Row k+1 is linked to row k when both belong to the same identifier and the same day.
    linked=np.asarray((identifiers[1:]==identifiers[:-1]) & (days[1:]==days[:-1]),dtype=bool)
    origins=stop_ids[:-1][linked].tolist()
    destinations=stop_ids[1:][linked].tolist()
    rows=np.fromiter((stop_index[s] for s in origins),dtype=np.intp,count=len(origins))
    cols=np.fromiter((stop_index[s] for s in destinations),dtype=np.intp,count=len(destinations))
    #np.add.at accumulates repeated (row, col) pairs, which plain fancy-index += would not.
    np.add.at(matrix,(rows,cols),1)
    return matrix
#Adjacency matrix of the residents only (df_residents holds the Worker, Retired and Student ticket types).
mx=to_matrix(df_residents)

The adjacency matrix is converted into a directed network, and the nodes are relabelled so that they carry the names of the stops instead of their numeric matrix positions.

In [38]:
#nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the equivalent call and has been available since 2.0.
G=nx.from_numpy_array(mx,create_using=nx.DiGraph)
#Matrix position -> stop description, using the first description found for each stop.
#A single drop_duplicates replaces one full-frame filter per stop.
#NOTE(review): this cell reads 'descrizione' while the locations export reads 'DESCRIZIONE' - confirm both columns exist.
first_description=df.drop_duplicates(subset=['FERMATA']).set_index('FERMATA')['descrizione']
rename={dic_stops[stop_id]:description for stop_id,description in first_description.items()}
#NOTE(review): stops sharing the same description would be merged into one node by the relabelling - confirm descriptions are unique.
G = nx.relabel_nodes(G, rename)

The graph is saved.

In [39]:
#Write the relabelled graph G to 'grafo.graphml' in the current working directory, using NetworkX's lxml-based GraphML writer.
nx.write_graphml_lxml(G, "grafo.graphml")

The locations of the nodes are saved so that they can be used later in map representations.

In [40]:
#One row per stop description (the first occurrence is kept) with its coordinates, written with the data frame index.
(
    df[['DESCRIZIONE','lon','lat']]
    .drop_duplicates(subset=['DESCRIZIONE'])
    .to_csv('locations.csv')
)