In [1]:
import getpass
import os

import numpy as np
import pandas as pd
from py2neo import Graph
from py2neo import Node
from py2neo import Relationship

# Commuting flow between states: Degree centrality

## Preprocessing the data

The data is read and the margin-of-error column is dropped. Missing values occur in rows that describe commuting flows to workplaces outside the USA; these rows are removed.

In [2]:
# Load the raw commuting-flow table. The four FIPS columns are read with dtype
# ``object``; the margin-of-error column is dropped and every row that
# contains a missing value is removed.
commuting_flow = (
    pd.read_excel(
        "RawData/commuting_flows.xlsx",
        dtype={
            'State FIPS Residence': object,
            'County FIPS Residence': object,
            'State FIPS Work': object,
            'County FIPS Work': object,
        },
    )
    .drop(columns=[" Margin of Error"])
    .dropna()
)

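Combine the state and county FIPS codes into a single FIPS code, for both the residence and the workplace. The first character of the work-state code is removed before concatenating.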

In [3]:
# The work-state code has its first character removed before it is combined
# with the county code.
work_state_fips = commuting_flow["State FIPS Work"].str[1:]

# Add the combined residence/work FIPS codes and discard the four partial
# code columns they were built from.
commuting_flow = (
    commuting_flow
    .assign(
        FIPS_Residence=commuting_flow["State FIPS Residence"] + commuting_flow["County FIPS Residence"],
        FIPS_Work=work_state_fips + commuting_flow["County FIPS Work"],
    )
    .drop(
        columns=[
            'State FIPS Work',
            'State FIPS Residence',
            'County FIPS Residence',
            'County FIPS Work',
        ]
    )
)

## Preparing graph based on commuting flows between states

We only consider the commuting flow between states.

In [4]:
# Keep only the flows whose residence state differs from the work state.
crosses_state_border = commuting_flow["State Residence"].ne(commuting_flow["State Work"])
between_state = commuting_flow.loc[crosses_state_border]

We compute the nodes and edges and save them as CSV files in the folder `Data_input_neo4j`.

In [5]:
# One weighted edge per (residence state, work state) pair: the county-level
# worker counts are summed up to state level.
commuting_edges = (
    between_state
    .groupby(["State Residence", "State Work"], as_index=False)
    .agg({'Workers in Commuting Flow': 'sum'})
    .rename(
        columns={
            'Workers in Commuting Flow': 'Commuting',
            'State Residence': 'source',
            'State Work': 'target',
        }
    )
)

# Every state that appears at either end of an edge becomes a node. Taking
# only the sources would omit a state that occurs solely as a workplace, and
# the edge-loading query would then silently skip all edges pointing to it.
# Sources come first, so the node order is unchanged whenever every target
# is also a source.
commuting_nodes = pd.DataFrame(
    pd.concat([commuting_edges['source'], commuting_edges['target']]).unique(),
    columns=['State'],
)

In [6]:
# Write the edge and node tables (without the index) to the Neo4j input folder.
for frame, csv_path in (
    (commuting_edges, 'Data_input_neo4j/commuting_edges.csv'),
    (commuting_nodes, 'Data_input_neo4j/commuting_nodes.csv'),
):
    frame.to_csv(csv_path, index=False)

## Commuting flow degree centrality

Make sure that all data files from the folder `Data_input_neo4j` have been copied into the import folder of your Neo4j database, because the `LOAD CSV` queries below read them via `file:///` URLs.

### Make graph

In [7]:
# Take the Neo4j password from the NEO4J_PASSWORD environment variable so that
# no credential is stored in the notebook; if the variable is unset or empty,
# ask for the password interactively instead.
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
graph = Graph(password=neo4j_password)

Insert the nodes and relationships.

In [8]:
# Creates one node per row of the nodes file, labelled both State and Place,
# with the state name stored in the `name` property.
create_nodes_cypher = """LOAD CSV WITH HEADERS FROM $file as row
        with row
        CALL apoc.create.node(['State','Place'],{name:row.State}) YIELD node
        RETURN distinct true"""

# Creates one COMMUTING relationship per row of the edges file between the
# matching source and target State nodes; the commuter count is stored as a
# float in the `weight` property.
create_edges_cypher = """LOAD CSV WITH HEADERS FROM $file as row
        with row
        MATCH (source:State{name:row.source})
        MATCH (target:State{name:row.target})
        CALL apoc.create.relationship(source,"COMMUTING",{weight:toFloat(row.Commuting)},target) YIELD rel
        RETURN distinct true"""

# Remove every existing node and relationship from the database, then load
# the nodes followed by the edges.
graph.run("MATCH (n) DETACH DELETE n")
graph.run(create_nodes_cypher, file='file:///commuting_nodes.csv')
graph.run(create_edges_cypher, file='file:///commuting_edges.csv')

true
True


Create an in-memory graph, called 'Commuting'.

In [9]:
# Remove a leftover 'Commuting' projection (for example from an interrupted
# earlier run) so that the create call below cannot fail because the name is
# already taken. The second argument (failIfMissing = false) turns the drop
# into a no-op when no such projection exists.
graph.run("CALL gds.graph.drop('Commuting', false)")

# Project the State nodes and their COMMUTING relationships, including the
# `weight` property, into an in-memory graph named 'Commuting'. The
# relationships are projected with UNDIRECTED orientation.
query="""CALL gds.graph.create('Commuting','State',{COMMUTING:{properties:'weight', orientation:'UNDIRECTED'}})"""
graph.run(query)

nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,createMillis
"{State: {properties: {}, label: 'State'}}","{COMMUTING: {orientation: 'UNDIRECTED', aggregation: 'DEFAULT', type: 'COMMUTING', properties: {weight: {property: 'weight', aggregation: 'DEFAULT', defaultValue: null}}}}",Commuting,52,4748,127


### Perform degree centrality

We use the `gds.degree.stream` procedure to calculate the weighted degree centrality of the states, with the number of commuters as the relationship weight.

In [10]:
# Stream the degree centrality of every node in the 'Commuting' projection,
# weighted by the `weight` relationship property, and return it together with
# the state name, highest score first.
degree_query = """CALL gds.degree.stream('Commuting',{relationshipWeightProperty:'weight'})
        YIELD nodeId, score
        RETURN gds.util.asNode(nodeId).name AS Place, score AS Degree_Centrality_com
        ORDER by Degree_Centrality_com desc"""

# Collect the returned records into a DataFrame.
df_degree_commuting = pd.DataFrame(graph.run(degree_query).data())

The in-memory graph is dropped again.

In [11]:
# Drop the in-memory 'Commuting' graph from the graph catalog.
drop_projection_query = "CALL gds.graph.drop('Commuting')"
graph.run(drop_projection_query)

graphName,database,memoryUsage,sizeInBytes,nodeProjection,relationshipProjection,nodeQuery,relationshipQuery,nodeCount,relationshipCount,nodeFilter,relationshipFilter,density,creationTime,modificationTime,schema
Commuting,neo4j,,-1,"{State: {properties: {}, label: 'State'}}","{COMMUTING: {orientation: 'UNDIRECTED', aggregation: 'DEFAULT', type: 'COMMUTING', properties: {weight: {property: 'weight', aggregation: 'DEFAULT', defaultValue: null}}}}",,,52,4748,,,1.790346907993967,datetime('2022-05-29T10:06:39.502472300+02:00'),datetime('2022-05-29T10:06:43.310351900+02:00'),"{relationships: {COMMUTING: {weight: 'Float (DefaultValue(NaN), PERSISTENT, Aggregation.DEFAULT)'}}, nodes: {State: {}}}"


The results are saved into a csv file.

In [12]:
# Write the degree-centrality table to disk without the DataFrame index.
degree_output_path = 'PreprocessedData/degree_commuting_between_states.csv'
df_degree_commuting.to_csv(degree_output_path, index=False)