# Data analysis with Neo4j and python for RESIDE_IN data

Imports:

In [1]:
from py2neo import Graph, Database , NodeMatcher

Conect to database:

In [2]:
default_db = Database('bolt://neo4j-hdx:7687', auth=("neo4j", "test"))

In [4]:
default_db.name

'graphHDX.db'

Default graph in the database:

In [16]:
default_db.default_graph.name

'data'

Instanciate a Graph object conecting to the gdefault graph 'data' exposed in 'graphHDX.db':

In [12]:
graph = Graph('bolt://neo4j-hdx:7687', auth=("neo4j", "test"))

In [13]:
graph.database.name

'graphHDX.db'

Get the number of nodes in the graph:

In [20]:
len(graph.nodes)

7376

Get the number of relationship in the graph:

In [19]:
len(graph.relationships)

124501

Explore the Schema of the graph:

In [28]:
graph.schema.node_labels

frozenset({'Country', 'CountryYear'})

In [34]:
rel_types = graph.schema.relationship_types
rel_types

frozenset({'1960',
           '1961',
           '1962',
           '1963',
           '1964',
           '1965',
           '1966',
           '1967',
           '1968',
           '1969',
           '1970',
           '1971',
           '1972',
           '1973',
           '1974',
           '1975',
           '1976',
           '1977',
           '1978',
           '1979',
           '1980',
           '1981',
           '1982',
           '1983',
           '1984',
           '1985',
           '1986',
           '1987',
           '1988',
           '1989',
           '1990',
           '1991',
           '1992',
           '1993',
           '1994',
           '1995',
           '1996',
           '1997',
           '1998',
           '1999',
           '2000',
           '2001',
           '2002',
           '2003',
           '2004',
           '2005',
           '2006',
           '2007',
           '2008',
           '2009',
           '2010',
           '2011',
           '

Get all years with available data from the relationship_types:

In [33]:
years_list = [rel for rel in graph.schema.relationship_types if rel.isdigit()]
years_list

['1960',
 '1964',
 '2015',
 '1995',
 '1982',
 '2017',
 '1985',
 '1973',
 '2006',
 '2004',
 '2000',
 '2014',
 '1997',
 '1962',
 '2010',
 '1990',
 '2002',
 '2008',
 '2011',
 '1996',
 '2003',
 '2012',
 '2007',
 '1988',
 '1989',
 '2016',
 '2009',
 '1966',
 '1969',
 '1983',
 '1968',
 '1999',
 '1976',
 '1971',
 '1998',
 '1974',
 '1972',
 '1992',
 '1979',
 '1961',
 '1981',
 '1993',
 '1970',
 '2013',
 '1975',
 '1978',
 '1994',
 '1965',
 '2001',
 '1991',
 '1987',
 '1963',
 '1984',
 '1980',
 '1977',
 '1967',
 '2005',
 '1986']

In [49]:
list(graph.nodes.match("CountryYear", year=2009))[0:5]

[(_17:CountryYear {country: 'Netherlands', countryearId: 'Netherlands2009', pop_growth_percentage: 0.5142845511436462, population: 16530388.0, urban_pop_percentage: 86.28800201416016, year: 2009}),
 (_38:CountryYear {country: 'Antigua and Barbuda', countryearId: 'Antigua and Barbuda2009', pop_growth_percentage: 1.1856592893600464, population: 93581.0, urban_pop_percentage: 26.81800079345703, year: 2009}),
 (_69:CountryYear {country: 'Cabo Verde', countryearId: 'Cabo Verde2009', pop_growth_percentage: 1.0600026845932007, population: 496963.0, urban_pop_percentage: 61.01599884033203, year: 2009}),
 (_93:CountryYear {country: 'Mauritius', countryearId: 'Mauritius2009', pop_growth_percentage: 0.26553767919540405, population: 1247429.0, urban_pop_percentage: 41.66600036621094, year: 2009}),
 (_144:CountryYear {country: 'Ethiopia', countryearId: 'Ethiopia2009', pop_growth_percentage: 2.6470654010772705, population: 85416256.0, urban_pop_percentage: 16.90999984741211, year: 2009})]

In [36]:
matcher = NodeMatcher(graph)

In [41]:
matcher.match("CountryYear", year=2009).first()

(_17:CountryYear {country: 'Netherlands', countryearId: 'Netherlands2009', pop_growth_percentage: 0.5142845511436462, population: 16530388.0, urban_pop_percentage: 86.28800201416016, year: 2009})

In [43]:
from pygments.lexers import get_lexer_by_name

In [44]:
lexer = get_lexer_by_name("py2neo.cypher")

In [48]:
list(lexer.get_statements("MATCH p=()-[r:2009]->() RETURN p LIMIT 25"))

['MATCH p=()-[r:2009]->() RETURN p LIMIT 25']