# Common errors and debugging

In [None]:
import csv
import igraph

## Common *igraph* issues

### No nodes in the graph

In [None]:
# Load the edge list from CSV, dropping the header row.
# newline='' is what the csv module asks for, so that newlines embedded in
# quoted fields are parsed correctly.
with open('./data/musae_git_edges.csv', 'r', newline='') as c:
    reader = csv.reader(c)
    next(reader, None)  # skip the header; the default avoids StopIteration on an empty file
    edges = list(reader)

The code below should produce an error `ValueError: no such vertex: '0'`:

In [None]:
# Start from an empty graph and try to add the edges straight away.
# This is the error demonstration for this section: the call is expected to
# raise `ValueError: no such vertex: '0'`.
g = igraph.Graph()
g.add_edges(edges)

In [None]:
# Cast both endpoints of every edge to int, then retry adding the edges.
# NOTE(review): no vertices have been added to `g` at this point, so this
# call presumably still fails — confirm.
edges = [[int(row[0]), int(row[1])] for row in edges]
g.add_edges(edges)

In [None]:
# Collect the distinct node IDs that occur at either end of any edge.
nodes = {endpoint for pair in edges for endpoint in pair}
print(len(nodes))

In [None]:
# Create one vertex per distinct node ID first, then add the edges.
g.add_vertices(len(nodes))
g.add_edges(edges)

In [None]:
# Number of edges currently in the graph.
print(len(g.es))

### Node IDs in *igraph*

In [None]:
# Start again from an empty graph for the node-ID example.
g = igraph.Graph()

In [None]:
# Extend the edge list with three extra edges that use the node IDs 40000 and 99999.
new_edges = edges + [[40000, 0], [99999, 1], [40000, 99999]]

In [None]:
# Distinct node IDs across the extended edge list.
new_nodes = {endpoint for pair in new_edges for endpoint in pair}
print(len(new_nodes))

In [None]:
# Add one vertex per distinct node ID, then add the edges using the raw node IDs.
# NOTE(review): presumably fails when an ID such as 40000 or 99999 is not
# below the number of vertices in the graph — confirm.
g.add_vertices(len(new_nodes))
g.add_edges(new_edges)

In [None]:
# Sort the node IDs and map each one to its position in the sorted list,
# which yields consecutive IDs starting at 0.
new_nodes = sorted(new_nodes)
igraph_ids = dict(zip(new_nodes, range(len(new_nodes))))

In [None]:
# Look up the mapped IDs for three of the original node IDs.
print(igraph_ids[40000])
print(igraph_ids[99999])
print(igraph_ids[0])

In [None]:
# Translate both endpoints of every edge through the ID mapping, add the
# translated edges, and report the resulting edge count.
ig_edges = [[igraph_ids[pair[0]], igraph_ids[pair[1]]] for pair in new_edges]
g.add_edges(ig_edges)
print(len(g.es))

### Adding properties

In [None]:
# Load the node attribute rows from CSV, dropping the header row.
# newline='' is what the csv module asks for, so that newlines embedded in
# quoted fields are parsed correctly.
with open('./data/musae_git_target.csv', 'r', newline='') as c:
    reader = csv.reader(c)
    next(reader, None)  # skip the header; the default avoids StopIteration on an empty file
    node_attributes = list(reader)

In [None]:
# Set 'developer_name' one vertex at a time, using each row's ID as the vertex index.
# NOTE(review): the two-name unpacking requires exactly two columns per row —
# confirm against the CSV.
for node_id, name in node_attributes:
    g.vs[int(node_id)]['developer_name'] = name
print(g.vs[0]['developer_name'])

In [None]:
# Assign the attribute to the whole vertex sequence in one step, taking the
# second column of every row as the name.
developer_names = [row[1] for row in node_attributes]
g.vs['developer_name'] = developer_names
print(g.vs[0]['developer_name'])

In [None]:
# Repeat the first name at the front, so the list is one element longer than
# developer_names and every later name is shifted by one position.
developer_names_dup = [developer_names[0]] + developer_names
g.vs['developer_name'] = developer_names_dup


In [None]:
# Inspect the attribute on the first two vertices.
print(g.vs[0]['developer_name'])
print(g.vs[1]['developer_name'])

In [None]:
# Sanity check: the attribute list must have exactly one entry per vertex.
assert len(g.vs) == len(developer_names_dup)

### Using the `select()` method

In [None]:
# Filter with the underscore-prefixed keyword `_degree_gt`, threshold 2000.
high_degree = g.vs.select(_degree_gt=2000)

In [None]:
# Store the result of g.degree() as a 'degree' vertex attribute.
degree = g.degree()
g.vs['degree'] = degree

In [None]:
# Filter on the stored 'degree' attribute (no leading underscore) and list the matches.
high_degree = g.vs.select(degree_gt=2000)
print(list(high_degree))

### Chained statements and `select()`

In [None]:
# Select from the graph's full vertex sequence by a list of integer positions.
sample = g.vs.select([0, 1, 2])
print(list(sample))

In [None]:
# Filter with `_degree_eq=100` and list the matching vertices.
degree_100 = g.vs.select(_degree_eq=100)
print(list(degree_100))

In [None]:
# NOTE(review): integers passed to select() on an already-filtered sequence
# are presumably positions within that sequence, not graph-wide vertex IDs,
# so 1075 may be out of range here — confirm.
assert degree_100.select([1075])

In [None]:
# Position 0 of the filtered sequence.
sample = degree_100.select([0])
print(list(sample))

### Efficiency and path lengths

In [None]:
# Harmonic centrality with no cutoff argument; prints the full result.
harmonic = g.harmonic_centrality()
print(harmonic)

In [None]:
# Same computation, this time passing cutoff=3.
harmonic = g.harmonic_centrality(cutoff=3)
print(harmonic)

## Common Neo4j issues

These examples are run in the **Neo4j Browser**. The notebook only writes the Cypher query (`.cql`) files into the `cypher` folder; to follow along, switch to the Neo4j Browser window and run each file's contents there.

### Slow writing in Neo4j

In [None]:
%%writefile cypher/load_git_edges.cql
// For every CSV row, merge both Developer nodes by githubId and create a FOLLOWS relationship from the first to the second.
LOAD CSV WITH HEADERS FROM 'file:///musae_git_edges.csv' AS row
MERGE (d1:Developer {githubId:row.id_1})
MERGE (d2:Developer {githubId:row.id_2})
CREATE (d1)-[:FOLLOWS]->(d2)

In [None]:
%%writefile cypher/optim_auto_load_git_edges.cql
:auto LOAD CSV WITH HEADERS FROM 'file:///musae_git_edges.csv' AS row
// Per-row merge-and-create wrapped in a subquery that runs in transactions of 1000 rows.
CALL {
	WITH row
	MERGE (d1:Developer {githubId:row.id_1})
	MERGE (d2:Developer {githubId:row.id_2})
	CREATE (d1)-[:FOLLOWS]->(d2)
} IN TRANSACTIONS OF 1000 ROWS


### Indexing for query performance

In [None]:
%%writefile cypher/remove_data.cql
// Delete every node together with its relationships.
MATCH (n) DETACH DELETE n

In [None]:
%%writefile cypher/create_githubid_index.cql
// Create an index named githubId_index on the githubId property of Developer nodes.
CREATE INDEX githubId_index
FOR (d:Developer)
ON (d.githubId)

In [None]:
%%writefile cypher/drop_gitid_index.cql
// Drop the index by the name it was created under (githubId_index).
DROP INDEX githubId_index

### Caching results

In [None]:
%%writefile cypher/cache_example.cql
// Count the FOLLOWS matches per developer (direction ignored), keep counts of at least 10, and return them in descending order.
MATCH (d:Developer)-[:FOLLOWS]-(d2:Developer)
WITH size(collect(d2)) as degree, d
WHERE degree >= 10
RETURN d.githubId, degree ORDER BY degree DESC

### Memory limitations

In [None]:
%%writefile cypher/clear_cache.cql
// Delete all nodes and their relationships in a single statement.
MATCH (d) DETACH DELETE d

In [None]:
%%writefile cypher/apoc_period_it.cql
// Delete Developer nodes through apoc.periodic.iterate with a batch size of 1000 and parallel execution switched off.
CALL apoc.periodic.iterate("
    MATCH (d:Developer) RETURN d",
    "DETACH DELETE d", 
    {batchSize:1000, parallel:false}
)

### Handling duplicates with `MERGE`

In [None]:
%%writefile cypher/merge_git_id.cql
// MERGE a Developer node with githubId '1'.
MERGE (d:Developer {githubId: '1'})

In [None]:
%%writefile cypher/create_git_id.cql
// CREATE a Developer node with githubId '1'.
CREATE (d:Developer {githubId: '1'})

In [None]:
%%writefile cypher/match_git_id.cql
// Return every Developer node whose githubId is '1'.
MATCH (d:Developer {githubId: '1'}) RETURN d

In [None]:
%%writefile cypher/follower_gitid.cql
// Merge the entire pattern (both nodes and the relationship) in a single MERGE clause.
MERGE (d:Developer {githubId: '1'})-[:FOLLOWS]->(d2:Developer {githubId: '2'})

In [None]:
%%writefile cypher/resolution_gitid.cql
// Merge each node on its own first, then merge the relationship between the two bound variables.
// d1 has to be bound by the first MERGE: an unbound variable in the last clause would not refer to developer '1'.
MERGE (d1:Developer {githubId: '1'})
MERGE (d2:Developer {githubId: '2'})
MERGE (d1)-[:FOLLOWS]->(d2)

### Handling duplicates with CONSTRAINTs

In [None]:
%%writefile cypher/create_constraint.cql
// Require githubId to be unique across Developer nodes.
CREATE CONSTRAINT githubId_constraint
FOR (d:Developer)
REQUIRE d.githubId IS UNIQUE

In [None]:
%%writefile cypher/drop_index_constraint.cql
// Drop the index named githubId_index.
DROP INDEX githubId_index

In [None]:
%%writefile cypher/merge_error.cql
// Single-clause MERGE of the whole pattern.
// NOTE(review): the file name suggests this is expected to error once the uniqueness constraint exists — confirm.
MERGE (d:Developer {githubId: '1'})-[:FOLLOWS]->(d2:Developer {githubId: '2'})

### `EXPLAIN`, `PROFILE` and the `Eager` operator

In [None]:
%%writefile cypher/using_EXPLAIN.cql
// EXPLAIN a query that matches directed FOLLOWS pairs of Developer nodes.
EXPLAIN
MATCH (d:Developer)-[:FOLLOWS]->(d2:Developer)
RETURN d, d2

In [None]:
%%writefile cypher/load_and_EXPLAIN.cql
// EXPLAIN the CSV load that merges both Developer nodes per row and creates the FOLLOWS relationship.
EXPLAIN 
LOAD CSV WITH HEADERS FROM 'file:///musae_git_edges.csv' AS row
MERGE (d1:Developer {githubId:row.id_1})
MERGE (d2:Developer {githubId:row.id_2})
CREATE (d1)-[:FOLLOWS]->(d2)
RETURN d1, d2

In [None]:
%%writefile cypher/explain_on_row_1.cql
// EXPLAIN a load that merges only the Developer taken from the id_1 column.
EXPLAIN 
LOAD CSV WITH HEADERS FROM 'file:///musae_git_edges.csv' AS row
MERGE (d1:Developer {githubId:row.id_1})

In [None]:
%%writefile cypher/explain_on_row_2.cql
// EXPLAIN a load that merges only the Developer taken from the id_2 column.
EXPLAIN 
LOAD CSV WITH HEADERS FROM 'file:///musae_git_edges.csv' AS row
MERGE (d1:Developer {githubId:row.id_2})

In [None]:
%%writefile cypher/EXPLAIN_MATCH.cql
// EXPLAIN a load that looks both Developer nodes up with MATCH (no MERGE) before creating the FOLLOWS relationship.
EXPLAIN 
LOAD CSV WITH HEADERS FROM 'file:///musae_git_edges.csv' AS row
MATCH (d1:Developer {githubId:row.id_1})
MATCH (d2:Developer {githubId:row.id_2})
CREATE (d1)-[:FOLLOWS]->(d2)