# Optimizing travel with Python and Cypher

In [None]:
#!pip install -r requirements.txt

In [None]:
from graphtastic.database.neo4j import Neo4jConnect 

## Connecting to Neo4j

Please make sure to start the database (**Python DB**) we created in the *5.01_Setup_and_query_Neo4J.ipynb* notebook. Once the service has started, we can connect to our *localhost*.

In [None]:
# Open a connection to the Neo4j instance on localhost via the Bolt port.
# NOTE(review): the second and third arguments are presumably the user name
# and password; they are hard-coded here, which is only acceptable for a
# local tutorial database.
connection = Neo4jConnect('bolt://localhost:7687', 'admin', 'testpython')
print(connection)

## Optimizing travel with Python and Cypher

In [None]:
# Find the most populous node: order by population, largest first, and keep
# a single row.
# NOTE(review): Cypher sorts null first under DESC, so any node lacking a
# population property would be returned instead -- confirm every node has one.
cypher = (
    'MATCH (n) '
    'RETURN n.name, n.population '
    'ORDER BY n.population '
    'DESC LIMIT 1'
)

# Reuse the connection opened earlier in the notebook. Creating a second
# Neo4jConnect here would rebind `connection` and leave the first one open
# with no remaining reference through which to close it.
result = connection.query(cypher).data()
print(result)

In [None]:
# Names of the cities reachable from New York over one AIR_TRAVEL
# relationship whose travel_time is below 100.
cypher = (
    'MATCH (n:City {name: "New York, NY"}) '
    'MATCH (n)-[r:AIR_TRAVEL]->(m) '
    'WHERE r.travel_time < 100 '
    'RETURN m.name'
)

In [None]:
# Run the query prepared in the previous cell, then show the returned rows
# and how many there are.
result = connection.query(cypher).data()
print(result)
print(len(result))


In [None]:
# Every directed path of at most three relationships from San Diego to
# St. Johns, returned together with the property maps of its relationships.
cypher = (
    'MATCH (n:City {name: "San Diego, CA"}) '
    'MATCH (m:City {name: "St. Johns, NL"}) '
    'MATCH p=(n)-[*..3]->(m) '
    'WITH p, [r in relationships(p) | properties(r)] as rels '
    'RETURN p, rels'
)

In [None]:
# Run the path query from the previous cell; each row carries the keys
# 'p' (the path) and 'rels' (its relationship properties) named in RETURN.
result = connection.query(cypher).data()
print(result)
print(len(result))

## Travel recommendations

In [None]:
# Pull the path column ('p') out of every result row.
paths = [row['p'] for row in result]

In [None]:
# Flatten all paths into one list, dropping the 'AIR_TRAVEL' entries that sit
# between nodes (presumably the relationship-type markers in each path).
nodes = [
    element
    for path in paths
    for element in path
    if element != 'AIR_TRAVEL'
]

In [None]:
# De-duplicate nodes by node_id: keying a dict on the id keeps one record per
# node (the last one seen), and .values() exposes those records as a view.
node_attributes = {
    record['node_id']: record for record in nodes
}.values()
print(node_attributes)

In [None]:
# Split the unique node records into parallel per-attribute lists; position i
# in every list refers to the same node.
node_ids = [record['node_id'] for record in node_attributes]
names = [record['name'] for record in node_attributes]
populations = [record['population'] for record in node_attributes]
latitudes = [record['latitude'] for record in node_attributes]
longitudes = [record['longitude'] for record in node_attributes]
# Map each node_id to its position in the lists above, which is later used
# as the igraph vertex index.
igraph_ids = {node_id: position for position, node_id in enumerate(node_ids)}

In [None]:
# Pull the relationship-properties column ('rels') out of every result row;
# entry i belongs to paths[i].
travel_time_paths = [row['rels'] for row in result]
print(travel_time_paths)

In [None]:
# Turn every path into igraph edges (pairs of vertex indices) with a matching
# travel_time per edge. An edge shared by several paths is appended once per
# path, so both lists may contain repeats.
edge_list = []
edge_attributes = []
for path, times in zip(paths, travel_time_paths):
    # Node ids along the path, with the 'AIR_TRAVEL' entries filtered out.
    clean_path = [
        element['node_id'] for element in path if element != 'AIR_TRAVEL'
    ]
    travel_times = [rel['travel_time'] for rel in times]
    # Consecutive node pairs are the hops; pair each hop with its travel time.
    hops = zip(clean_path, clean_path[1:])
    for (start, end), time in zip(hops, travel_times):
        edge_list.append([igraph_ids[start], igraph_ids[end]])
        edge_attributes.append(time)


Build the graph

In [None]:
import igraph

# Build a directed graph with one vertex per unique node. Vertices are created
# first so that the attribute lists, which share the same ordering, can be
# assigned to them position by position.
g = igraph.Graph(directed=True)
g.add_vertices(len(node_ids))
g.vs['node_id'] = node_ids
g.vs['name'] = names
g.vs['population'] = populations
g.vs['latitude'] = latitudes
g.vs['longitude'] = longitudes
# edge_list refers to vertices by index; edge_attributes was filled in step
# with edge_list, so the travel times line up with the edges.
g.add_edges(edge_list)
g.es['travel_time'] = edge_attributes

In [None]:
# Show every edge as [source node_id, target node_id, travel_time].
print([
    [g.vs[e.source]['node_id'], g.vs[e.target]['node_id'], e['travel_time']]
    for e in g.es
])

## Dijkstra's algorithm

In [None]:
# Look up the start and end vertices by their 'name' attribute. select()
# returns a sequence, which is why later cells index it with [0].
source = g.vs.select(name_eq='San Diego, CA')
target = g.vs.select(name_eq='St. Johns, NL')

![](fig/Figure%205.3.png)

In [None]:
# Cheapest route from source to target when edges are weighted by travel_time.
shortest_path = g.get_shortest_paths(
    source[0],
    target[0],
    weights='travel_time',
)
print(shortest_path)
# Each entry of the result is a whole path given as vertex indices; indexing
# g.vs with that list and reading 'name' yields the city names along it.
shortest_path = [g.vs[vertex_ids]['name'] for vertex_ids in shortest_path]
print(shortest_path)

In [None]:
# Same search, but output='epath' asks for the edges of the route instead of
# its vertices.
short_path_rels = g.get_shortest_paths(
    source[0],
    target[0],
    weights='travel_time',
    output='epath',
)
print(short_path_rels)
# Travel time of every edge on each returned path.
short_path_distances = [
    g.es[edge_ids]['travel_time'] for edge_ids in short_path_rels
]
print(short_path_distances)
# Total travel time along the first returned path.
shortest_travel_time = sum(short_path_distances[0])

In [None]:
# Total travel time along the shortest path computed in the previous cell.
print(shortest_travel_time)

In [None]:
# Edges as [source node_id, target node_id], in the same order as g.es.
edges = [[g.vs[e.source]['node_id'], g.vs[e.target]['node_id']] for e in g.es]
# Rebind latitudes/longitudes from positional lists to lookups keyed by node_id.
latitudes = {vertex['node_id']: vertex['latitude'] for vertex in g.vs}
longitudes = {vertex['node_id']: vertex['longitude'] for vertex in g.vs}

In [None]:
from geopy.distance import geodesic

In [None]:
def find_distances(edges, latitudes, longitudes):
    """Return the geodesic length, in whole kilometres, of every edge.

    Args:
        edges: Iterable of two-item (start, end) node keys.
        latitudes: Mapping from node key to latitude.
        longitudes: Mapping from node key to longitude.

    Returns:
        A list with one int per edge, in the order of ``edges``. Each value
        is the geodesic distance in km with the fractional part dropped by
        ``int``.
    """
    def coordinates(node):
        # geodesic is called with (latitude, longitude) tuples.
        return (latitudes[node], longitudes[node])

    return [
        int(geodesic(coordinates(start), coordinates(end)).km)
        for start, end in edges
    ]

In [None]:
# edges was built by iterating g.es, so the returned distances are in edge
# order and can be assigned directly as an edge attribute.
distances = find_distances(edges, latitudes, longitudes)
g.es['distance'] = distances

## Shortest paths with distances

In [None]:
# Look up the same start and end vertices again by their 'name' attribute.
source = g.vs.select(name_eq='San Diego, CA')
target = g.vs.select(name_eq='St. Johns, NL')

In [None]:
# Shortest route from source to target when edges are weighted by the
# 'distance' attribute instead of travel_time.
shortest_path = g.get_shortest_paths(
    source[0],
    target[0],
    weights='distance',
)
print(shortest_path)
# Translate each returned path from vertex indices to city names.
shortest_path = [g.vs[vertex_ids]['name'] for vertex_ids in shortest_path]
print(shortest_path)

In [None]:
# Edge view (output='epath') of the distance-weighted shortest route.
short_path_rels = g.get_shortest_paths(
    source[0],
    target[0],
    weights='distance',
    output='epath',
)
print(short_path_rels)
# Distance of every edge on each returned path.
short_path_distances = [
    g.es[edge_ids]['distance'] for edge_ids in short_path_rels
]
print(short_path_distances)
# NOTE: despite its name, this variable now holds a total distance (the
# 'distance' attribute is in km), replacing the travel-time total stored
# under the same name earlier.
shortest_travel_time = sum(short_path_distances[0])
print(shortest_travel_time)

## Calculate physical distance and write to Neo4j

In [None]:
# Endpoint ids of every AIR_TRAVEL relationship in the database.
edges_cypher = (
    'MATCH (n)-[:AIR_TRAVEL]->(m) '
    'RETURN n.node_id, m.node_id'
)

# Id and coordinates of every node in the database.
nodes_cypher = (
    'MATCH (n) '
    'RETURN n.node_id, n.latitude, n.longitude'
)

In [None]:
# Fetch all edges and all node coordinates, then release the connection.
# The queries run inside try/finally so the connection is closed even when
# one of them raises; otherwise a failed query would leave it open.
connection = Neo4jConnect('bolt://localhost:7687', 'admin', 'testpython')
try:
    edges = connection.query(edges_cypher).data()
    lat_longs = connection.query(nodes_cypher).data()
finally:
    connection.close()

In [None]:
# Reshape the query rows: edges become [start id, end id] pairs, and the
# coordinates become lookups keyed by node id.
edges = [[row['n.node_id'], row['m.node_id']] for row in edges]
latitudes = {row['n.node_id']: row['n.latitude'] for row in lat_longs}
longitudes = {row['n.node_id']: row['n.longitude'] for row in lat_longs}

In [None]:
# Compute one distance per edge, then attach it to its edge as
# [start id, end id, distance].
distances = find_distances(edges, latitudes, longitudes)
distances = [
    [start, end, km] for (start, end), km in zip(edges, distances)
]
print(distances)

In [None]:
import csv

# Write one CSV row per [start id, end id, distance] triple. newline='' lets
# the csv module control line endings itself.
with open('./data/distances.csv', 'w', newline='') as csv_file:
    csv.writer(csv_file).writerows(distances)