# Tutorial about Neo4j

References:
* https://github.com/neo4j-contrib/neo4j-graph-algorithms
* https://github.com/neo4j/graph-data-science/

## Setup

In [1]:
%pip install -q neo4j==5.22.0

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/293.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m286.7/293.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.5/293.5 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
import yaml
from google.colab import drive
from getpass import getpass

# Mount Google Drive at /content/drive; the file paths read by the cells below live under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Parse the private-keys YAML file on the mounted Drive and keep the Neo4j login.
f_path = "/content/drive/MyDrive/GitHub/python-codebase/machine_learning/private_keys.yml"
with open(f_path) as key_file:
    data_loaded = yaml.safe_load(key_file)

# Disabled: exporting the other tokens from the same file as environment variables.
#os.environ['HF_API_TOKEN'] = data_loaded['HF_API_KEY']
#os.environ['GITHUB_TOKEN'] = data_loaded['GITHUB_TOKEN']

# Neo4j credentials, looked up by key in the parsed YAML content.
neo4j_username = data_loaded['NEO4J_USERNAME']
neo4j_pass = data_loaded['NEO4J_PASSWORD']

## Load data

In [4]:
import pandas as pd

In [15]:
# Load nodes
# The CSV has no header row: read it raw, drop the first column, then name the rest.
nodes_csv = "/content/drive/MyDrive/GitHub/python-codebase/machine_learning/datasets/graph_datasets/airports.csv"
list_cols = ['name', 'c2_', 'c3_', 'id', 'c5_', 'latitude', 'longitude', 'c8_', 'c9_', 'c10_', 'c11_', 'c12_', 'c13_']
df_nodes = pd.read_csv(nodes_csv, header=None)
df_nodes = df_nodes.drop(columns=[0])
df_nodes.columns = list_cols

# Keep rows whose 'id' is not the literal "\N" placeholder and whose 'c3_' is 'United States'.
has_id = df_nodes["id"] != "\\N"
in_subset = df_nodes['c3_'] == 'United States'  # Subselect
df_nodes = df_nodes[has_id & in_subset]

print(df_nodes.shape)
df_nodes.head()

(1333, 13)


Unnamed: 0,name,c2_,c3_,id,c5_,latitude,longitude,c8_,c9_,c10_,c11_,c12_,c13_
3200,Barter Island LRRS Airport,Barter Island,United States,BTI,PABA,70.134003,-143.582001,2,-9,A,America/Anchorage,airport,OurAirports
3201,Cape Lisburne LRRS Airport,Cape Lisburne,United States,LUR,PALU,68.875099,-166.110001,16,-9,A,America/Anchorage,airport,OurAirports
3202,Point Lay LRRS Airport,Point Lay,United States,PIZ,PPIZ,69.732903,-163.005005,22,-9,A,America/Anchorage,airport,OurAirports
3203,Hilo International Airport,Hilo,United States,ITO,PHTO,19.721399,-155.048004,38,-10,N,Pacific/Honolulu,airport,OurAirports
3204,Orlando Executive Airport,Orlando,United States,ORL,KORL,28.5455,-81.332901,113,-5,A,America/New_York,airport,OurAirports


In [9]:
# Load relationships
# Read the gzip-compressed on-time CSV from the mounted Drive (compression is inferred from the extension).
# NOTE(review): the recorded output echoes the read_csv statement, which looks like a pandas
# warning (possibly DtypeWarning for mixed-type columns) — confirm, and consider passing dtype=.
relationships_csv = '/content/drive/MyDrive/GitHub/python-codebase/machine_learning/datasets/graph_datasets/188591317_T_ONTIME.csv.gz'
df_relationships = pd.read_csv(relationships_csv)
df_relationships.head()

  df_relationships = pd.read_csv('/content/drive/MyDrive/GitHub/python-codebase/machine_learning/datasets/graph_datasets/188591317_T_ONTIME.csv.gz')


Unnamed: 0,YEAR,QUARTER,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,FL_DATE,UNIQUE_CARRIER,AIRLINE_ID,CARRIER,TAIL_NUM,...,DIV4_TAIL_NUM,DIV5_AIRPORT,DIV5_AIRPORT_ID,DIV5_AIRPORT_SEQ_ID,DIV5_WHEELS_ON,DIV5_TOTAL_GTIME,DIV5_LONGEST_GTIME,DIV5_WHEELS_OFF,DIV5_TAIL_NUM,Unnamed: 109
0,2018,2,5,1,2,2018-05-01,DL,19790,DL,N6709,...,,,,,,,,,,
1,2018,2,5,1,2,2018-05-01,DL,19790,DL,N820DN,...,,,,,,,,,,
2,2018,2,5,1,2,2018-05-01,DL,19790,DL,N856DN,...,,,,,,,,,,
3,2018,2,5,1,2,2018-05-01,DL,19790,DL,N329NB,...,,,,,,,,,,
4,2018,2,5,1,2,2018-05-01,DL,19790,DL,N697DL,...,,,,,,,,,,


In [11]:
# Processing
# Reduce the flight table to the columns listed below and rename them to short names.
#
# This cell overwrites `df_relationships`, so it must tolerate being run more than
# once: on a re-run the raw (upper-case) column names are already gone, and selecting
# them directly would raise KeyError.
list_cols = [
  "ORIGIN",
  "DEST",
  "FL_DATE",
  "DEP_DELAY",
  "ARR_DELAY",
  "DISTANCE",
  "TAIL_NUM",
  "FL_NUM",
  "CRS_DEP_TIME",
  "CRS_ARR_TIME",
  "UNIQUE_CARRIER"
]
# Raw column name -> name used in the processed frame.
rename_map = {
  "ORIGIN":"src",
  "DEST":"dst",
  "DEP_DELAY":"deptDelay",
  "ARR_DELAY":"arrDelay",
  "TAIL_NUM":"tailNumber",
  "FL_NUM":"flightNumber",
  "FL_DATE":"date",
  "CRS_DEP_TIME":"time",
  "CRS_ARR_TIME":"arrivalTime",
  "DISTANCE":"distance",
  "UNIQUE_CARRIER":"airline"
}
# Pick each column by its raw name when the frame is fresh, or by its renamed name
# when this cell has already run; the column order follows `list_cols` either way.
selected_cols = [
  raw if raw in df_relationships.columns else rename_map[raw]
  for raw in list_cols
]
# rename() ignores mapping keys that are absent, so it is a no-op on a re-run.
df_relationships = df_relationships[selected_cols].rename(columns=rename_map)
df_relationships.head()

Unnamed: 0,src,dst,date,deptDelay,arrDelay,distance,tailNumber,flightNumber,time,arrivalTime,airline
0,ATL,SLC,2018-05-01,-1.0,-3.0,1590.0,N6709,15,1920,2120,DL
1,LAS,DTW,2018-05-01,7.0,-2.0,1749.0,N820DN,16,1400,2102,DL
2,DTW,LAS,2018-05-01,-4.0,23.0,1749.0,N856DN,17,1757,1915,DL
3,MSP,DTW,2018-05-01,-6.0,-13.0,528.0,N329NB,22,1335,1615,DL
4,DTW,MSP,2018-05-01,6.0,6.0,528.0,N697DL,23,1556,1650,DL


## Upload data

In [16]:
from neo4j import GraphDatabase
