In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import networkx as nx
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score


# Day under analysis; it also selects which per-day metric / SP files are read.
date = "2022-06-01"
order = pd.read_csv("..//..//Database//NYC_trip//order_clean_260.csv", index_col=0)
network_metrics = pd.read_csv(
    "..//..//Database//network_test_0305//network_metrics_%s.csv" % date,
    index_col=0,
)
SP = pd.read_csv(
    "..//..//Database//network_test_0305//SP_60_%s.csv" % date,
    index_col=0,
)

# Parse both timestamp columns so they can be compared against Timestamps.
for time_col in ("call_time", "end_time"):
    order[time_col] = pd.to_datetime(order[time_col])

# Keep orders whose call_time falls in [start_time, end_time), ordered by
# call_time and renumbered 0..n-1.
start_time = pd.to_datetime(date + " 06:00:00 AM")
end_time = pd.to_datetime(date + " 12:00:00 PM")
order_one_day = (
    order.loc[lambda d: (d["call_time"] >= start_time) & (d["call_time"] < end_time)]
    .sort_values(by="call_time")
    .reset_index(drop=True)
)

# Name the single SP column and put the other two frames on a fresh 0..n-1
# index as well, so the column-wise concat below joins rows by position.
# NOTE(review): this assumes the rows of network_metrics and SP are already in
# the same order as the sorted orders -- confirm against how they were produced.
SP.columns = ["SP"]
SP = SP.reset_index(drop=True)
network_metrics = network_metrics.reset_index(drop=True)

# All three frames must have the same number of rows ...
assert len({len(order_one_day), len(network_metrics), len(SP)}) == 1

# ... and exactly the same index.
assert all(
    frame.index.equals(network_metrics.index) for frame in (order_one_day, SP)
)

# For each SP time span x (e.g. x = 60, 90, 120), the SP values in
# (endtime - x, endtime) should be dropped. For example, with endtime 12:00 and
# x = 60, the SP values in (11:00, 12:00) are dropped.
# The goal is to predict SP from the provided information and the network.
# dropna() removes every row that has a NaN in any column.
df = pd.concat([order_one_day, network_metrics, SP], axis="columns").dropna()

In [14]:
# Example: building a graph with network_build.py
import network_build

area = np.load("..//..//Database//NYC_area//NY_area.npy")

# Orders are taken from the half-open window [t, t + t_opt).
order_start_time = pd.to_datetime(date + " 06:00:00 AM")
order_end_time = pd.to_datetime(date + " 7:00:00 AM")

# Important: the lower bound on call_time is inclusive (>=) and the upper bound
# is exclusive (<). Only the four columns passed to the builder are kept.
order_pick = order_one_day.loc[
    lambda d: (d["call_time"] >= order_start_time) & (d["call_time"] < order_end_time),
    ["sid", "call_time", "eid", "end_time"],
]

net = network_build.ConstructNetwork(order_pick.values, area, void=10)
G_order = net.build_network(network_type="order")

# Add a 'sink' node and connect every other node to it with a zero-weight edge.
G_order.add_node('sink')
G_order.add_edges_from(
    (node, 'sink', {'weight': 0})
    for node in list(G_order.nodes())
    if node != 'sink'
)

In [15]:
# Display the edge view of the order graph (including the edges into 'sink').
G_order.edges

OutEdgeView([('t0', 't279'), ('t0', 't294'), ('t0', 't338'), ('t0', 't369'), ('t0', 't383'), ('t0', 't397'), ('t0', 't406'), ('t0', 't417'), ('t0', 't427'), ('t0', 'sink'), ('t1', 't516'), ('t1', 't519'), ('t1', 't284'), ('t1', 't410'), ('t1', 't415'), ('t1', 't421'), ('t1', 't424'), ('t1', 't428'), ('t1', 't429'), ('t1', 't442'), ('t1', 't443'), ('t1', 't444'), ('t1', 't449'), ('t1', 't452'), ('t1', 't453'), ('t1', 't485'), ('t1', 't499'), ('t1', 't503'), ('t1', 't510'), ('t1', 't511'), ('t1', 'sink'), ('t2', 't815'), ('t2', 't829'), ('t2', 't909'), ('t2', 't910'), ('t2', 'sink'), ('t3', 't138'), ('t3', 't188'), ('t3', 't233'), ('t3', 't243'), ('t3', 't245'), ('t3', 't252'), ('t3', 't258'), ('t3', 't267'), ('t3', 't308'), ('t3', 't320'), ('t3', 't342'), ('t3', 't348'), ('t3', 'sink'), ('t4', 't671'), ('t4', 't752'), ('t4', 't772'), ('t4', 't785'), ('t4', 't786'), ('t4', 't790'), ('t4', 't848'), ('t4', 't889'), ('t4', 't904'), ('t4', 't905'), ('t4', 't906'), ('t4', 't917'), ('t4', 't92

In [16]:
# Display the node view of the order graph.
G_order.nodes

NodeView(('t0', 't1', 't2', 't3', 't4', 't5', 't6', 't7', 't8', 't9', 't10', 't11', 't12', 't13', 't14', 't15', 't16', 't17', 't18', 't19', 't20', 't21', 't22', 't23', 't24', 't25', 't26', 't27', 't28', 't29', 't30', 't31', 't32', 't33', 't34', 't35', 't36', 't37', 't38', 't39', 't40', 't41', 't42', 't43', 't44', 't45', 't46', 't47', 't48', 't49', 't50', 't51', 't52', 't53', 't54', 't55', 't56', 't57', 't58', 't59', 't60', 't61', 't62', 't63', 't64', 't65', 't66', 't67', 't68', 't69', 't70', 't71', 't72', 't73', 't74', 't75', 't76', 't77', 't78', 't79', 't80', 't81', 't82', 't83', 't84', 't85', 't86', 't87', 't88', 't89', 't90', 't91', 't92', 't93', 't94', 't95', 't96', 't97', 't98', 't99', 't100', 't101', 't102', 't103', 't104', 't105', 't106', 't107', 't108', 't109', 't110', 't111', 't112', 't113', 't114', 't115', 't116', 't117', 't118', 't119', 't120', 't121', 't122', 't123', 't124', 't125', 't126', 't127', 't128', 't129', 't130', 't131', 't132', 't133', 't134', 't135', 't136', 't13