In [30]:
import os
import pickle

import numpy as np
import pandas as pd

In [31]:
# Data locations. Built with os.path.join so the notebook is not tied to Windows
# path separators (on Windows the resulting strings are unchanged).
data_dir = os.path.join('..', 'data', 'cycle_journeys')
journeys_path = os.path.join(data_dir, '189JourneyDataExtract20Nov2019-26Nov2019.csv')

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this project.
# The context managers close the file handles, which bare open(...) calls leaked.
with open(os.path.join(data_dir, 'bikepointid_to_commonname.p'), 'rb') as fh:
    # presumably maps bike point id -> common name (going by the file name) - TODO confirm
    bp_to_name = pickle.load(fh)
with open(os.path.join(data_dir, 'bikepointid_to_latlongs.p'), 'rb') as fh:
    # station id -> sequence where the plotting cells read [0] as latitude and [1] as longitude
    bp_to_latlon = pickle.load(fh)

In [70]:
# Bike ids to analyse for the single week of data.
# Currently every id in 0..17999 is kept; to work on a small sample instead,
# take every 101st id:
# samp_ids = [i for i in range(18000) if i % 101 == 0]
samp_ids = list(range(18000))

In [71]:
# Stream the journeys CSV in 1000-row chunks and keep only the rows whose
# 'Bike Id' is one of samp_ids, then stitch the surviving rows into one frame.
# NOTE(review): infer_datetime_format is deprecated as of pandas 2.0 - confirm the
# pandas version in use before dropping it.
iter_csv = pd.read_csv(
    journeys_path,
    header=0,
    sep=',',
    parse_dates=['Start Date', 'End Date'],
    infer_datetime_format=True,
    iterator=True,
    chunksize=1000,
)

df = pd.concat(
    [chunk.loc[chunk['Bike Id'].isin(samp_ids)] for chunk in iter_csv]
)

In [72]:
# Order journeys per bike chronologically; the sequence-building cell below
# relies on each bike's rows being in 'Start Date' order.
# Rebinding instead of inplace=True keeps the cell free of in-place mutation.
df = df.sort_values(by=['Bike Id', 'Start Date'])

## Develop function: per-bike journey sequences and redirections

In [74]:
def build_journey_sequences(journeys, bike_ids):
    """Build each bike's ordered journey list and flag redirections.

    A "redirection" is recorded whenever a bike's journey starts at a station
    other than the one where its previous journey ended.

    Parameters
    ----------
    journeys : pandas.DataFrame
        Must contain 'Bike Id', 'StartStation Id' and 'EndStation Id' columns,
        with each bike's rows already in chronological order.
    bike_ids : iterable
        Bike ids to build sequences for, processed in the given order.

    Returns
    -------
    seqs : dict
        bike id -> (journey_seq, skipped_seq). journey_seq is a list of
        (start_station, end_station) tuples; skipped_seq is a parallel list
        holding 1 where the journey began somewhere other than the previous
        journey's end station, else 0 (always 0 for the first journey).
        Bikes with no journeys map to ([], []).
    redirections : list
        (previous_end_station, next_start_station) for every flagged journey,
        across all bikes.
    """
    redirections = []
    seqs = dict()

    # Split the frame once instead of re-filtering it for every bike id;
    # groupby keeps the original row order inside each group.
    rows_by_bike = dict(iter(journeys.groupby('Bike Id', sort=False)))

    for bike in bike_ids:
        # Fresh lists per bike: previously a bike without rows either raised
        # NameError (first bike) or silently reused the previous bike's lists.
        journey_seq = []
        skipped_seq = []
        bike_rows = rows_by_bike.get(bike)
        if bike_rows is not None:
            starts = bike_rows['StartStation Id'].tolist()
            ends = bike_rows['EndStation Id'].tolist()
            for start, end in zip(starts, ends):
                if journey_seq and start != journey_seq[-1][1]:
                    skipped_seq.append(1)
                    redirections.append((journey_seq[-1][1], start))
                else:
                    skipped_seq.append(0)
                journey_seq.append((start, end))
        seqs[bike] = (journey_seq, skipped_seq)
    return seqs, redirections


# redirections: any redirection journey observed by any bike
seqs, redirections = build_journey_sequences(df, samp_ids)

In [75]:
# Inspect bike 202: (journey list, skip flags). In the recorded output every
# journey starts where the previous one ended, so all flags are 0.
seqs[202]

([(653, 723),
  (723, 729),
  (729, 368),
  (368, 731),
  (731, 723),
  (723, 691),
  (691, 618),
  (618, 636),
  (636, 747),
  (747, 160)],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [76]:
# Inspect bike 6363: in the recorded output three journeys are flagged 1,
# i.e. they start at a station other than the previous journey's end station.
seqs[6363]

([(52, 124),
  (124, 727),
  (727, 656),
  (656, 671),
  (671, 335),
  (100, 9),
  (9, 298),
  (298, 732),
  (732, 338),
  (338, 762),
  (361, 19),
  (19, 101),
  (101, 831),
  (831, 221),
  (374, 587),
  (587, 251),
  (251, 202),
  (202, 564),
  (564, 186),
  (186, 101),
  (101, 269),
  (269, 594),
  (594, 308)],
 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

In [77]:
import plotly.graph_objects as go

# Plot every journey of a single bike as a red line between its start and
# end stations.
bike_to_plot = 202  # bike id whose journeys are drawn

fig = go.Figure()

for start_station, end_station in seqs[bike_to_plot][0]:
    # bp_to_latlon entries are read as [0] = latitude, [1] = longitude
    start_pos = bp_to_latlon[start_station]
    end_pos = bp_to_latlon[end_station]
    fig.add_trace(
        go.Scattergeo(
            lon=[start_pos[1], end_pos[1]],
            lat=[start_pos[0], end_pos[0]],
            mode='lines',
            line=dict(width=2, color='red'),
            hoverinfo='skip',
        )
    )

# automatically zoom
fig.update_geos(fitbounds="locations")

fig.update_layout(showlegend=False)

fig.show()

In [87]:
# PLOT REDIRECTIONS
# Draw one faint blue line per observed redirection, from the station where a
# bike's journey ended to the station where its next journey started.
# Each redirection keeps its own trace so that overlapping lines are drawn
# separately at opacity 0.01.

fig = go.Figure()

skipped_redirections = 0  # redirections with a station missing from bp_to_latlon
for from_station, to_station in redirections:
    # Guard only the coordinate lookups, so a KeyError raised inside plotly
    # itself is no longer silently discarded.
    try:
        from_pos = bp_to_latlon[from_station]
        to_pos = bp_to_latlon[to_station]
    except KeyError:
        skipped_redirections += 1
        continue

    fig.add_trace(
        go.Scattergeo(
            # bp_to_latlon entries are read as [0] = latitude, [1] = longitude
            lon=[from_pos[1], to_pos[1]],
            lat=[from_pos[0], to_pos[0]],
            mode='lines',
            line=dict(width=2, color='blue'),
            hoverinfo='skip',
            opacity=0.01,
        )
    )

if skipped_redirections:
    print('{} of {} redirections not plotted: station id missing from bp_to_latlon'.format(
        skipped_redirections, len(redirections)))

# automatically zoom
fig.update_geos(fitbounds="locations")

fig.update_layout(showlegend=False)

fig.show()