# Script test and exploration of data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd

import datetime

# import numpy as np
# import pyarrow

### Specific stations

In [4]:
# NB: station codes are kept as strings since some of them are not valid integers (!)

# Codes of the reference stations, each followed by its station name.
ref_ids = [
    "21209",  # Molière - République
    "22202",  # Jean Marin Naudin - Stalingrad
    "21205",  # Arthur Auger - Jean Jaurès
    "21212",  # Marne - Germain Dardan
]

# Typical usage
# df[df["stationCode"].isin(ref_ids)]

### Load basic data

In [8]:
# Day to inspect; the summary for a day is read from data/Summary_<date>.parquet.
date_str = "2022-05-18"

file_name = f"Summary_{date_str}.parquet"
file_path = os.path.join("data", file_name)

# Load the whole day and display it as the cell output.
aux_df = pd.read_parquet(file_path)
aux_df

Unnamed: 0_level_0,time,stationCode,operative,available_mechanical,available_electrical
file_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-05-18 00:00:07+02:00,2022-05-17 21:34:00+00:00,35011,True,5.0,12.0
2022-05-18 00:00:07+02:00,2022-05-17 21:38:00+00:00,11040,True,17.0,0.0
2022-05-18 00:00:07+02:00,2022-05-17 21:33:00+00:00,41204,True,1.0,17.0
2022-05-18 00:00:07+02:00,2022-05-17 21:34:00+00:00,19009,True,1.0,2.0
2022-05-18 00:00:07+02:00,2021-02-11 09:14:00+00:00,16004,False,0.0,0.0
...,...,...,...,...,...
2022-05-18 23:50:07+02:00,2022-05-18 21:41:00+00:00,19009,True,1.0,2.0
2022-05-18 23:50:07+02:00,2022-05-18 21:37:00+00:00,41204,True,1.0,12.0
2022-05-18 23:50:07+02:00,2022-05-18 21:39:00+00:00,11040,True,15.0,4.0
2022-05-18 23:50:07+02:00,2022-05-18 21:42:00+00:00,9117,True,2.0,1.0


### Restrict to a specific station

In [None]:
# Station to keep: "Molière - République".
station_code = '21209'

# Boolean mask on the station code column; only the matching rows are kept.
current_df = aux_df.loc[aux_df["stationCode"].eq(station_code)]

## Defining graph functions

In [81]:
import plotly.express as px


# Mechanical bikes available at the selected station over the day.
# `markers` is documented as a boolean flag in plotly express; the previous
# value "dot" was a string that only enabled markers because it is truthy.
fig = px.line(
    current_df.reset_index(),
    x="file_time",
    y="available_mechanical",
    markers=True,
)

fig.show()

In [2]:
import plotly.graph_objects as go

def plot_bikes(date_str, station_code):
    """Plot the mechanical and electrical bikes available at one station.

    Reads ``data/Summary_<date_str>.parquet``, keeps the rows of the requested
    station and shows both availability series over the morning window.
    The window is nominally 07:30-10:00; the slice actually runs from 07:25
    to 10:05 so that samples stamped slightly after the nominal bounds are
    still included.

    Parameters
    ----------
    date_str : str
        Day to plot, formatted ``YYYY-MM-DD``. Used both for the file name
        and for the bounds of the time window.
    station_code : str or int
        Station identifier. It is converted to ``str`` before the comparison
        because the notebook handles station codes as strings; an ``int``
        would otherwise match nothing and yield an empty plot.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    file_name = "Summary_{}.parquet".format(date_str)
    file_path = os.path.join("data", file_name)

    aux_df = pd.read_parquet(file_path)

    # Compare as text so that plot_bikes(date, 21209) behaves like "21209".
    station_code = str(station_code)
    current_df = aux_df[aux_df["stationCode"] == station_code]

    # Label-based slice on the time index, with a 5-minute margin on each side.
    start_str = "{} 07:25".format(date_str)
    end_str = "{} 10:05".format(date_str)
    my_df = current_df.loc[start_str: end_str]

    fig = go.Figure()

    fig.add_trace(go.Scatter(x=my_df.index, y=my_df["available_mechanical"],
                    mode='lines+markers', name="mech"))
    fig.add_trace(go.Scatter(x=my_df.index, y=my_df["available_electrical"],
                    mode='lines+markers', name="elec"))

    # Identify the figure: the notebook calls this function many times and
    # the resulting plots are otherwise indistinguishable.
    fig.update_layout(
        title="Station {} - {}".format(station_code, date_str),
        xaxis_title="file_time",
        yaxis_title="available bikes",
    )

    fig.show()

## Exploration by station

### Molière - République (Montrouge): 21209

In [6]:
# Monday
plot_bikes("2022-05-02", "21209")

In [7]:
# Tuesday
plot_bikes("2022-05-03", "21209")

In [8]:
# Saturday
plot_bikes("2022-05-07", "21209")

In [9]:
# Sunday
plot_bikes("2022-05-08", "21209")

In [5]:
# Monday
plot_bikes("2022-05-09", "21209")

In [71]:
# Wednesday
plot_bikes("2022-05-11", "21209")

In [19]:
# Thursday
plot_bikes("2022-05-12", "21209")

In [73]:
# Friday
plot_bikes("2022-05-13", "21209")

In [66]:
# Saturday
plot_bikes("2022-05-14", "21209")

In [67]:
# Sunday (no data)
plot_bikes("2022-05-15", "21209")

In [68]:
# Monday
plot_bikes("2022-05-16", "21209")

In [70]:
# Tuesday
plot_bikes("2022-05-17", "21209")

In [18]:
# Thursday
plot_bikes("2022-05-19", "21209")

### Center of Paris: Filles Saint-Thomas - Place de la Bourse (2009)

In [80]:
# Friday
plot_bikes("2022-05-13", "2009")

In [79]:
# Saturday
plot_bikes("2022-05-14", "2009")

In [78]:
# Sunday (missing data)
plot_bikes("2022-05-15", "2009")

In [76]:
# Monday
plot_bikes("2022-05-16", "2009")

In [75]:
# Tuesday
plot_bikes("2022-05-17", "2009")

### Edge of Paris: Porte de Vanves - 14138

In [82]:
# Friday
plot_bikes("2022-05-13", "14138")

In [83]:
# Saturday
plot_bikes("2022-05-14", "14138")

In [84]:
# Sunday (missing data)
plot_bikes("2022-05-15", "14138")

In [85]:
# Monday
plot_bikes("2022-05-16", "14138")

In [7]:
# Monday
plot_bikes("2022-05-09", "14138")

In [86]:
# Tuesday
plot_bikes("2022-05-17", "14138")

In [6]:
# Thursday
plot_bikes("2022-05-19", "14138")

## Nombre de stations occupées

In [None]:
import plotly.graph_objects as go

def plot_total_bikes(date_str):
    """Plot the total number of mechanical and electrical bikes over all stations.

    Reads ``data/Summary_<date_str>.parquet``, keeps the morning window and
    sums the availability columns over all rows that share the same index
    timestamp, so that each plotted point is a total rather than a single
    station's reading. The window is nominally 07:30-10:00; the slice runs
    from 07:25 to 10:05 so that samples stamped slightly after the nominal
    bounds are still included.

    Parameters
    ----------
    date_str : str
        Day to plot, formatted ``YYYY-MM-DD``. Used both for the file name
        and for the bounds of the time window.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    file_name = "Summary_{}.parquet".format(date_str)
    file_path = os.path.join("data", file_name)

    aux_df = pd.read_parquet(file_path)

    # Label-based slice on the time index, with a 5-minute margin on each side.
    start_str = "{} 07:25".format(date_str)
    end_str = "{} 10:05".format(date_str)
    my_df = aux_df.loc[start_str: end_str]

    # Aggregate per timestamp: without this step every per-station row would
    # be plotted individually instead of the totals the function promises.
    # Only the two plotted columns are summed, so non-numeric columns never
    # take part in the aggregation.
    bike_cols = ["available_mechanical", "available_electrical"]
    totals = my_df.groupby(level=0)[bike_cols].sum()

    fig = go.Figure()

    fig.add_trace(go.Scatter(x=totals.index, y=totals["available_mechanical"],
                    mode='lines+markers', name="mech"))
    fig.add_trace(go.Scatter(x=totals.index, y=totals["available_electrical"],
                    mode='lines+markers', name="elec"))

    fig.update_layout(
        title="All stations - {}".format(date_str),
        xaxis_title="file_time",
        yaxis_title="total available bikes",
    )

    fig.show()

In [4]:
# Check the current working directory: the relative "data" paths used in this notebook are resolved against it.
os.getcwd()

'd:\\git\\predi-veli'

In [3]:
import pandas as pd

# Load one day of summaries from data/Summary_<date>.parquet.
date_str = "2022-05-20"
file_name = f"Summary_{date_str}.parquet"
file_path = os.path.join("data", file_name)

aux_df = pd.read_parquet(file_path)

# Keep the morning window (07:25 to 10:05) with a label-based slice on the index.
start_str = f"{date_str} 07:25"
end_str = f"{date_str} 10:05"
my_df = aux_df.loc[start_str:end_str]

In [6]:
import plotly.graph_objects as go

fig = go.Figure()

# Totals per file_time over all rows of the morning window.
# numeric_only=True restricts the sum to numeric/boolean columns explicitly;
# recent pandas versions no longer drop the datetime and string columns
# silently, so a bare sum() can fail or concatenate strings there.
sum_df = my_df.groupby('file_time').sum(numeric_only=True)

fig.add_trace(go.Scatter(x=sum_df.index, y=sum_df["available_mechanical"],
                mode='lines+markers', name="mech"))
fig.add_trace(go.Scatter(x=sum_df.index, y=sum_df["available_electrical"],
                mode='lines+markers', name="elec"))

fig.show()

In [4]:
# Totals per file_time; numeric_only=True keeps the numeric/boolean columns
# and skips the datetime and string ones, which recent pandas versions no
# longer drop silently.
aux_val = my_df.groupby('file_time').sum(numeric_only=True)

In [5]:
# Display the per-file_time totals stored in aux_val.
aux_val

Unnamed: 0_level_0,operative,available_mechanical,available_electrical
file_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-05-20 07:30:07+02:00,1407,9724.0,6513.0
2022-05-20 07:40:07+02:00,1407,9607.0,6305.0
2022-05-20 07:50:07+02:00,1407,9400.0,6034.0
2022-05-20 08:00:07+02:00,1407,9500.0,6060.0
2022-05-20 08:10:08+02:00,1407,9499.0,6153.0
2022-05-20 08:20:08+02:00,1407,9583.0,6264.0
2022-05-20 08:30:08+02:00,1407,9627.0,6294.0
2022-05-20 08:40:08+02:00,1407,9520.0,6200.0
2022-05-20 08:50:08+02:00,1407,9394.0,5975.0
2022-05-20 09:00:08+02:00,1407,9279.0,5880.0
