## Here I try to visually check the relevance of my resampling and interpolation method, using data from the month of March.

In [1]:
import pandas as pd
import datetime
from datetime import date
import os

I use functions defined in `preprocess.py`.

In [2]:
import load_data

from preprocess import totem_first_cleaning
from preprocess import from_select_date
from preprocess import drop_hour_gap
from preprocess import resamp_interp
from preprocess import only_at
from preprocess import pick_week_days

Downloading data from https://doc-0k-7k-sheets.googleusercontent.com/pub/l5l039s6ni5uumqbsj9o11lmdc/vn5b2ehb096h9ockc75cfk1pp4/1617203400000/108937725768799295374/*/e@2PACX-1vQVtdpXMHB4g9h75a0jw8CsrqSuQmP5eMIB2adpKR5hkRggwMwzFy5kB-AIThodhVHNLxlZYm8fuoWj?gid=2105854808&single=true&output=csv (1 byte)

file_sizes: 50.2kB [00:00, 1.25MB/s]                                            
Successfully downloaded file to ./Data/SaisiesTotem.csv


Loading dataset

In [3]:
# Load the totem data through the project-local `load_data` module
# (presumably returned as a pandas DataFrame — confirm in load_data.py).
# The recorded output of this cell shows the CSV being downloaded to
# ./Data/SaisiesTotem.csv.
# NOTE(review): the same download message also appears in the output of the
# import cell, so the file seems to be fetched twice — check load_data.
df_totem = load_data.Load_totemdata().save_as_df()

Downloading data from https://doc-0k-7k-sheets.googleusercontent.com/pub/l5l039s6ni5uumqbsj9o11lmdc/7gij79plml8ghd7pu31iukvdvg/1617203410000/108937725768799295374/*/e@2PACX-1vQVtdpXMHB4g9h75a0jw8CsrqSuQmP5eMIB2adpKR5hkRggwMwzFy5kB-AIThodhVHNLxlZYm8fuoWj?gid=2105854808&single=true&output=csv (1 byte)

file_sizes: 50.2kB [00:00, 1.57MB/s]                                            
Successfully downloaded file to ./Data/SaisiesTotem.csv


A bit of cleaning before anything else.

In [4]:
# First cleaning pass, implemented by totem_first_cleaning in preprocess.py.
# NOTE(review): df_totem is overwritten with the cleaned result, so the raw
# frame is no longer reachable and re-running this cell alone feeds already
# cleaned data back into the function — confirm that is harmless.
df_totem = totem_first_cleaning(df_totem)

Select the data from March 6, 2021, for plotting purposes only, then apply the resampling and interpolation. (These functions are documented in `preprocess.py`.)

In [5]:
# Both selections use the same (year, month, day) arguments.
selection_date = (2021, 3, 6)

# Selection kept as-is: plotted later as the actual values.
raw_march = from_select_date(df_totem, *selection_date)
# Second, separate selection: goes through gap dropping and resampling.
march = from_select_date(df_totem, *selection_date)

In [6]:
# Run drop_hour_gap with argument 12 (presumably a gap length in hours —
# TODO confirm in preprocess.py), then resample/interpolate the result.
# NOTE(review): `march` is rebound to the output of drop_hour_gap, so
# re-running this cell alone passes already processed data back in.
# NOTE(review): the recorded output shows a pandas SettingWithCopyWarning for
# the chained assignment `dftemp["Today's total"][i] = 0.0`. That statement is
# not in this notebook, so it presumably lives in preprocess.py; a single
# `.loc[...]` assignment there would avoid it.
march = drop_hour_gap(march, 12)
march_minutes = resamp_interp(march)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dftemp["Today's total"][i] = 0.0


## Plotting part

In [7]:
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px

Select only the weekdays (Monday $= 0$, Sunday $= 6$).

In [10]:
# Week-day selection on the resampled and interpolated series.
march_minutes_week = pick_week_days(march_minutes)

# Same selection on the raw measurements, passed in indexed by 'Date'.
# set_index is used without inplace=True: it returns a new frame and leaves
# raw_march untouched, so this cell can be re-run without hitting a
# KeyError on the already consumed 'Date' column.
raw_march_week = pick_week_days(raw_march.set_index('Date'))

In [11]:
# Move the index of both weekly frames back into a regular column, in place;
# the plotting cell reads a 'Date' column from each of them.
# Note: this mutates the frames, so every extra run of this cell resets the
# index again.
for weekly_frame in (raw_march_week, march_minutes_week):
    weekly_frame.reset_index(inplace=True)

In [12]:
# Default template applied to the figure built below.
pio.templates.default = "plotly_white"

# Fixed canvas size (autosize off) so the figure keeps the same dimensions
# wherever it is rendered or exported.
layout = go.Layout(
                autosize=False,
                width=1000,
                height=500)

# Empty figure that the two traces are added to.
fig = go.Figure(layout=layout)

# Resampled and interpolated values, drawn as a line.
# The colour is set on `line` because this trace draws no markers.
fig.add_trace(go.Scatter(
                    x=march_minutes_week['Date'],
                    y=march_minutes_week["Today's total"],
                    mode='lines',
                    name='resampled & <br>interpolated <br>values',
                    line=dict(color="#ca0020")))

# Actual recorded values, drawn as individual markers on top of the line.
fig.add_trace(go.Scatter(
                    x=raw_march_week['Date'],
                    y=raw_march_week["Today's total"],
                    mode='markers',
                    name='actual values',
                    marker=dict(color="#252525")))

# Axis titles and figure title, so the exported image stands on its own.
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Number of bikes')
fig.update_layout(
            title_text='Actual VS resampled & interpolated values',
            title_font_size=21,
            title_x=0.46,
            title_y=0.88)

fig.show()

Save image

In [13]:
# Export the figure as a PNG rather than an SVG, because the image is meant
# to be included in a LaTeX document.
# NOTE(review): the path is relative, so the file is written to the kernel's
# current working directory; static export presumably requires plotly's image
# engine (kaleido) to be installed — confirm in the environment.
fig.write_image("resamp_interp.png")