In [None]:
#Prepare Dataset for Jan 2023

import pandas as pd
#import datetime


# Load the raw January 2023 station snapshots and derive the calendar and
# 15-minute-bucket columns used by the rest of the notebook.
#
# Resulting extra columns: year, month, day, hour, grouped_minute (0/15/30/45),
# day_of_week (Monday=0), is_weekend (1 for Saturday/Sunday, else 0) and
# grouped_date, a zero-padded 'YYYY-MM-DD HH:MM' label of the 15-minute bucket.
df_month = (
    pd.read_csv('../../data/bicing/raw/2023_01_STATIONS.csv')
    # Columns this analysis never reads. `last_updated` goes here too: it was
    # previously converted to datetime and then discarded without being used.
    .drop(columns=['ttl', 'is_charging_station', 'traffic', 'last_updated'])
    # `last_reported` is parsed as epoch seconds.
    .assign(last_reported=lambda d: pd.to_datetime(d['last_reported'], unit='s'))
    # A row without a report time cannot be placed in any bucket. Keeping it
    # would turn every derived calendar column into floats and give the row a
    # meaningless label, so such rows are removed up front.
    .dropna(subset=['last_reported'])
    .assign(
        year=lambda d: d['last_reported'].dt.year,
        month=lambda d: d['last_reported'].dt.month,
        day=lambda d: d['last_reported'].dt.day,
        hour=lambda d: d['last_reported'].dt.hour,
        # Minute buckets: 0-14 -> 0, 15-29 -> 15, 30-44 -> 30, 45-59 -> 45.
        grouped_minute=lambda d: (d['last_reported'].dt.minute // 15) * 15,
        day_of_week=lambda d: d['last_reported'].dt.dayofweek,
        is_weekend=lambda d: (d['day_of_week'] > 4).astype(int),
        # Zero-padded so the label sorts chronologically as plain text and its
        # last five characters are always a clean 'HH:MM'.
        grouped_date=lambda d: d['last_reported'].dt.floor('15min').dt.strftime('%Y-%m-%d %H:%M'),
    )
    # The exact timestamp is no longer needed once the buckets exist; dropping
    # it lets snapshots that are identical within a bucket collapse below.
    .drop(columns=['last_reported'])
    .drop_duplicates()
)

df_month.count()

In [None]:
# To find the busiest day in January we need, for every station, the change in
# available bikes from one snapshot to the next.
#
# diff() works on row order, so the rows are first arranged chronologically
# inside each station. Sorting on the numeric calendar columns avoids relying on
# the raw file order or on the text form of `grouped_date`.
snapshots_in_time_order = df_month.sort_values(
    ['station_id', 'year', 'month', 'day', 'hour', 'grouped_minute']
)

# The result keeps the original row labels, so assigning it back aligns each
# difference with its own row without reordering `df_month`.
df_month['bike_available_diff'] = (
    snapshots_in_time_order.groupby('station_id')['num_bikes_available'].diff()
)

df_month.head(2)

In [None]:
# Count every change as a movement regardless of direction (bike taken or
# returned), then total the movements per day of the month; the ten largest
# totals are displayed, busiest day first.
df_month['bike_available_diff'] = df_month['bike_available_diff'].abs()

df_month_busiest_days = df_month.groupby('day', as_index=False)['bike_available_diff'].sum()

df_month_busiest_days.sort_values(by='bike_available_diff', ascending=False).head(10)

In [None]:
# Pick the day of the month with the largest total of absolute bike movements.
# The day is computed from the data instead of being copied by hand from the
# previous cell's output, so it stays correct if the input file changes.
# abs() is applied here as well, which makes this cell independent of whether
# `bike_available_diff` has already been made non-negative.
movements_per_day = (
    df_month['bike_available_diff'].abs().groupby(df_month['day']).sum()
)
busiest_day_number = movements_per_day.idxmax()

# All snapshots recorded on that day.
busiest_day = df_month[df_month['day'] == busiest_day_number]
busiest_day.head(2)

In [None]:
# Enrich the busiest-day snapshots with the per-station records from the
# station-locations file (presumably the coordinates and address fields the
# maps below rely on - verify against that file's columns).
# A left join keeps every snapshot row even when its station has no match.
df_stations = pd.read_csv('../../data/bicing/processed/2024_STATION_LOCATIONS.csv')

df_month_busiest_days = pd.merge(busiest_day, df_stations, how='left', on='station_id')

df_month_busiest_days.head(3)

In [None]:
#df_month_busiest_days[df_month_busiest_days['is_returning'] == 0]

In [None]:
# Flag per snapshot: False when the station reports zero free docks, True otherwise.
# (An alternative signal, currently unused, would be the feed's `is_returning` flag.)
df_month_busiest_days['docking_available'] = df_month_busiest_days['num_docks_available'].ne(0)

In [None]:
import plotly.express as px

# Animated map of dock availability on the busiest day: one marker per station,
# one animation frame per `grouped_date` value, coloured by the boolean
# `docking_available` column of df_month_busiest_days.
docking_map_options = dict(
    lat="lat",
    lon="lon",
    color="docking_available",
    # Keys are 0/1 while the column holds booleans; in Python False == 0 and
    # True == 1, so presumably these entries are matched - confirm on the plot.
    color_discrete_map={0: "red", 1: "green"},
    hover_name="address",
    hover_data=["station_id", "altitude", "post_code", "capacity"],
    title="January's busiest day Docking Availability",
    animation_frame='grouped_date',
    zoom=11.5,
    height=630,
    width=1200,
    # Fix the category order explicitly so it does not depend on the data.
    category_orders={"docking_available": [0, 1]},
)
fig = px.scatter_mapbox(df_month_busiest_days, **docking_map_options)

fig.update_layout(
    mapbox_style="open-street-map",
    margin=dict(r=0, t=80, l=0, b=0),
    mapbox=dict(center=dict(lat=41.40484, lon=2.17482)),
)

# Give every animation frame its own title; the last five characters of the
# frame name are the time part of the `grouped_date` label.
for frame in fig.frames:
    frame.layout.title = "January's busiest day Docking Availability At: {}".format(frame.name[-5:])

fig.show()

In [None]:
# Flag per snapshot: False when the station reports zero available bikes, True otherwise.
# (An alternative signal, currently unused, would be the feed's `is_renting` flag.)
df_month_busiest_days['bikes_available'] = df_month_busiest_days['num_bikes_available'].ne(0)


In [None]:
import plotly.express as px

# Animated map of bike availability on the busiest day: one marker per station,
# one animation frame per `grouped_date` value, coloured by the boolean
# `bikes_available` column of df_month_busiest_days.
bikes_map_options = dict(
    lat="lat",
    lon="lon",
    color="bikes_available",
    # Keys are 0/1 while the column holds booleans; in Python False == 0 and
    # True == 1, so presumably these entries are matched - confirm on the plot.
    color_discrete_map={0: "red", 1: "green"},
    hover_name="address",
    hover_data=["station_id", "altitude", "post_code", "capacity", "num_bikes_available"],
    title="January's busiest day Bikes Availability",
    animation_frame='grouped_date',
    zoom=11.5,
    height=630,
    width=1200,
    # Fix the category order explicitly so it does not depend on the data.
    category_orders={"bikes_available": [0, 1]},
)
fig = px.scatter_mapbox(df_month_busiest_days, **bikes_map_options)

fig.update_layout(
    mapbox_style="open-street-map",
    margin=dict(r=0, t=80, l=0, b=0),
    mapbox=dict(center=dict(lat=41.40484, lon=2.17482)),
)

# Give every animation frame its own title; the last five characters of the
# frame name are the time part of the `grouped_date` label.
for frame in fig.frames:
    frame.layout.title = "January's busiest day Bikes Availability At: {}".format(frame.name[-5:])

fig.show()