# Análisis predictivo de la disponibilidad de bicicletas en estaciones BiciMad
Pablo Ariño & Álvaro Laguna

## Importación de librerías y configuración

In [1]:
import os

import dotenv

from src.data import (
    find_json_files_in_directory,
    load_json_files,
    load_json_objects,
    build_stations_dataframe,
    get_k_closest_stations,
    build_dock_bikes_timeseries_dataframe,
    get_raw_data,
    load_holidays,
    get_holidays,
    load_json_files_per_month,
)
from src.visualization import (
    plot_stations_in_map)

# Load variables from a local .env file (if present) into the process
# environment so the os.getenv() lookups in the configuration cell see them.
dotenv.load_dotenv()

True

In [2]:
# Data locations. DATA_PATH falls back to "data" when the environment does
# not define it; the raw directory path keeps its trailing slash.
DATA_PATH = os.environ.get("DATA_PATH", "data")
DATA_RAW_PATH = f"{DATA_PATH}/raw/"

# JSON files found in the raw directory, listed in sorted order.
DATA_FILES = list(find_json_files_in_directory(DATA_RAW_PATH))
DATA_FILES.sort()
print(f"Data files: {DATA_FILES}")

# Station under study and number of neighbouring stations to include;
# both use a default when the environment does not define them.
TARGET_STATION_ID = int(os.environ.get("TARGET_STATION_ID", "90"))
NUM_NEIGHBOR_STATIONS = int(os.environ.get("NUM_NEIGHBOR_STATIONS", "5"))

Data files: ['202201.json', '202202.json', '202203.json', '202204.json', '202205.json', '202206.json', '202207.json', '202208.json', '202209.json', '202210.json', '202211.json', '202212.json']


## Load data

In [3]:
# Load the raw JSON data from DATA_RAW_PATH for the months 2022-01 to 2022-12
# (presumably both bounds inclusive — the resulting time series spans
# 2022-01-01 to 2022-12-31; TODO confirm in src.data).
# NOTE(review): get_raw_data(DATA_RAW_PATH) is an alternative loader that is
# imported but not used in this cell.
raw_data = load_json_files_per_month(DATA_RAW_PATH, '2022-01', '2022-12')

In [4]:
# Sanity check: number of entries loaded into raw_data.
len(raw_data)

8738

## Process data

In [5]:
# Build the stations table from the raw data. As the output shows, it is
# indexed by station id and carries name, number, address, latitude,
# longitude and total_bases columns.
stations = build_stations_dataframe(raw_data)
stations

Unnamed: 0_level_0,name,number,address,latitude,longitude,total_bases
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Puerta del Sol A,1a,Puerta del Sol nº 1,40.4172137,-3.7018341,30
2,Puerta del Sol B,1b,Puerta del Sol nº 1,40.41731271011562,-3.701602938060457,30
3,Miguel Moya,2,Calle Miguel Moya nº 1,40.4205886,-3.7058415,24
4,Plaza Conde Suchil,3,Plaza del Conde del Valle de Súchil nº 3,40.4302937,-3.7069171,18
5,Malasaña,4,Calle Manuela Malasaña nº 5,40.4285524,-3.7025875,24
...,...,...,...,...,...,...
265,INEF,257,Avenida Juan de Herrera frente a la calle Paul...,40.43896,-3.72997,24
266,Ciudad Universitaria 1,258,Avenida de la Complutense (Metro Ciudad Univer...,40.44375,-3.72699,24
267,Ciudad Universitaria 2,259,Avenida de la Complutense (Metro Ciudad Univer...,40.44342,-3.72693,24
268,Facultad Biología,260,Calle José Antonio Novais frente al nº 12,40.4483322,-3.7272945,24


In [6]:
# Select the k stations closest to TARGET_STATION_ID, with k taken from
# NUM_NEIGHBOR_STATIONS.
# NOTE(review): the variable name says "top_5" although k is configurable;
# a k-agnostic name would be more accurate.
top_5_closest_stations = get_k_closest_stations(
    TARGET_STATION_ID, stations, k=NUM_NEIGHBOR_STATIONS,
)

In [7]:
# Display the selected neighbours; the output carries the station columns
# plus a `distance` column (units not shown here — TODO confirm in src.data).
top_5_closest_stations

Unnamed: 0_level_0,name,number,address,latitude,longitude,total_bases,distance
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
102,Alcalá,97,Avenida de Menéndez Pelayo nº 3,40.4222969,-3.6805189,27,0.098442
101,Castelló,96,Calle Alcalá nº 111,40.422064,-3.6821793,27,0.194586
107,Velázquez,102,Calle Alcalá nº 95,40.4211802,-3.6840229,24,0.342588
65,Narváez,61,Calle O'Donnell nº 28,40.4213983,-3.6752045,24,0.407816
103,Plaza de Felipe II,98,Plaza de Felipe II,40.4239447,-3.6758383,24,0.445928


In [8]:
# Station ids of the neighbours, taken from the DataFrame index.
top_5_closest_stations_ids = top_5_closest_stations.index.tolist()
# Plot the stations, passing the target station and its neighbours separately
# (presumably so they are highlighted differently — verify in src.visualization).
plot_stations_in_map(
    stations,
    target_station_id=TARGET_STATION_ID,
    other_stations_ids=top_5_closest_stations_ids,
)

In [9]:
# Target station first, followed by its neighbours.
# NOTE(review): despite its name, station_names holds station ids.
station_names = [TARGET_STATION_ID, *top_5_closest_stations_ids]
dock_bikes_timeseries = build_dock_bikes_timeseries_dataframe(raw_data, station_names)

In [12]:
# Display the time series: one column per station id, indexed by timestamp.
dock_bikes_timeseries

Unnamed: 0_level_0,90,102,101,107,65,103
timestamps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-01 00:13:20.603583,19,21,21,14,13,15
2022-01-01 01:13:21.911079,19,20,21,14,11,15
2022-01-01 02:13:23.718951,19,20,20,14,11,13
2022-01-01 03:13:23.902654,18,20,20,14,11,12
2022-01-01 04:13:26.826536,18,20,19,14,11,11
...,...,...,...,...,...,...
2022-12-31 19:57:50.302819,8,12,12,5,11,2
2022-12-31 20:57:50.382304,4,11,11,6,9,2
2022-12-31 21:57:52.761035,3,13,13,6,11,1
2022-12-31 22:57:53.608384,3,13,13,6,9,1


In [13]:
# Load the calendar CSV from the raw data directory.
# os.path.join is used instead of string concatenation: DATA_RAW_PATH already
# ends with "/", so appending "/calendario.csv" produced a doubled separator.
raw_holydays = load_holidays(os.path.join(DATA_RAW_PATH, "calendario.csv"))

In [14]:
# Inspect the raw calendar table; the output shows it indexed by `Dia` (date)
# with weekday, day-type and holiday-description columns.
raw_holydays

Unnamed: 0_level_0,Dia_semana,laborable / festivo / domingo festivo,Tipo de Festivo,Festividad
Dia,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-01-01,martes,festivo,Festivo nacional,Año Nuevo
2013-01-02,miercoles,laborable,,
2013-01-03,jueves,laborable,,
2013-01-04,viernes,laborable,,
2013-01-05,sabado,sabado,,
...,...,...,...,...
2024-12-27,viernes,,,
2024-12-28,sábado,,,
2024-12-29,domingo,,,
2024-12-30,lunes,,,


In [30]:
# Show the last ten entries of the series returned by get_holidays for
# 2022-10-01 .. 2022-12-31 (the output is an hourly series named `festivos`).
get_holidays("2022-10-01", "2022-12-31", raw_holydays).tail(10)

Dia
2022-12-31 14:00:00    1
2022-12-31 15:00:00    1
2022-12-31 16:00:00    1
2022-12-31 17:00:00    1
2022-12-31 18:00:00    1
2022-12-31 19:00:00    1
2022-12-31 20:00:00    1
2022-12-31 21:00:00    1
2022-12-31 22:00:00    1
2022-12-31 23:00:00    1
Freq: H, Name: festivos, dtype: int64