# Análisis predictivo de la disponibilidad de bicicletas en estaciones BiciMad
Pablo Ariño & Álvaro Laguna

## Importación de librerías y configuración

In [1]:
import os

import dotenv

from src.data import (
    find_json_files_in_directory,
    load_json_files,
    load_json_objects,
)

# Read variables from a .env file (if one is found) into the process
# environment, so that os.getenv("DATA_PATH", ...) in the next cell can see them.
dotenv.load_dotenv()

True

In [2]:
# Root data directory; can be overridden with the DATA_PATH environment variable.
DATA_PATH = os.getenv("DATA_PATH", "data")
# Build the sub-directory paths with os.path.join so they are correct whether
# or not DATA_PATH ends with a path separator. Plain string concatenation
# turned the default "data" into "dataraw/" and "dataprocessed/".
# The trailing "" keeps the final separator, so these constants still end
# with a separator exactly as they did before.
DATA_RAW_PATH = os.path.join(DATA_PATH, "raw", "")
DATA_PROCESSED_PATH = os.path.join(DATA_PATH, "processed", "")
# Sorted so the data files are always listed in the same order.
DATA_FILES = sorted(
    find_json_files_in_directory(os.path.join(DATA_PATH, "raw"))
)
print(f"Data files: {DATA_FILES}")
# Name of the station the rest of the notebook is centred on.
TARGET_STATION_NAME = "Puerta de Madrid"

Data files: ['202210.json', '202211.json', '202212.json']


In [None]:
# File names of the processed datasets; they are appended to
# DATA_PROCESSED_PATH when the CSV files are written.
DOCK_BIKES_DATA_FILENAME = "dock_bikes_timeseries.csv"  # dock-bikes time series
STATIONS_DATA_FILENAME = "stations.csv"  # stations table

## Load data

In [4]:
# Load the JSON data files listed in DATA_FILES from the raw-data directory.
# NOTE(review): the structure of the returned object is defined by
# src.data.load_json_files, which is not visible here.
raw_data = load_json_files(DATA_RAW_PATH, DATA_FILES)

In [5]:
# Sanity check: number of items that were loaded.
len(raw_data)

2181

## Process data

In [None]:
# Build the stations table from the raw data and display it.
# NOTE(review): build_stations_dataframe is not among the names imported
# from src.data in the first cell — confirm where it is imported/defined.
stations = build_stations_dataframe(raw_data)
stations

In [None]:
# Write the stations table to the processed-data directory as CSV.
# index=False: do not write the row index (assuming `stations` is a pandas
# DataFrame — confirm against build_stations_dataframe).
stations.to_csv(
    os.path.join(DATA_PROCESSED_PATH, STATIONS_DATA_FILENAME),
    index=False,
)

In [None]:
# Ask for the k=5 stations closest to the target station (how "closest" is
# measured is decided by get_k_closest_stations, which is not visible here).
top_5_closest_stations = get_k_closest_stations(TARGET_STATION_NAME, stations, k=5)

In [None]:
# Take the "name" column of the selected stations as a plain Python list.
top_5_closest_stations_names = top_5_closest_stations["name"].tolist()
# Draw the stations on a map, identifying the target station and the other
# selected stations by name (the rendering itself is up to
# plot_stations_in_map, which is not visible here).
plot_stations_in_map(
    stations,
    target_station_name=TARGET_STATION_NAME,
    other_stations_names=top_5_closest_stations_names,
)

In [None]:
# Stations to include: the target station first, then the five selected above.
station_names = [TARGET_STATION_NAME, *top_5_closest_stations_names]
# Build the dock-bikes time series for those stations from the raw data.
dock_bikes_timeseries = build_dock_bikes_timeseries_dataframe(
    raw_data,
    station_names,
)

In [None]:
# Write the dock-bikes time series to the processed-data directory as CSV.
# index=False: do not write the row index (assuming a pandas DataFrame —
# confirm against build_dock_bikes_timeseries_dataframe).
dock_bikes_timeseries.to_csv(
    os.path.join(DATA_PROCESSED_PATH, DOCK_BIKES_DATA_FILENAME),
    index=False,
)