# Cleaning and preprocessing IDFM bike parking data

In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules (such
# as the local `paris_bikes` package) are reloaded before each cell executes
# and edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from pathlib import Path

import geopandas as gpd
import numpy as np
import plotly.express as px
import pandas as pd
from geopy.geocoders import Nominatim


from paris_bikes.preprocess_data import get_population_per_iris, get_idfm_parkings_per_iris



## Load files

In [4]:
# Census data (GeoJSON): read it, then hand it to the project helper that
# produces the per-IRIS frame used in the rest of the notebook.
census_filepath = Path("../data/raw/RECENSEMENT_IRIS_POPULATION.geojson")
df_census = gpd.read_file(census_filepath)
df_iris = get_population_per_iris(df_census)

# IDFM bike parking table: a semicolon-delimited CSV.
parking_idfm_filepath = Path("../data/raw/parking-velos-ile-de-france-mobilites.csv")
df_parking_idfm = pd.read_csv(parking_idfm_filepath, sep=";")

## Calculate parking spots per IRIS

In [9]:
# Turn the IDFM parking table into a GeoDataFrame: first build one point per
# row from the `x_long` / `y_lat` columns, then attach those points as the
# geometry and tag the frame with the EPSG:4326 coordinate reference system.
idfm_points = gpd.points_from_xy(
    df_parking_idfm["x_long"],
    df_parking_idfm["y_lat"],
)
gdf_parking_idfm = gpd.GeoDataFrame(
    df_parking_idfm,
    geometry=idfm_points,
    crs="EPSG:4326",
)

In [10]:
# Inspect the GeoDataFrame as plain text.
# NOTE(review): print() emits the truncated text repr of the whole frame; a
# bare `gdf_parking_idfm.head()` as the last expression would use the
# notebook's rich table display instead.
print(gdf_parking_idfm)

                                             Geo Shape line_id  station_id  \
0    {"coordinates": [2.562582, 48.897935], "type":...      T4        1013   
1    {"coordinates": [2.256876771, 48.94751081], "t...       J          67   
2    {"coordinates": [2.163078, 48.695907], "type":...       B         678   
3    {"coordinates": [2.709285, 48.836608], "type":...       A         132   
4    {"coordinates": [2.027858137, 48.97052555], "t...       J          60   
..                                                 ...     ...         ...   
304  {"coordinates": [2.3518, 48.6753], "type": "Po...       C         240   
305  {"coordinates": [2.7598, 48.7393], "type": "Po...       E         148   
306  {"coordinates": [2.231110057, 48.95030437], "t...       J         184   
307  {"coordinates": [2.4141, 48.708], "type": "Poi...       D         201   
308  {"coordinates": [2.44723756, 48.73056138], "ty...       D          52   

                         name      type  num_docks_available  \

In [11]:
# Calculate IDFM parking spots per IRIS.
# Inputs: the parking GeoDataFrame (`gdf_parking_idfm`) and the per-IRIS frame
# returned by get_population_per_iris (`df_iris`).
# NOTE(review): the helper is imported from paris_bikes.preprocess_data and is
# not visible here; presumably it assigns each parking point to an IRIS and
# aggregates the spots per IRIS — TODO confirm against its implementation.
df_idfm_parks_per_iris = get_idfm_parkings_per_iris(gdf_parking_idfm, df_iris)

In [12]:
# Show the result as plain text; the printed frame is indexed by `iris` and
# has an `nb_parking_spots` column.
print(df_idfm_parks_per_iris)

                          nb_parking_spots
iris                                      
Europe 8                                39
Necker 18                               60
Saint-Vincent de Paul 10               109


In [13]:
# Write the per-IRIS parking counts to CSV. The target directory is created
# first because to_csv does not create missing parent directories; without
# this, a fresh checkout lacking ../data/primary fails on this cell.
idfm_parking_out = Path("../data/primary/idfm_parking.csv")
idfm_parking_out.parent.mkdir(parents=True, exist_ok=True)
df_idfm_parks_per_iris.to_csv(idfm_parking_out)