In [1]:
import pandas as pd
import numpy as np
from haversine import haversine, Unit

In [2]:
# Load the "chantiers perturbants" export; the file is semicolon-delimited.
df_disruption = pd.read_csv(
    filepath_or_buffer="data/chantiers-perturbants.csv",
    sep=";",
)

In [3]:
# Load the train.parquet dataset, which holds the counter names and coordinates used below.
df_counters = pd.read_parquet(path="data/train.parquet")

In [4]:
# Keep only the counter identity and its coordinates.
counter_columns = ["counter_name", "latitude", "longitude"]
df_counters = df_counters.loc[:, counter_columns]

In [5]:
# Collapse repeated rows so each distinct (name, latitude, longitude) triple
# appears once; the first occurrence of each triple is the one that is kept.
is_repeat = df_counters.duplicated()
df_counters = df_counters[~is_repeat]

In [6]:
# Summary of the de-duplicated counter table: index, dtypes and non-null counts.
df_counters.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 56 entries, 48321 to 920126
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   counter_name  56 non-null     category
 1   latitude      56 non-null     float64 
 2   longitude     56 non-null     float64 
dtypes: category(1), float64(2)
memory usage: 3.9 KB


In [7]:
# Summary of the disruption table: columns, dtypes and non-null counts.
df_disruption.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192 entries, 0 to 191
Data columns (total 19 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Identifiant                            192 non-null    object 
 1   Identifiant CTV                        128 non-null    object 
 2   Code postal de l'arrondissement        190 non-null    float64
 3   Numéro de STV                          191 non-null    float64
 4   Typologie                              189 non-null    float64
 5   Maitre d'ouvrage                       188 non-null    object 
 6   Objet                                  191 non-null    object 
 7   Description                            157 non-null    object 
 8   Voie(s)                                190 non-null    object 
 9   Précisions de localisation             174 non-null    object 
 10  Date de début                          191 non-null    object 
 11  Date d

In [8]:
# Preview the first rows of the disruption table.
df_disruption.head()

Unnamed: 0,Identifiant,Identifiant CTV,Code postal de l'arrondissement,Numéro de STV,Typologie,Maitre d'ouvrage,Objet,Description,Voie(s),Précisions de localisation,Date de début,Date de fin,Impact sur la circulation,Détail de l'impact sur la circulation,Niveau de perturbation,Statut,URL LettreInfoChantier,geo_shape,geo_point_2d
0,CP001735,,75018.0,9.0,1.0,SG Mission des JO 2024,CONSTRUCTION_IMMEUBLE,Arena 2,avenue de la Porte de la Chapelle,n°6,2020-09-07,2023-07-31,BARRAGE_TOTAL,entre le bd Ney et l'avenue de la pte de la Ch...,2.0,2,,"{""coordinates"": [[[2.3603260350936983, 48.8987...","48.89941650246351, 2.3610672014134044"
1,CP002276,,75017.0,9.0,3.0,ICF HABITAT,CONSTRUCTION_IMMEUBLE,Création de 111 logements sociaux.\nEmprise pr...,Voie Bus,Entre le 188 bis et 188 ter avenue de Clichy,2022-05-02,2024-06-30,RESTREINTE,Déviation bus dans circulation générale\nCréat...,2.0,2,http://xdir-CGPub-prd.ressources.paris.mdp/Pdf...,"{""coordinates"": [[[2.315884598767695, 48.89320...","48.89326507701689, 2.3156964416358314"
2,CP002368,654245.0,75005.0,12.0,2.0,Eau de Paris,ENTRETIEN_RESEAU,Renouvellement conduits EP,"Gobelins, Monge, Censier et Mirbel",13-17 av Gobelins + 111-115 rue Monge + 30 rue...,2022-08-08,2023-01-20,RESTREINTE,Déviation circulation générale (Bus + vélos),2.0,2,,"{""coordinates"": [[[2.352170678013787, 48.84077...","48.84036171643268, 2.3520135239093847"
3,CP002390,689822.0,75014.0,12.0,3.0,Congrégation St Joseph,REHABILITATION_IMMEUBLE,,Rue Méchain,du 19b au 21,2022-10-05,2023-12-15,RESTREINTE,"Maintien de 4,00m de circulation",2.0,2,,"{""coordinates"": [[[[2.3384636559592034, 48.835...","48.83573836601194, 2.3385684617326348"
4,CP002429,662837.0,75015.0,13.0,3.0,SCCV Porte de Brancion,CONSTRUCTION_IMMEUBLE,"Construction neuve, résidence sociale et comme...",Avenue de la Porte de Brancion,Carrefour au-dessus du périphérique,2022-11-14,2024-03-15,RESTREINTE,"restriction à é files de circulation, maintien...",2.0,2,,"{""coordinates"": [[[2.299950868215911, 48.82590...","48.82574599992239, 2.300005595088703"


In [9]:
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the haversine (great-circle) distance in meters between two points.

    Each point is described by a latitude and a longitude and is handed to
    ``haversine`` as a ``(lat, lon)`` tuple; the result unit is fixed to
    ``Unit.METERS``.
    """
    origin = (lat1, lon1)
    destination = (lat2, lon2)
    return haversine(origin, destination, unit=Unit.METERS)

In [10]:
# "geo_point_2d" holds "<latitude>, <longitude>" as one string: split it on the
# comma and store both parts as float columns.
coordinate_parts = df_disruption["geo_point_2d"].str.split(",", expand=True)
df_disruption[["latitude", "longitude"]] = coordinate_parts.astype(float)

In [11]:
# Accumulator for the counter/disruption matches (one dict per matching pair).
results = list()

In [12]:
# Pair every counter with every disruption whose latitude/longitude point lies
# within MAX_DISTANCE_M meters (haversine distance) of the counter.
MAX_DISTANCE_M = 200
RESULT_COLUMNS = [
    "Counter Name",
    "Disruption",
    "Distance (m)",
    "Intensity",
    "Start Date",
    "End Date",
]

# Start from an empty list inside this cell so that re-running it does not
# append a second copy of every match to a list left over from a previous run.
results = []

# Materialise the disruption rows once instead of rebuilding them for each counter.
disruption_rows = [row for _, row in df_disruption.iterrows()]

for _, counter in df_counters.iterrows():
    counter_lat = counter["latitude"]
    counter_lon = counter["longitude"]
    for disruption in disruption_rows:
        distance = calculate_distance(
            counter_lat,
            counter_lon,
            disruption["latitude"],
            disruption["longitude"],
        )
        # A NaN distance (missing coordinates) compares False and is skipped.
        if distance <= MAX_DISTANCE_M:
            results.append(
                {
                    "Counter Name": counter["counter_name"],
                    "Disruption": disruption["Identifiant"],
                    "Distance (m)": distance,
                    "Intensity": disruption["Niveau de perturbation"],
                    "Start Date": disruption["Date de début"],
                    "End Date": disruption["Date de fin"],
                }
            )

# Passing the column list keeps the expected schema even when no pair matched,
# so later cells that index df_results["Intensity"] still work.
df_results = pd.DataFrame(results, columns=RESULT_COLUMNS)

In [15]:
# Show the counter/disruption pairs found within the distance threshold.
# NOTE(review): this cell is placed before the cell that swaps the 'Intensity'
# codes, although its execution count (15) is later than that cell's (14) —
# on a top-to-bottom run it displays the un-swapped values.
df_results

Unnamed: 0,Counter Name,Disruption,Distance (m),Intensity,Start Date,End Date
0,Pont des Invalides S-N,CP002471,198.695428,2.0,2023-04-06,2023-05-05
1,Pont des Invalides N-S,CP002471,194.132221,2.0,2023-04-06,2023-05-05
2,Totem Cours la Reine E-O,CP002514,165.52642,1.0,2023-07-03,2023-08-01
3,Totem Cours la Reine E-O,CP002497,71.794931,1.0,2023-06-01,2023-07-31
4,Totem Cours la Reine E-O,CP002525,181.573876,2.0,2023-07-10,2023-08-31
5,Totem Cours la Reine O-E,CP002514,165.52642,1.0,2023-07-03,2023-08-01
6,Totem Cours la Reine O-E,CP002497,71.794931,1.0,2023-06-01,2023-07-31
7,Totem Cours la Reine O-E,CP002525,181.573876,2.0,2023-07-10,2023-08-31
8,254 rue de Vaugirard NE-SO,CP002466,61.4871,2.0,2023-02-23,2023-07-01
9,254 rue de Vaugirard SO-NE,CP002466,61.4871,2.0,2023-02-23,2023-07-01


In [14]:
# Switch 1 to 2 and 2 to 1 in the 'Intensity' column.
# The swap is its own inverse, so running this cell a second time would silently
# restore the original codes. The flag stored on the frame makes the cell
# idempotent; a freshly rebuilt df_results has empty attrs and is swapped again.
if not df_results.attrs.get("intensity_swapped", False):
    df_results["Intensity"] = df_results["Intensity"].replace({1: 2, 2: 1})
    df_results.attrs["intensity_swapped"] = True

In [16]:
# Persist the matched counter/disruption pairs; the row index is written as the
# first (unnamed) column of the file.
df_results.to_csv(path_or_buf="data/disruption.csv", index=True)