# Arbres de Montréal

2025-10-29

Noé Aubin-Cadot

On compare ici les arbres de Montréal entre 2021 et 2025.

In [1]:
# Import de librairies
import pandas as pd
import numpy as np

In [2]:
# Load the two snapshots of the public-tree inventory.
#
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. Chunked inference can leave a column holding a
# mix of Python types (and makes read_csv emit a DtypeWarning), which is
# fragile for the string operations applied to the key columns further down.
# NOTE(review): the read_csv lines echoed in this cell's saved output look like
# such a warning was emitted -- confirm when re-running.

# Old data (December 2021 export)
input_file_old = 'input/2021-12-11_arbres-publics.csv'
df_old = pd.read_csv(input_file_old, low_memory=False)

# New data (October 2025 export)
input_file_new = 'input/2025-10-29_arbres-publics.csv'
df_new = pd.read_csv(input_file_new, low_memory=False)

  df_old = pd.read_csv(input_file_old)
  df_new = pd.read_csv(input_file_new)


In [3]:
# Report the (rows, columns) shape of each snapshot
for label, frame in (("df_old.shape :", df_old), ("df_new.shape :", df_new)):
    print(label, frame.shape)

df_old.shape : (325066, 22)
df_new.shape : (328642, 33)


In [4]:
# Harmonise the old column names with the spelling used in the new export.
# Keys are the old names, values are the names found in the new data.
old_to_new_names = {
    "Date_plantation": "Date_Plantation",
    "ESSENCE_ANG": "Essence_ang",
    "SIGLE": "Sigle",
    "COTE": "Rue_cote",
    "Date_releve": "Date_Releve",
}
df_old = df_old.rename(columns=old_to_new_names)

In [21]:
# Compare the column names of the two snapshots:
# columns only in the new data, only in the old data, and shared by both.
cols_old = set(df_old.columns)
cols_new = set(df_new.columns)

only_in_new = cols_new.difference(cols_old)
only_in_old = cols_old.difference(cols_new)
in_both = cols_old & cols_new

print("New moins Old :", only_in_new)
print("\nOld moins New :", only_in_old)
print("\nOld inter New :", in_both)

New moins Old : {'Stationnement_heure', 'Distance_pave', 'Rue_de', 'Stationnement_jour', 'Arbre_remarquable', 'Code_secteur', 'Localisation_code', 'Rue_a', 'District', 'Distance_ligne_rue', 'Nom_secteur'}

Old moins New : set()

Old inter New : {'Sigle', 'LOCALISATION', 'Coord_Y', 'Rue_cote', 'INV_TYPE', 'Essence_fr', 'Essence_latin', 'DHP', 'id_arbre', 'Date_Releve', 'Essence_ang', 'Latitude', 'Longitude', 'No_civique', 'EMP_NO', 'ARROND', 'Emplacement', 'CODE_PARC', 'NOM_PARC', 'Coord_X', 'Date_Plantation', 'ARROND_NOM', 'Rue'}


In [6]:
# Move the key columns to the front of both frames; the remaining columns
# keep their original relative order.
cols_first = ['EMP_NO', 'Date_Plantation']

remaining_old = [name for name in df_old.columns if name not in cols_first]
remaining_new = [name for name in df_new.columns if name not in cols_first]

df_old = df_old[cols_first + remaining_old]
df_new = df_new[cols_first + remaining_new]

In [7]:
# Store the location number as a string, left-padded with zeros to 7 characters
for frame in (df_old, df_new):
    frame['EMP_NO'] = frame['EMP_NO'].astype(str).str.zfill(7)

In [8]:
# Replace missing planting dates with the literal string "NaN" so that the
# column can be concatenated into a string identifier without yielding NaN
for frame in (df_old, df_new):
    frame['Date_Plantation'] = frame['Date_Plantation'].fillna("NaN")

In [9]:
# Build a per-tree identifier of the form
#   <EMP_NO>_<Sigle>_<Date_Releve>_<Date_Plantation>
# Rows with a missing part end up with a missing id (they are counted and
# removed in the following step).
# NOTE(review): Date_Releve is part of the key, so a tree that is simply
# re-surveyed between the two snapshots gets a different id and will be counted
# as one "disparu" plus one "nouveau" (the merged table appears to show this
# for EMP_NO 0000002 / ACPLCO). Consider whether the survey date belongs in
# the key -- TODO confirm uniqueness still holds without it.
for frame in (df_old, df_new):
    frame['id_arbre'] = (
        frame['EMP_NO']
        + '_' + frame['Sigle']
        + '_' + frame['Date_Releve']
        + '_' + frame['Date_Plantation']
    )

In [10]:
# Drop the rows that have no identifier, reporting the number of missing ids
# before and after the removal.
def _n_missing_ids(frame):
    """Return the number of rows of `frame` whose `id_arbre` is missing."""
    return frame['id_arbre'].isna().sum()

print("Avant enlever id manquant (old) :", _n_missing_ids(df_old))
print("Avant enlever id manquant (new) :", _n_missing_ids(df_new))

df_old = df_old.dropna(subset=['id_arbre'])
df_new = df_new.dropna(subset=['id_arbre'])

print("Après enlever id manquant (old) :", _n_missing_ids(df_old))
print("Après enlever id manquant (new) :", _n_missing_ids(df_new))

Avant enlever id manquant (old) : 653
Avant enlever id manquant (new) : 578
Après enlever id manquant (old) : 0
Après enlever id manquant (new) : 0


In [11]:
# Move the identifier to the first column of both frames; the other columns
# keep their relative order.
cols_first = ['id_arbre']

remaining_old = [name for name in df_old.columns if name not in cols_first]
remaining_new = [name for name in df_new.columns if name not in cols_first]

df_old = df_old[cols_first + remaining_old]
df_new = df_new[cols_first + remaining_new]

In [12]:
# Sanity check: the highest number of occurrences of a single id should be 1
# in each snapshot if the identifier is unique.
max_count_old = df_old['id_arbre'].value_counts().max()
max_count_new = df_new['id_arbre'].value_counts().max()

print("id count old :", max_count_old)
print("id count new :", max_count_new)

id count old : 1
id count new : 1


In [13]:
# Sets of identifiers found in each snapshot
set_old = set(df_old["id_arbre"])
set_new = set(df_new["id_arbre"])

# Aliases: ids present before (old snapshot) and after (new snapshot)
avant = set_old
après = set_new

# Stable trees: id present in both snapshots
stable = set_old.intersection(set_new)

# Disappeared trees: id present before, absent now
disparus = set_old.difference(set_new)

# New trees: id absent before, present now
nouveaux = set_new.difference(set_old)

# Every id seen in either snapshot
union = set_new | set_old

# Summary
print(f"🌳 Arbres stables  : {len(stable):,}")
print(f"🪓 Arbres disparus : {len(disparus):,}")
print(f"🌱 Nouveaux arbres : {len(nouveaux):,}")
print(f"Tous : {len(union):,}")

🌳 Arbres stables  : 148,596
🪓 Arbres disparus : 175,817
🌱 Nouveaux arbres : 179,468
Tous : 503,881


In [14]:
# Outer join of the old and new snapshots on the per-tree identifier.
# Columns present on both sides get the '_2021' / '_2025' suffix; columns that
# exist on one side only keep their name.
# validate='one_to_one' makes the merge raise a MergeError if `id_arbre` is
# duplicated on either side, instead of silently multiplying rows: the
# uniqueness of the key is otherwise only checked by eye.
df = pd.merge(
    left     = df_old,
    right    = df_new,
    on       = ['id_arbre'],
    how      = 'outer',
    suffixes = ('_2021', '_2025'),
    validate = 'one_to_one',
)
df

Unnamed: 0,id_arbre,EMP_NO_2021,Date_Plantation_2021,INV_TYPE_2021,ARROND_2021,ARROND_NOM_2021,Rue_2021,Rue_cote_2021,No_civique_2021,Emplacement_2021,...,Stationnement_jour,Stationnement_heure,District,Arbre_remarquable,Code_secteur,Nom_secteur,Coord_X_2025,Coord_Y_2025,Longitude_2025,Latitude_2025
0,0000001_GYDI_2021-05-20T00:00:00_2021-05-20T00...,0000001,2021-05-20T00:00:00,R,4.0,Mercier - Hochelaga-Maisonneuve,Rue Ontario Est ...,S,3912.0,TRottoir,...,L-V,08:00-09:30,540.0,N,,,301487.951,5045396.431,-73.542417,45.548473
1,0000002_ACPLCO_2004-06-16T00:00:00_2004-06-16T...,0000002,2004-06-16T00:00:00,H,5.0,Le Plateau-Mont-Royal,,,,TRottoir,...,,,,,,,,,,
2,0000002_ACPLCO_2023-05-08T00:00:00_2004-06-16T...,,,,,,,,,,...,,,,N,1.0,1,299130.596,5041570.500,-73.572563,45.514031
3,0000002_ACPL_2019-07-05T00:00:00_1983-10-01T00...,0000002,1983-10-01T00:00:00,R,4.0,Mercier - Hochelaga-Maisonneuve,Rue Sainte-Catherine Est ...,N,,TRottoir,...,,,540.0,N,,,302147.085,5045694.500,-73.533977,45.551158
4,0000003_ACPL_2019-07-05T00:00:00_1995-05-01T00...,0000003,1995-05-01T00:00:00,R,4.0,Mercier - Hochelaga-Maisonneuve,Rue Sainte-Catherine Est ...,N,4353.0,TRottoir,...,,,540.0,N,,,302151.053,5045702.500,-73.533926,45.551230
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
503876,0746645_JNVI_2025-10-28T00:00:00_NaN,,,,,,,,,,...,,,101.0,N,,,276446.812,5037001.381,-73.862626,45.472364
503877,0746647_ACGI_2025-10-27T00:00:00_NaN,,,,,,,,,,...,INTE,,560.0,N,,,300775.996,5047717.716,-73.551554,45.569357
503878,0746648_ACSA_2025-10-28T00:00:00_NaN,,,,,,,,,,...,,,101.0,N,,,276067.349,5037173.203,-73.867489,45.473894
503879,0746649_CGCGIN_2025-10-28T00:00:00_NaN,,,,,,,,,,...,,,101.0,N,,,275970.548,5037362.372,-73.868739,45.475592


In [15]:
# Add one boolean column per category, telling whether the row's id belongs
# to the corresponding identifier set.
flag_sets = {
    'stable': stable,
    'disparu': disparus,
    'nouveau': nouveaux,
    'avant': avant,
    'après': après,
    'tout': union,
}
for flag_name, id_set in flag_sets.items():
    df[flag_name] = df['id_arbre'].isin(id_set)
df

Unnamed: 0,id_arbre,EMP_NO_2021,Date_Plantation_2021,INV_TYPE_2021,ARROND_2021,ARROND_NOM_2021,Rue_2021,Rue_cote_2021,No_civique_2021,Emplacement_2021,...,Coord_X_2025,Coord_Y_2025,Longitude_2025,Latitude_2025,stable,disparu,nouveau,avant,après,tout
0,0000001_GYDI_2021-05-20T00:00:00_2021-05-20T00...,0000001,2021-05-20T00:00:00,R,4.0,Mercier - Hochelaga-Maisonneuve,Rue Ontario Est ...,S,3912.0,TRottoir,...,301487.951,5045396.431,-73.542417,45.548473,True,False,False,True,True,True
1,0000002_ACPLCO_2004-06-16T00:00:00_2004-06-16T...,0000002,2004-06-16T00:00:00,H,5.0,Le Plateau-Mont-Royal,,,,TRottoir,...,,,,,False,True,False,True,False,True
2,0000002_ACPLCO_2023-05-08T00:00:00_2004-06-16T...,,,,,,,,,,...,299130.596,5041570.500,-73.572563,45.514031,False,False,True,False,True,True
3,0000002_ACPL_2019-07-05T00:00:00_1983-10-01T00...,0000002,1983-10-01T00:00:00,R,4.0,Mercier - Hochelaga-Maisonneuve,Rue Sainte-Catherine Est ...,N,,TRottoir,...,302147.085,5045694.500,-73.533977,45.551158,True,False,False,True,True,True
4,0000003_ACPL_2019-07-05T00:00:00_1995-05-01T00...,0000003,1995-05-01T00:00:00,R,4.0,Mercier - Hochelaga-Maisonneuve,Rue Sainte-Catherine Est ...,N,4353.0,TRottoir,...,302151.053,5045702.500,-73.533926,45.551230,True,False,False,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
503876,0746645_JNVI_2025-10-28T00:00:00_NaN,,,,,,,,,,...,276446.812,5037001.381,-73.862626,45.472364,False,False,True,False,True,True
503877,0746647_ACGI_2025-10-27T00:00:00_NaN,,,,,,,,,,...,300775.996,5047717.716,-73.551554,45.569357,False,False,True,False,True,True
503878,0746648_ACSA_2025-10-28T00:00:00_NaN,,,,,,,,,,...,276067.349,5037173.203,-73.867489,45.473894,False,False,True,False,True,True
503879,0746649_CGCGIN_2025-10-28T00:00:00_NaN,,,,,,,,,,...,275970.548,5037362.372,-73.868739,45.475592,False,False,True,False,True,True


In [16]:
# Count the trees of each category per value of `Arbre_remarquable`.
# NOTE(review): `Arbre_remarquable` appears to exist only in the new data, so
# rows found only in the old snapshot have no value and are dropped here --
# confirm this is the intended comparison.
cols_count = ['tout', 'avant', 'après', 'stable', 'nouveau', 'disparu']
(
    df[['Arbre_remarquable'] + cols_count]
    .dropna(subset='Arbre_remarquable')
    .groupby('Arbre_remarquable')
    .sum()
)

Unnamed: 0_level_0,tout,avant,après,stable,nouveau,disparu
Arbre_remarquable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N,327905,148522,327905,148522,179383,0
O,159,74,159,74,85,0


In [17]:
# Single borough-name column: take the 2025 name when there is one,
# otherwise fall back to the 2021 name.
arrond_2025 = df['ARROND_NOM_2025']
df['ARROND_NOM'] = arrond_2025.where(arrond_2025.notna(), df['ARROND_NOM_2021'])
df['ARROND_NOM']

0                Mercier - Hochelaga-Maisonneuve
1                          Le Plateau-Mont-Royal
2                          Le Plateau-Mont-Royal
3                Mercier - Hochelaga-Maisonneuve
4                Mercier - Hochelaga-Maisonneuve
                           ...                  
503876                     Pierrefonds - Roxboro
503877           Mercier - Hochelaga-Maisonneuve
503878                     Pierrefonds - Roxboro
503879                     Pierrefonds - Roxboro
503880    Villeray-Saint-Michel - Parc-Extension
Name: ARROND_NOM, Length: 503881, dtype: object

In [18]:
# Tree counts per borough, plus the relative change between the old ("avant")
# and new ("après") counts expressed in percent.
# A borough with no tree before gets an infinite gain (division by zero).
col = 'ARROND_NOM'
df_arrond = df.dropna(subset=col).groupby(col)[cols_count].sum()

count_delta = df_arrond['après'] - df_arrond['avant']
df_arrond['gain %'] = 100*count_delta/df_arrond['avant']
df_arrond

Unnamed: 0_level_0,tout,avant,après,stable,nouveau,disparu,gain %
ARROND_NOM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Ahuntsic - Cartierville,47713,34351,35883,22521,13362,11830,4.459841
Anjou,17,17,0,0,0,17,-100.0
Côte-des-Neiges - Notre-Dame-de-Grâce,39617,26525,28291,15199,13092,11326,6.65787
LaSalle,22397,22397,0,0,0,22397,-100.0
Lachine,8702,0,8702,0,8702,0,inf
Le Plateau-Mont-Royal,29447,17427,19257,7237,12020,10190,10.500947
Le Sud-Ouest,25431,20607,21868,17044,4824,3563,6.11928
Mercier - Hochelaga-Maisonneuve,58757,29004,37482,7729,29753,21275,29.230451
Montréal-Est,5,5,0,0,0,5,-100.0
Pierrefonds - Roxboro,22246,16151,16671,10576,6095,5575,3.219615


In [19]:
# Boroughs ranked from the smallest to the largest relative gain
df_arrond.sort_values('gain %', ascending=True)

Unnamed: 0_level_0,tout,avant,après,stable,nouveau,disparu,gain %
ARROND_NOM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Montréal-Est,5,5,0,0,0,5,-100.0
Anjou,17,17,0,0,0,17,-100.0
Saint-Laurent,23086,23086,0,0,0,23086,-100.0
LaSalle,22397,22397,0,0,0,22397,-100.0
Pierrefonds - Roxboro,22246,16151,16671,10576,6095,5575,3.219615
Rosemont - La Petite-Patrie,46131,35888,37181,26938,10243,8950,3.602876
Ahuntsic - Cartierville,47713,34351,35883,22521,13362,11830,4.459841
Le Sud-Ouest,25431,20607,21868,17044,4824,3563,6.11928
Côte-des-Neiges - Notre-Dame-de-Grâce,39617,26525,28291,15199,13092,11326,6.65787
Le Plateau-Mont-Royal,29447,17427,19257,7237,12020,10190,10.500947


In [20]:
# Keep only the boroughs that have trees in BOTH snapshots, i.e. remove those
# with no tree before or no tree after, for which the gain is meaningless
# (-100 % or infinite).
# The selection is computed from the counts instead of a hard-coded list of
# borough names, so it stays correct if the data changes and cannot raise a
# KeyError on a name that is absent from the index.
present_in_both = df_arrond['avant'].gt(0) & df_arrond['après'].gt(0)
df_arrond.loc[present_in_both].sort_values(by='gain %').round(2)

Unnamed: 0_level_0,tout,avant,après,stable,nouveau,disparu,gain %
ARROND_NOM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Pierrefonds - Roxboro,22246,16151,16671,10576,6095,5575,3.22
Rosemont - La Petite-Patrie,46131,35888,37181,26938,10243,8950,3.6
Ahuntsic - Cartierville,47713,34351,35883,22521,13362,11830,4.46
Le Sud-Ouest,25431,20607,21868,17044,4824,3563,6.12
Côte-des-Neiges - Notre-Dame-de-Grâce,39617,26525,28291,15199,13092,11326,6.66
Le Plateau-Mont-Royal,29447,17427,19257,7237,12020,10190,10.5
Verdun,22101,16314,18463,12676,5787,3638,13.17
Villeray-Saint-Michel - Parc-Extension,36501,23884,27375,14758,12617,9126,14.62
Ville-Marie,28805,16616,20114,7925,12189,8691,21.05
Saint-Léonard,42213,19185,23343,315,23028,18870,21.67
