# Projet Intelligence Artificielle

## Besoin client 3 : Système d'alerte pour les tempêtes

### Préparation des données

In [209]:
# Importation des librairies nécessaires

import pandas as pd
import numpy as np
import string
from sklearn.preprocessing import OrdinalEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

In [210]:
# Importation des bases de données 
data_prof = pd.read_csv('./Données/Data_Arbre.csv')
data_clean = pd.read_csv("./Données/data_clean.csv", encoding='utf-8', sep=";", decimal=",")

In [211]:
# Affichage des premières lignes des données 
# data_prof.head()


In [212]:
# Affichage des premières lignes des données de notre base de données
data_clean.head()

Unnamed: 0,X,Y,OBJECTID,created_date,created_user,src_geo,clc_quartier,clc_secteur,id_arbre,haut_tot,...,nomfrancais,GlobalID,CreationDate,Creator,EditDate,Editor,feuillage,remarquable,lon,lat
0,1720320.0,8294619.0,1,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Richelieu,24.0,0.0,...,RAS,{476EB2CE-1FD4-4F89-8162-79D75651225A},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.28254,49.850458
1,1720898.0,8293531.0,2,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Léon Blum,24.0,0.0,...,RAS,{B5A9F630-92C5-4B8A-A934-0CABDA46085E},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.290521,49.840654
2,1720894.0,8293542.0,3,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Léon Blum,53.0,0.0,...,RAS,{F5914EAD-05CD-4ADF-A7C9-55EFF91B2ABE},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.29046,49.840756
3,1720902.0,8293545.0,4,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Léon Blum,54.0,0.0,...,RAS,{41168E06-B7C0-43CD-B8FE-7495B6E93AB5},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.290568,49.840783
4,1721089.0,8293619.0,5,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Léon Blum,63.0,0.0,...,RAS,{4F0E4B12-4612-4F61-9911-43684831FD7D},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.293178,49.841441


In [213]:
# Récupération des colonnes désirées 
data = data_clean[["haut_tot","haut_tronc","tronc_diam","fk_arb_etat","fk_stadedev","age_estim", "fk_prec_estim","clc_quartier", "clc_secteur","fk_port","fk_pied","fk_situation","fk_revetement","feuillage"]]
data.head()

Unnamed: 0,haut_tot,haut_tronc,tronc_diam,fk_arb_etat,fk_stadedev,age_estim,fk_prec_estim,clc_quartier,clc_secteur,fk_port,fk_pied,fk_situation,fk_revetement,feuillage
0,0.0,0.0,0.0,SUPPRIMÉ,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Richelieu,Inexistant,Inexistant,Alignement,,
1,0.0,0.0,0.0,ABATTU,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Léon Blum,Inexistant,Inexistant,Alignement,,
2,0.0,0.0,0.0,SUPPRIMÉ,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Léon Blum,Inexistant,Inexistant,Alignement,,
3,0.0,0.0,0.0,SUPPRIMÉ,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Léon Blum,Inexistant,Inexistant,Alignement,,
4,0.0,0.0,0.0,ABATTU,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Léon Blum,Inexistant,Inexistant,Alignement,,


In [214]:
# Autre méthode pour récuperer les colonnes désirées
colonnes = ["haut_tot","haut_tronc","tronc_diam","fk_arb_etat","fk_stadedev","age_estim", "fk_prec_estim","clc_quartier", "clc_secteur","fk_port","fk_pied","fk_situation","fk_revetement","feuillage"]
data_new = data_clean[colonnes]

In [215]:
# Corrélation entre les variables numériques
correlation = data.corr(method='pearson', numeric_only= True).style.background_gradient(cmap='Pastel1')
correlation

Unnamed: 0,haut_tot,haut_tronc,tronc_diam,age_estim,fk_prec_estim
haut_tot,1.0,0.441569,0.678304,0.678828,0.46395
haut_tronc,0.441569,1.0,0.327135,0.505737,0.292037
tronc_diam,0.678304,0.327135,1.0,0.857629,0.508601
age_estim,0.678828,0.505737,0.857629,1.0,0.855497
fk_prec_estim,0.46395,0.292037,0.508601,0.855497,1.0


In [216]:
index = data[(data["fk_arb_etat"] == 'SUPPRIMÉ') | 
             (data["fk_arb_etat"]=='ABATTU') | 
             (data["fk_arb_etat"]=='EN PLACE') | 
             (data["fk_arb_etat"]=='REMPLACÉ')].index
data.drop(index, inplace = True)
data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.drop(index, inplace = True)


Unnamed: 0,haut_tot,haut_tronc,tronc_diam,fk_arb_etat,fk_stadedev,age_estim,fk_prec_estim,clc_quartier,clc_secteur,fk_port,fk_pied,fk_situation,fk_revetement,feuillage
12,0.0,0.0,0.0,Essouché,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Henri Martin,Inexistant,Inexistant,Alignement,,
14,0.0,0.0,0.0,Essouché,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Henri Martin,Inexistant,Inexistant,Alignement,,
15,0.0,0.0,0.0,Essouché,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Henri Martin,Inexistant,Inexistant,Alignement,,
16,0.0,0.0,0.0,Essouché,Inexistant,11.170052,7.481492,Quartier du Centre-Ville,Boulevard Henri Martin,Inexistant,Inexistant,Alignement,,
19,0.0,0.0,0.0,Non essouché,Inexistant,16.245164,10.0,Quartier Saint-Jean,Rue Demoustier,Inexistant,fosse arbre,Alignement,Non,


In [217]:
# Binarisation des valeurs de fk_arb_etat

data.loc[data["fk_arb_etat"] == "Essouché","fk_arb_etat"] = 1
data.loc[data["fk_arb_etat"] != 1,"fk_arb_etat"] = 0
data.fk_arb_etat = data.fk_arb_etat.astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.fk_arb_etat = data.fk_arb_etat.astype(int)


In [218]:
# Changement des ordinalités dans les colonnes en charactère
encodeur = OrdinalEncoder()
cols = ["clc_quartier", "clc_secteur","fk_port","fk_pied","fk_situation","fk_revetement","feuillage"]
# cols = ["clc_quartier","fk_situation","feuillage"]
changement = data[cols]
data[cols] = encodeur.fit_transform(changement)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[cols] = encodeur.fit_transform(changement)


In [219]:
# Visualisation des données transformées
print(data)

      haut_tot  haut_tronc  tronc_diam  fk_arb_etat fk_stadedev  age_estim  \
12         0.0         0.0         0.0            1  Inexistant  11.170052   
14         0.0         0.0         0.0            1  Inexistant  11.170052   
15         0.0         0.0         0.0            1  Inexistant  11.170052   
16         0.0         0.0         0.0            1  Inexistant  11.170052   
19         0.0         0.0         0.0            0  Inexistant  16.245164   
...        ...         ...         ...          ...         ...        ...   
8205       4.0         2.0        17.0            1       Jeune   7.140417   
8221       9.0         3.0        85.0            1       Jeune  25.346017   
8222       9.0         3.0        86.0            1       Jeune  25.493089   
8223       9.0         4.0        98.0            1       Jeune  29.581774   
8311       5.0         2.0        25.0            0       Jeune   8.284093   

      fk_prec_estim  clc_quartier  clc_secteur  fk_port  fk_pie

In [220]:
# Nouvelle corrélation entre les variables numériques
new_correlation = data.corr(method='pearson', numeric_only= True).style.background_gradient(cmap='Pastel1')
new_correlation

Unnamed: 0,haut_tot,haut_tronc,tronc_diam,fk_arb_etat,age_estim,fk_prec_estim,clc_quartier,clc_secteur,fk_port,fk_pied,fk_situation,fk_revetement,feuillage
haut_tot,1.0,0.665377,0.72991,0.090665,0.699162,0.237857,-0.089429,0.123478,0.551522,0.29667,0.409936,0.149522,-0.219939
haut_tronc,0.665377,1.0,0.621905,0.049869,0.696205,0.209201,-0.15172,0.041807,0.564093,0.162339,0.279502,0.144864,-0.199708
tronc_diam,0.72991,0.621905,1.0,0.090675,0.886681,0.28829,-0.067684,0.040349,0.577412,0.22456,0.251526,0.256701,-0.072017
fk_arb_etat,0.090665,0.049869,0.090675,1.0,0.024848,-0.095218,0.104203,0.118864,0.022022,0.002397,0.146225,0.0786,0.017134
age_estim,0.699162,0.696205,0.886681,0.024848,1.0,0.668092,-0.070354,-0.030863,0.57964,0.233696,0.140904,0.287926,-0.203083
fk_prec_estim,0.237857,0.209201,0.28829,-0.095218,0.668092,1.0,0.010141,-0.144045,0.221816,0.135763,-0.166544,0.204917,-0.246055
clc_quartier,-0.089429,-0.15172,-0.067684,0.104203,-0.070354,0.010141,1.0,-0.226094,-0.217004,-0.085009,-0.085563,-0.010145,-0.064191
clc_secteur,0.123478,0.041807,0.040349,0.118864,-0.030863,-0.144045,-0.226094,1.0,0.183048,0.314763,0.253576,-0.000521,-0.02656
fk_port,0.551522,0.564093,0.577412,0.022022,0.57964,0.221816,-0.217004,0.183048,1.0,0.443063,0.147989,0.150396,-0.116303
fk_pied,0.29667,0.162339,0.22456,0.002397,0.233696,0.135763,-0.085009,0.314763,0.443063,1.0,0.180847,0.196572,0.030464


In [221]:
# Classification des données en X

x = data[["haut_tot", "haut_tronc","tronc_diam","clc_quartier"]]
print(x)
print(x.shape)

      haut_tot  haut_tronc  tronc_diam  clc_quartier
12         0.0         0.0         0.0           7.0
14         0.0         0.0         0.0           7.0
15         0.0         0.0         0.0           7.0
16         0.0         0.0         0.0           7.0
19         0.0         0.0         0.0           3.0
...        ...         ...         ...           ...
8205       4.0         2.0        17.0           7.0
8221       9.0         3.0        85.0           2.0
8222       9.0         3.0        86.0           2.0
8223       9.0         4.0        98.0           2.0
8311       5.0         2.0        25.0           4.0

[197 rows x 4 columns]
(197, 4)


In [222]:
# Classification des données en Y
y = data[["fk_arb_etat"]]
print(y)
print(y.shape)

      fk_arb_etat
12              1
14              1
15              1
16              1
19              0
...           ...
8205            1
8221            1
8222            1
8223            1
8311            0

[197 rows x 1 columns]
(197, 1)


In [223]:
print(np.unique(y))

[0 1]


In [224]:
x_train, y_train = x[:6281],y[:6281]
x_test, y_test = x[6281:],y[6281:]

### Apprentissage Supervisé pour la classification

#### Classification binaire

In [225]:
randomforest = RandomForestClassifier(n_estimators= 200, random_state= 42)
randomforest.fit(x_train,y_train)

  return fit_method(estimator, *args, **kwargs)


In [226]:
gridsearch = GridSearchCV(estimator=randomforest, param_grid= {'n_estimators' : [i*25 for i in range(1,20)]},cv= 5, n_jobs=-1)
meilleur_model = gridsearch.fit(x_train, y_train)

  return fit_method(estimator, *args, **kwargs)


In [227]:
print(gridsearch.best_estimator_)

RandomForestClassifier(random_state=42)


### Métrique pour la classification

In [228]:
taux = cross_val_score(meilleur_model, x_train, y_train, cv = 3, scoring='accuracy')
print(taux)

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[0.43939394 0.51515152 0.66153846]


In [229]:
print(np.mean(taux))

0.5386946386946386


#### Matrice de confusion

In [230]:
predictions = cross_val_predict(gridsearch, x_train, y_train, cv = 3)
print(predictions)

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[0 0 0 0 0 1 0 1 1 0 0 0 1 1 0 0 1 1 1 1 1 0 1 1 0 0 0 1 1 1 0 1 0 0 1 0 1
 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 0 1 0 1 1 0 0
 0 0 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1
 1 1 1 0 1 1 1 1 1 1 1 1 0 0 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1
 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 1 1 1 1]


In [231]:
matrice = confusion_matrix(y_train,predictions, normalize = "true")
print(matrice)

[[0.3        0.7       ]
 [0.35766423 0.64233577]]


### Autres classifieurs

In [233]:
from sklearn.ensemble import AdaBoostClassifier

ada = AdaBoostClassifier(n_estimators= 175, random_state= 42)
ada.fit(x_train,y_train)

gridsearch = GridSearchCV(estimator=ada, param_grid= {'n_estimators' : [i*25 for i in range(1,20)]},cv= 5, n_jobs=-1)
meilleur_model = gridsearch.fit(x_train, y_train)

taux = cross_val_score(meilleur_model, x_train, y_train, cv = 3, scoring='accuracy')
print(taux)

print(np.mean(taux))

predictions = cross_val_predict(gridsearch, x_train, y_train, cv = 3)
print(predictions)

matrice = confusion_matrix(y_train,predictions, normalize = "true")
print(matrice)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0.40909091 0.54545455 0.49230769]
0.4822843822843823


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0 0 0 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1
 0 0 0 0 0 0 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 0 0 0 1 1 1 0 1 0 1 1 0 0
 0 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 1
 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 0 1
 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1
 1 1 1 0 1 1 1 1 1 1 1 1]
[[0.26666667 0.73333333]
 [0.42335766 0.57664234]]


### Préparation du Script