# Projet Intelligence Artificielle

## Besoin client 3 : Système d'alerte pour les tempêtes

### Préparation des données

In [3]:
# Importation des librairies nécessaires

import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder

In [4]:
# Importation des bases de données 
data_prof = pd.read_csv('./Données/Data_Arbre.csv')
data_clean = pd.read_csv("./Données/data_clean.csv", encoding='utf-8', sep=";", decimal=",")

In [5]:
# Affichage des premières lignes des données 
data_prof.head()


Unnamed: 0,longitude,latitude,clc_quartier,clc_secteur,haut_tot,haut_tronc,tronc_diam,fk_arb_etat,fk_stadedev,fk_port,fk_pied,fk_situation,fk_revetement,age_estim,fk_prec_estim,clc_nbr_diag,fk_nomtech,villeca,feuillage,remarquable
0,3.293264,49.8405,Quartier du Centre-Ville,Quai Gayant,6.0,2.0,37.0,EN PLACE,Jeune,semi libre,gazon,Alignement,Non,15.0,5.0,0.0,QUERUB,VILLE,Feuillu,Non
1,3.27338,49.861409,Quartier du Vermandois,Stade Cepy,13.0,1.0,160.0,EN PLACE,Adulte,semi libre,gazon,Groupe,Non,50.0,10.0,0.0,PINNIGnig,VILLE,Conifère,Non
2,3.289068,49.844513,Quartier du Centre-Ville,Rue Villebois Mareuil,12.0,3.0,116.0,REMPLACÉ,Adulte,semi libre,gazon,Alignement,Non,30.0,10.0,0.0,ACEPSE,VILLE,Feuillu,Non
3,3.302387,49.861778,Quartier de l'Europe,Square des Marronniers,16.0,3.0,150.0,EN PLACE,Adulte,semi libre,gazon,Groupe,Non,50.0,2.0,0.0,ACEPLA,VILLE,Feuillu,Non
4,3.304047,49.858446,Quartier de l'Europe,Avenue Buffon,5.0,2.0,170.0,Essouché,Adulte,réduit,gazon,Isolé,Non,40.0,2.0,0.0,SALBAB,VILLE,Feuillu,Non


In [6]:
# Affichage des premières lignes des données de notre base de données
data_clean.head()

Unnamed: 0,X,Y,OBJECTID,created_date,created_user,src_geo,clc_quartier,clc_secteur,id_arbre,haut_tot,...,nomfrancais,GlobalID,CreationDate,Creator,EditDate,Editor,feuillage,remarquable,lon,lat
0,1720320.0,8294619.0,1,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Richelieu,24.0,0.0,...,RAS,{476EB2CE-1FD4-4F89-8162-79D75651225A},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.28254,49.850458
1,1720898.0,8293531.0,2,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Léon Blum,24.0,0.0,...,RAS,{B5A9F630-92C5-4B8A-A934-0CABDA46085E},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.290521,49.840654
2,1720894.0,8293542.0,3,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Léon Blum,53.0,0.0,...,RAS,{F5914EAD-05CD-4ADF-A7C9-55EFF91B2ABE},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.29046,49.840756
3,1720902.0,8293545.0,4,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Léon Blum,54.0,0.0,...,RAS,{41168E06-B7C0-43CD-B8FE-7495B6E93AB5},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.290568,49.840783
4,1721089.0,8293619.0,5,2017-02-02,mickael.delaere,Orthophoto,Quartier du Centre-Ville,Boulevard Léon Blum,63.0,0.0,...,RAS,{4F0E4B12-4612-4F61-9911-43684831FD7D},2018-01-15,mickael.delaere,2018-01-15,mickael.delaere,,Non,3.293178,49.841441


In [7]:
# Récupération des colonnes désirées 
data = data_clean[["haut_tot","haut_tronc","tronc_diam","fk_arb_etat","fk_stadedev","age_estim", "fk_prec_estim"]]

In [8]:
# Autre méthode pour récuperer les colonnes désirées
colonnes = ["haut_tot","haut_tronc","tronc_diam","fk_arb_etat","fk_stadedev","age_estim", "fk_prec_estim"]
data_new = data_clean[colonnes]

In [17]:
# Corrélation entre les variables numériques
correlation = data.corr(method='pearson', numeric_only= True).style.background_gradient(cmap='Pastel1')
correlation

Unnamed: 0,haut_tot,haut_tronc,tronc_diam,age_estim,fk_prec_estim
haut_tot,1.0,0.441569,0.678304,0.678828,0.46395
haut_tronc,0.441569,1.0,0.327135,0.505737,0.292037
tronc_diam,0.678304,0.327135,1.0,0.857629,0.508601
age_estim,0.678828,0.505737,0.857629,1.0,0.855497
fk_prec_estim,0.46395,0.292037,0.508601,0.855497,1.0


In [10]:
# Changement des ordinalités dans les colonnes en charactère
encodeur = OrdinalEncoder()
cols = ["clc_quartier", "clc_secteur","fk_arb_etat","fk_port","fk_pied","fk_situation","fk_revetement","feuillage","remarquable"]
changement = data_clean[cols]
data_clean[cols] = encodeur.fit_transform(changement)

In [11]:
# Visualisation des données transformées
print(data_clean)

                 X             Y  OBJECTID created_date     created_user  \
0     1.720320e+06  8.294619e+06         1   2017-02-02  mickael.delaere   
1     1.720898e+06  8.293531e+06         2   2017-02-02  mickael.delaere   
2     1.720894e+06  8.293542e+06         3   2017-02-02  mickael.delaere   
3     1.720902e+06  8.293545e+06         4   2017-02-02  mickael.delaere   
4     1.721089e+06  8.293619e+06         5   2017-02-02  mickael.delaere   
...            ...           ...       ...          ...              ...   
8968  1.721361e+06  8.295731e+06     12022          NaN              NaN   
8969  1.720979e+06  8.294349e+06     12023          NaN              NaN   
8970  1.721700e+06  8.295853e+06     12025          NaN              NaN   
8971  1.721691e+06  8.295820e+06     12027          NaN              NaN   
8972  1.721677e+06  8.295879e+06     12035          NaN              NaN   

         src_geo  clc_quartier  clc_secteur  id_arbre  haut_tot  ...  \
0     Orthophot

In [19]:
# Nouvelle corrélation entre les variables numériques
data = data_clean[["haut_tot","haut_tronc","tronc_diam","fk_arb_etat","fk_stadedev","age_estim", "fk_prec_estim"]]
new_correlation = data.corr(method='pearson', numeric_only= True).style.background_gradient(cmap='Pastel1')
new_correlation

Unnamed: 0,haut_tot,haut_tronc,tronc_diam,fk_arb_etat,age_estim,fk_prec_estim
haut_tot,1.0,0.441569,0.678304,0.014495,0.678828,0.46395
haut_tronc,0.441569,1.0,0.327135,0.008343,0.505737,0.292037
tronc_diam,0.678304,0.327135,1.0,0.052773,0.857629,0.508601
fk_arb_etat,0.014495,0.008343,0.052773,1.0,0.102961,0.136806
age_estim,0.678828,0.505737,0.857629,0.102961,1.0,0.855497
fk_prec_estim,0.46395,0.292037,0.508601,0.136806,0.855497,1.0


In [13]:
# Classification des données en X

x = data_clean[["haut_tot", "haut_tronc","tronc_diam","clc_quartier"]]
print(x)
print(x.shape)

      haut_tot  haut_tronc  tronc_diam  clc_quartier
0          0.0         0.0         0.0           7.0
1          0.0         0.0         0.0           7.0
2          0.0         0.0         0.0           7.0
3          0.0         0.0         0.0           7.0
4          0.0         0.0         0.0           7.0
...        ...         ...         ...           ...
8968       3.0         2.0        15.0           6.0
8969       3.0         2.0        15.0           7.0
8970       3.0         2.0        15.0           6.0
8971       3.0         2.0        15.0           6.0
8972       4.0         2.0        15.0           6.0

[8973 rows x 4 columns]
(8973, 4)


In [14]:
# Classification des données en Y
y = data_clean[["fk_arb_etat"]]
print(y)
print(y.shape)

      fk_arb_etat
0             5.0
1             0.0
2             5.0
3             5.0
4             0.0
...           ...
8968          1.0
8969          1.0
8970          1.0
8971          1.0
8972          1.0

[8973 rows x 1 columns]
(8973, 1)


In [20]:
print(np.unique(y))

[0. 1. 2. 3. 4. 5.]


In [16]:
x_train, y_train = x[:6281],y[:6281]
x_test, y_test = x[6281:],y[6281:]

### Apprentissage Supervisé pour la classification

### Métrique pour la classification

### Préparation du Script