Calcul des statistiques sur YOLO

L'entraînement du modèle a été réalisé avec *ultralytics==8.0.38*

**Utilise Tensorflow v2**

/!\ Attention : avec Tensorflow v2.14, la prise en charge du GPU ne s'installe pas correctement ; ce notebook fixe donc la version 2.15.0.

# Paramétrage

## Librairies

In [None]:
# partie spécifique Google Colab
!pip install tensorflow==2.15.0 rasterio plotly scikit-image shapely
!pip install ultralytics==8.0.38
!pip install rasterio plotly scikit-image shapely

Collecting rasterio
  Downloading rasterio-1.3.9-cp310-cp310-manylinux2014_x86_64.whl (20.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.6/20.6 MB[0m [31m74.4 MB/s[0m eta [36m0:00:00[0m
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting snuggs>=1.4.1 (from rasterio)
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Installing collected packages: snuggs, affine, rasterio
Successfully installed affine-2.4.0 rasterio-1.3.9 snuggs-1.4.7
Collecting ultralytics==8.0.38
  Downloading ultralytics-8.0.38-py3-none-any.whl (278 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.1/278.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics==8.0.38)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting sentry-sdk (from ultralytics==8.0.38)
  Downloading sentry_sdk-1.44.1-py2.py3-none-any.whl (266 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import numpy as np
import glob
import plotly.express as px
import plotly.graph_objs as go
import cv2
import pandas as pd
import tqdm
from datetime import date
from google.colab import drive
import random
import matplotlib.pyplot as plt
import os
import math
from pathlib import Path
import shutil
from PIL import Image, ImageOps
from io import BytesIO
import requests

import geopandas as gpd
import rasterio
import shapely
import skimage as ski
from skimage.measure import find_contours, approximate_polygon, subdivide_polygon, label

from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, accuracy_score
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.pipeline import Pipeline

import tensorflow as tf
from tensorflow import image as tf_image
from tensorflow import data as tf_data
from tensorflow import io as tf_io

import keras
from keras import layers, Model, backend
from keras.layers import Input, Rescaling, Dense, Dropout, Flatten, Conv2D, Conv2DTranspose, \
MaxPooling2D, AveragePooling2D, SpatialDropout2D, BatchNormalization, Activation, SeparableConv2D, \
UpSampling2D, GlobalAveragePooling2D
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from keras.utils import load_img, img_to_array, array_to_img

from ultralytics import YOLO
import torch

import warnings
warnings.filterwarnings('ignore')


## Import fonctions Python

In [None]:
# Check that the current working directory is /content
!pwd

/content


In [None]:
# on copie les fichiers python avec les fonctions
! cp /content/drive/MyDrive/"Colab Notebooks"/python/manip_cartes.py /content/manip_cartes.py
! cp /content/drive/MyDrive/"Colab Notebooks"/python/calcul_metriques.py /content/calcul_metriques.py

In [None]:
from manip_cartes import *
from calcul_metriques import calcul_IoU, calcul_precision_recall, calcul_precision_recall2

## Configuration GPU

In [None]:
# Report the Keras / TensorFlow versions and the devices TensorFlow can see
print(keras.__version__)
print(tf.__version__)
print(tf.test.gpu_device_name())
print(tf.config.list_physical_devices())

2.15.0
2.15.0
/device:GPU:0
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Données

In [None]:
# partie spécifique Google Colab
!mkdir -p /content/data/decoupe_final
!cp /content/drive/MyDrive/data/DST/df_decoupe_final.csv /content/data/decoupe_final/df_decoupe_final.csv
!unzip /content/drive/MyDrive/data/DST/decoupe_final.zip -d /content/data/decoupe_final

[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
 extracting: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33.cpg  
  inflating: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33.dbf  
  inflating: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33.jp2  
  inflating: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33.prj  
  inflating: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33.shp  
  inflating: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33.shx  
 extracting: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33_data.jpg  
  inflating: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33_data.jpg.aux.xml  
  inflating: /content/data/decoupe_final/decoupe_final/69-2020_8_834400.0_22_6508200.0_33_label.png  
  inflating: /conten

In [None]:
# Folder holding the CSV index, and the sub-folder the archive was extracted into
data_path = '/content/data/decoupe_final'
images_path = '/content/data/decoupe_final/decoupe_final'
# Tile index: the preview shows columns such as fichier_mask, fichier_shapes and formes
df_decoupe = pd.read_csv(data_path + '/df_decoupe_final.csv')
df_decoupe.head()

Unnamed: 0,dalle_X,dalle_Y,X,Y,X_size,...,fichier_mask,fichier_shapes,formes,multi,erreur
0,0,0,825000.0,6509800.0,1000,...,69-2020_0_825000.0_0_6509800.0_0_label.png,69-2020_0_825000.0_0_6509800.0_0.shp,0,0,0
1,0,1,825000.0,6509600.0,1000,...,69-2020_1_825000.0_0_6509600.0_0_label.png,69-2020_1_825000.0_0_6509600.0_0.shp,0,0,0
2,0,2,825000.0,6509400.0,1000,...,69-2020_2_825000.0_0_6509400.0_4_label.png,69-2020_2_825000.0_0_6509400.0_4.shp,4,0,0
3,0,3,825000.0,6509200.0,1000,...,69-2020_3_825000.0_0_6509200.0_4_label.png,69-2020_3_825000.0_0_6509200.0_4.shp,4,0,0
4,0,4,825000.0,6509000.0,1000,...,69-2020_4_825000.0_0_6509000.0_19_label.png,69-2020_4_825000.0_0_6509000.0_19.shp,19,0,0


## Fonctions

In [None]:
def predict_YOLOv8_compareligne(file_name, model, seuil):
  """Predict a single merged mask for one image with a YOLOv8 segmentation model.

  Args:
    file_name: path of the image handed to ``model.predict``.
    model: loaded ultralytics ``YOLO`` model.
    seuil: confidence threshold forwarded to ``model.predict`` as ``conf``.

  Returns:
    A numpy array: the sum of all predicted instance masks for class 0,
    clipped to [0, 1]. When nothing is detected, an all-zero float32 array
    of shape ``RESOLUTION`` is returned instead.
  """
  res = model.predict(file_name, save = False, classes = [0], imgsz = SIZE, conf = seuil, verbose=False)
  masks = res[0].masks
  if len(res[0]) > 0 and masks is not None:
    # Merge the instance masks and clamp overlaps back to 1.
    # The tensors are used on whatever device the results already live on:
    # forcing .cuda() made the function crash on a CPU-only runtime, and the
    # round trip through TensorFlow was not needed to clip and convert.
    mask = torch.clamp(torch.sum(masks.data, 0), 0, 1).cpu().numpy()
  else:
    mask = np.zeros(RESOLUTION, dtype=np.float32)
  return mask

In [None]:
def compare_ligne(df, index_l, seuils, predict_function, model):
  """Compute IoU, precision and recall for one row of ``df`` at several thresholds.

  The image and mask file names are read from the ``fichier_img`` and
  ``fichier_mask`` columns and resolved against the module-level
  ``images_path``.

  Args:
    df: DataFrame with ``fichier_img`` and ``fichier_mask`` columns.
    index_l: index label of the row to evaluate (used with ``df.loc``).
    seuils: iterable of thresholds passed one by one to ``predict_function``.
    predict_function: callable ``(file_name, model, seuil) -> mask``.
    model: model object forwarded unchanged to ``predict_function``.

  Returns:
    Three lists ``(m_IoU, m_precision, m_recall)``, one value per threshold.

  Raises:
    FileNotFoundError: if the ground-truth mask cannot be read.
  """
  file_name = images_path + '/' + df.loc[index_l,'fichier_img']
  mask_name = images_path + '/' + df.loc[index_l,'fichier_mask']

  # Ground-truth mask
  image_mask = cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
  if image_mask is None:
    # cv2.imread does not raise on a missing/corrupt file, it returns None
    raise FileNotFoundError(f"Cannot read mask image: {mask_name}")
  image_mask = (image_mask > 0)*1  # binarise: any non-zero pixel becomes 1
  # Nearest-neighbour interpolation keeps the resized mask strictly binary
  mask_reduit = cv2.resize(image_mask, dsize = RESOLUTION, interpolation = cv2.INTER_NEAREST)

  # Metrics for each threshold
  m_IoU = []
  m_precision = []
  m_recall = []
  for seuil in seuils:
    # Prediction
    prev_mask = predict_function(file_name, model, seuil)

    # Metrics (the meaning of the third calcul_IoU argument, 25, is defined
    # in calcul_metriques and is not visible here)
    m_IoU.append(calcul_IoU(mask_reduit, prev_mask, 25))
    prec, rec = calcul_precision_recall2(mask_reduit, prev_mask)
    m_precision.append(prec)
    m_recall.append(rec)

  return m_IoU, m_precision, m_recall

In [None]:
def insert01(list1, v0, v1):
  """Return a numpy array made of ``v0``, then the items of ``list1``, then ``v1``."""
  padded = [v0]
  padded.extend(list1)
  padded.append(v1)
  return np.array(padded)

def scoreF1(prec, recl):
  """Return the F1 score (harmonic mean) of ``prec`` and ``recl``; 0 when both sum to 0."""
  total = prec + recl
  # Guard against division by zero
  if total == 0:
    return 0
  return 2*prec*recl/total

# Statistiques

## Chargement modèle YOLO

In [None]:
# Side length passed to YOLO as `imgsz`, and the matching 2-D size used when
# resizing ground-truth masks and building empty predictions
SIZE = 512
RESOLUTION = (SIZE, SIZE)
# Weights file loaded into the ultralytics YOLO model
path_model_saved = '/content/drive/MyDrive/DataScientest/YOLO/YOLOv8_20240124/weights/best.pt'
model_saved = YOLO(path_model_saved)

## Calculs

In [None]:
saving_path = '/content/drive/MyDrive/DataScientest/YOLO/'
today_date = date.today().strftime('%Y%m%d')

# Confidence thresholds at which every image is evaluated
seuils = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.2, 0.3, 0.4, 0.45, 0.5, 0.55, 0.6, 0.7, 0.8, 0.9]

# One entry per image; each entry holds one value per threshold
tab_iou = []
tab_precision = []
tab_recall = []
for i in tqdm.tqdm(range(df_decoupe.shape[0])):
  m_IoU, m_precision, m_recall = compare_ligne(df_decoupe, i, seuils, predict_YOLOv8_compareligne, model_saved)
  tab_iou.append(m_IoU)
  tab_precision.append(m_precision)
  tab_recall.append(m_recall)

# Convert once (rows = images, columns = thresholds) instead of rebuilding
# the arrays on every iteration of the column loop below
arr_iou = np.array(tab_iou)
arr_precision = np.array(tab_precision)
arr_recall = np.array(tab_recall)

# .copy(): the metric columns are added to independent frames rather than to
# slices of df_decoupe (avoids pandas' SettingWithCopy ambiguity)
df_iou = df_decoupe[['fichier_img','formes']].copy()
df_precision = df_decoupe[['fichier_img','formes']].copy()
df_recall = df_decoupe[['fichier_img','formes']].copy()

for i,seuil in enumerate(seuils):
    df_iou[seuil] = arr_iou[:,i]
    df_precision[seuil] = arr_precision[:,i]
    df_recall[seuil] = arr_recall[:,i]

# NOTE: precision is saved under the existing "accuracy" file name; the name
# is kept because the plotting section reads the file back under that name.
df_iou.to_csv(saving_path + 'YOLO_iou_metrics_' + today_date + '.csv')
df_precision.to_csv(saving_path + 'YOLO_accuracy_metrics_' + today_date + '.csv')
df_recall.to_csv(saving_path + 'YOLO_recall_metrics_' + today_date + '.csv')


  0%|          | 0/7500 [00:00<?, ?it/s]Ultralytics YOLOv8.0.38 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8m-seg summary (fused): 245 layers, 27225279 parameters, 0 gradients, 110.0 GFLOPs
100%|██████████| 7500/7500 [1:25:48<00:00,  1.46it/s]


## Graphiques

In [None]:
# Reload saved metrics; the file names carry a hard-coded date (20240408).
# index_col=0: the first CSV column is the index written by to_csv.
saving_path = '/content/drive/MyDrive/DataScientest/YOLO/'
df_prec = pd.read_csv(saving_path + 'YOLO_accuracy_metrics_20240408.csv', index_col = 0)
df_recl = pd.read_csv(saving_path + 'YOLO_recall_metrics_20240408.csv', index_col = 0)
df_iou = pd.read_csv(saving_path + 'YOLO_iou_metrics_20240408.csv', index_col = 0)


### Précision et rappel

In [None]:
# The first two columns are fichier_img / formes; the rest are one column per threshold
cols_prec = df_prec.iloc[:,2:]
cols_recl = df_recl.iloc[:,2:]

# Threshold axis and per-threshold aggregates, each padded with the end
# points chosen for thresholds 0 and 1
s_probs = insert01(df_prec.columns[2:].astype('float'), 0, 1)
prec_avg = insert01(np.mean(cols_prec, axis=0), 0, 1)
prec_med = insert01(np.median(cols_prec, axis=0), 0, 1)
recl_avg = insert01(np.mean(cols_recl, axis=0), 1, 0)
recl_med = insert01(np.median(cols_recl, axis=0), 1, 0)

fig = go.Figure()
for trace_y, trace_name in [
    (prec_avg, "Précision moyenne"),
    (prec_med, "Précision médiane"),
    (recl_avg, "Rappel moyen"),
    (recl_med, "Rappel médian"),
]:
    fig.add_trace(go.Scatter(x=s_probs, y=trace_y, name=trace_name))
fig.update_layout(
    title="Evolution de la précision et du rappel en fonction du seuil de probabilité",
    xaxis_title='Seuil de probabilité',
    yaxis_title="Métrique",
    legend_title="Métriques",
    width = 800,
    height = 500)
fig.show()



In [None]:
# Precision/recall curve, once from the means and once from the medians
fig = go.Figure()
for trace_x, trace_y, trace_name in [
    (recl_avg, prec_avg, "P/R sur moyennes"),
    (recl_med, prec_med, "P/R sur médianes"),
]:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, name=trace_name))
fig.update_layout(
    title="Courbe précision/rappel",
    xaxis_title='Rappel',
    yaxis_title="Précision",
    legend_title="Métriques",
    width=720,
    height=600)
fig.show()

In [None]:
# Threshold whose (mean recall, mean precision) point is closest, in Euclidean distance, to (1, 1)
s_probs[np.argmin(np.sqrt((1-recl_avg)**2 + (1-prec_avg)**2))]

0.09

In [None]:
# Threshold whose (median recall, median precision) point is closest, in Euclidean distance, to (1, 1)
s_probs[np.argmin(np.sqrt((1-recl_med)**2 + (1-prec_med)**2))]

0.03

### Score F1

In [None]:
# Open question kept from the original: is it valid to compute the F1 score
# directly from the mean / median precision and recall?
f1_avg = [scoreF1(p_val, r_val) for p_val, r_val in zip(prec_avg, recl_avg)]
f1_med = [scoreF1(p_val, r_val) for p_val, r_val in zip(prec_med, recl_med)]

fig = go.Figure()
for trace_y, trace_name in [(f1_avg, "Score F1 moyen"), (f1_med, "Score F1 median")]:
    fig.add_trace(go.Scatter(x=s_probs, y=trace_y, name=trace_name))
fig.update_layout(
    title="Evolution du score F1 en fonction du seuil de probabilité",
    xaxis_title='Seuil de probabilité',
    yaxis_title="Métrique",
    legend_title="Métriques",
    width = 800,
    height = 500)
fig.show()



In [None]:
# Threshold that maximises the F1 score computed from the median precision and recall
s_probs[np.argmax(f1_med)]

0.05

In [None]:
# Median recall at the position where the median-based F1 score is highest
recl_med[np.argmax(f1_med)]

0.7825135094551644

### IoU

In [None]:
# Per-threshold IoU aggregates over the threshold columns (first two columns skipped),
# padded with 0 in front and 0.01 at the end
# NOTE(review): the 0.01 end value is not explained in this notebook — confirm
cols_iou = df_iou.iloc[:,2:]
iou_avg = insert01(np.mean(cols_iou, axis=0), 0, 0.01)
iou_med = insert01(np.median(cols_iou, axis=0), 0, 0.01)

fig = go.Figure()
for trace_y, trace_name in [(iou_avg, "IoU moyen"), (iou_med, "IoU median")]:
    fig.add_trace(go.Scatter(x=s_probs, y=trace_y, name=trace_name))
fig.update_layout(
    title="Evolution de l'IoU en fonction du seuil de probabilité",
    xaxis_title='Seuil de probabilité',
    yaxis_title="Métrique",
    legend_title="Métriques",
    width = 800,
    height = 500)
fig.show()



In [None]:
# Threshold with the highest median IoU
s_probs[np.argmax(iou_med)]

0.1

In [None]:
# Threshold with the highest mean IoU
s_probs[np.argmax(iou_avg)]

0.2

### Répartition en fonction du nombre de formes

In [None]:
# Per-image metrics at one threshold. The column label is a string because
# the frames were read back from CSV.
s_prob = '0.05'
precs, recls, ious = df_prec[s_prob], df_recl[s_prob], df_iou[s_prob]
# Per-image F1 score from the per-image precision and recall
F1s = np.array(list(map(scoreF1, precs, recls)))

In [None]:
# Bin edges on the `formes` column: each bin is [lower, upper)
seuils_formes = [0, 1, 5, 20, 50, 3000]
filtres = []
somme = 0
for lower, upper in zip(seuils_formes[:-1], seuils_formes[1:]):
    in_bin = (df_prec.formes >= lower) & (df_prec.formes < upper)
    filtres.append(in_bin)
    print("Nombre de bâtiments entre", lower, "et", upper-1, "=", in_bin.sum())
    somme += in_bin.sum()

# Sum of the bin sizes, printed for comparison with the number of rows
print("total = ", somme)

Nombre de bâtiments entre 0 et 0 = 1745
Nombre de bâtiments entre 1 et 4 = 1421
Nombre de bâtiments entre 5 et 19 = 1803
Nombre de bâtiments entre 20 et 49 = 1369
Nombre de bâtiments entre 50 et 2999 = 1162
total =  7500


In [None]:
# One display label per bin; the first and last labels are replaced by special wording
noms = []
for lower, upper in zip(seuils_formes[:-1], seuils_formes[1:]):
    noms.append(str(lower)+" à "+str(upper-1)+" bâtiments")
noms[0] = "aucun bâtiment"
noms[-1] = "plus de "+str(seuils_formes[-2])+" bâtiments"
print(noms)


['aucun bâtiment', '1 à 4 bâtiments', '5 à 19 bâtiments', '20 à 49 bâtiments', 'plus de 50 bâtiments']


In [None]:
# One box per bin: distribution of per-image precision
fig = go.Figure()
for fltr, bin_name in zip(filtres, noms):
    fig.add_trace(go.Box(y=precs[fltr], name=bin_name))
fig.update_layout(
    title="Précision du modèle en fonction du nombre de bâtiments par image",
    width = 800,
    height = 500)
fig.update_yaxes(title_text='Précision')
fig.show()

In [None]:
# One box per bin: distribution of per-image recall
fig = go.Figure()
for fltr, bin_name in zip(filtres, noms):
    fig.add_trace(go.Box(y=recls[fltr], name=bin_name))
fig.update_layout(
    title="Rappel du modèle en fonction du nombre de bâtiments par image",
    width = 800,
    height = 500)
fig.update_yaxes(title_text='Rappel')
fig.show()


In [None]:
# One box per bin: distribution of per-image F1 score
fig = go.Figure()
for fltr, bin_name in zip(filtres, noms):
    fig.add_trace(go.Box(y=F1s[fltr], name=bin_name))
fig.update_layout(
    title="Score F1 du modèle en fonction du nombre de bâtiments par image",
    width = 800,
    height = 500)
fig.update_yaxes(title_text='Score F1')
fig.show()

In [None]:
# Work on a copy: `ious` is a column taken from df_iou, and assigning through
# it modified the loaded metrics in place (chained assignment).
ious_plot = ious.copy()
# The first bin's IoU values are rounded down before plotting
# NOTE(review): presumably to collapse fractional values to 0 for that bin — confirm intent
ious_plot.loc[filtres[0]] = np.floor(ious_plot.loc[filtres[0]])

# One box per bin: distribution of per-image IoU
fig = go.Figure()
for i,fltr in enumerate(filtres):
    fig.add_trace(go.Box(y=ious_plot[fltr], name=noms[i]))
fig.update_layout(
    title="IoU global du modèle en fonction du nombre de bâtiments par image",
    width = 800,
    height = 500)
fig.update_yaxes(title_text='IoU')
fig.show()
