In [1]:
# Library imports
import os
import subprocess
import sys

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, KFold, RepeatedStratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report, \
    accuracy_score

from my_function import ( 
    sel_classif_pixel,
    report_from_dict_to_df,
    supprimer_dossier_non_vide)

# The libsigma modules are imported by bare name, so their folder has to be
# on sys.path before the three imports below.
sys.path.append('/home/onyxia/work/libsigma')
import read_and_write as rw
import classification as cla
import plots

In [2]:
# Création du dossier de sauvegarde temporaire et paramètres
output_dir = "/home/onyxia/work/output_classif"
os.makedirs(output_dir, exist_ok=True) # Création d'un dossier output temporaire

sample_path = "/home/onyxia/work/Depot_Git/results/data/sample/Sample_BD_foret_T31TCJ.shp"
path_sample_px = os.path.join(output_dir,"sample_classif_px.shp")

sample_rasterized = os.path.join(output_dir,"rasterized_sample.tif")
path_image_3b = "/home/onyxia/work/Depot_Git/results/data/img_pretraitees/Serie_temp_S2_3_bands.tif"
path_image_allbands = "/home/onyxia/work/Depot_Git/results/data/img_pretraitees/Serie_temp_S2_allbands.tif"

path_sample_px_centroid = os.path.join(output_dir,"sample_px_centroid.shp")

In [3]:
# Write a sample vector file holding only what the pixel-level
# classification needs: the class code and the geometry.
sample = gpd.read_file(sample_path)
sample_fields = sample[['Code', "geometry"]]
# sel_classif_pixel comes from my_function; presumably it keeps the classes
# used for the pixel classification -- see that module for the exact rule.
sample_px = sel_classif_pixel(sample_fields)
sample_px.to_file(path_sample_px)

In [5]:
# Rasterise the sample polygons onto the grid of the reference image.

# Paths come from the configuration cell instead of being rebuilt here.
in_vector = path_sample_px
ref_image = path_image_allbands
out_image = sample_rasterized
field_name = 'Code'  # field containing the numeric label of the classes

# Grid of the reference image: pixel size, upper-left corner and dimensions.
ref_image_open = rw.open_image(ref_image)
spatial_resolution = rw.get_pixel_size(ref_image_open)[0]
sptial_resolution = spatial_resolution  # former (misspelled) name, kept as an alias
xmin, ymax = rw.get_origin_coordinates(ref_image_open)
nb_rows, nb_cols = rw.get_image_dimension(ref_image_open)[0:2]

# The extent is derived from the pixel size actually read from the image
# (it used to be hard-coded to 10). Like the "-tr" option below, this uses
# the x pixel size for both axes, i.e. it assumes square pixels.
xmax = xmin + nb_cols * spatial_resolution
ymin = ymax - nb_rows * spatial_resolution

# The command is passed as an argument list: no shell is involved, so paths
# containing spaces or special characters are handled correctly.
cmd = [
    "gdal_rasterize",
    "-a", field_name,
    "-tr", str(spatial_resolution), str(spatial_resolution),
    "-te", str(xmin), str(ymin), str(xmax), str(ymax),
    "-ot", "Byte",
    "-of", "GTiff",
    in_vector,
    out_image,
]

# check=True raises CalledProcessError when gdal_rasterize fails, instead of
# silently continuing with a missing or stale raster.
rasterize_result = subprocess.run(cmd, check=True)

0...10...20...30...40...50...60...70...80...90...100 - done.


0

In [None]:
# Extract the samples from a point file (one centroid per polygon) so that
# every sample can be traced back to the polygon it belongs to.

# Point layer made of the class code and the polygon centroid. The centroids
# are set as THE geometry of a new GeoDataFrame: selecting
# ['Code', 'geom_points'] from sample_px dropped the active geometry column,
# which makes to_file() fail, and adding a second geometry column to
# sample_px prevented it from being written again.
sample_px_centroid = gpd.GeoDataFrame(
    sample_px[['Code']],
    geometry=sample_px.centroid,
)
sample_px_centroid.to_file(path_sample_px_centroid)  # saved in the temporary output folder

# Features X: pixel values of the 3-band stack at the image coordinates of the points.
# NOTE(review): row/col are computed against the all-bands image but read from
# the 3-band one -- this assumes both rasters share the same grid; confirm.
list_row, list_col = rw.get_row_col_from_file(path_sample_px_centroid,
                                              path_image_allbands)
image_3b = rw.load_img_as_array(path_image_3b)
X = image_3b[(list_row, list_col)]

# Labels Y: class codes re-read from the saved point file, reshaped into a
# column vector of shape (n_samples, 1).
gdf = gpd.read_file(path_sample_px_centroid)
Y = gdf.loc[:, "Code"].values
Y = np.atleast_2d(Y).T

# One label per feature row, otherwise the two files are out of sync.
assert X.shape[0] == Y.shape[0], "X and Y do not hold the same number of samples"

In [None]:
# Repeated stratified k-fold cross-validation of the Random Forest
# (one sample per polygon, see the extraction cell).

nb_iter = 10
nb_folds = 2
random_seed = 42  # fixed seed: same folds and same forests on every run

# outputs
suffix = '_CV{}fold_stratifed_x{}times'.format(nb_folds, nb_iter)
out_classif = os.path.join(output_dir, 'ma_classif{}.tif'.format(suffix))
out_matrix = os.path.join(output_dir, 'ma_matrice{}.png'.format(suffix))
out_qualite = os.path.join(output_dir, 'mes_qualites{}.png'.format(suffix))

list_cm = []
list_accuracy = []
list_report = []

# The same, complete list of classes is used for every fold. Deriving it from
# the predictions of each fold gave matrices and reports whose shape could
# change from one fold to the next, which breaks their averaging.
class_labels = np.unique(Y)
# scikit-learn expects 1-D labels; a column vector triggers a DataConversionWarning.
Y_flat = np.ravel(Y)

# Iter on stratified K fold
kf = RepeatedStratifiedKFold(n_splits=nb_folds, n_repeats=nb_iter,
                             random_state=random_seed)
for train, test in kf.split(X, Y_flat):
    X_train, X_test = X[train], X[test]
    Y_train, Y_test = Y_flat[train], Y_flat[test]

    # Random Forest classifier
    rfc = RandomForestClassifier(
        max_depth = 50,
        oob_score = True,
        max_samples = 0.75,
        class_weight = 'balanced',
        n_jobs = -1,
        random_state = random_seed
    )
    rfc.fit(X_train, Y_train)

    Y_predict = rfc.predict(X_test)

    # compute quality
    list_cm.append(confusion_matrix(Y_test, Y_predict, labels=class_labels))
    list_accuracy.append(accuracy_score(Y_test, Y_predict))
    # zero_division=0: a class that is never predicted in a fold gets a
    # precision of 0 without emitting a warning.
    report = classification_report(Y_test, Y_predict, labels=class_labels,
                                   output_dict=True, zero_division=0)

    # store them
    list_report.append(report_from_dict_to_df(report))

In [None]:
# Number of samples per class code.
class_codes = (11, 12, 13, 14, 21, 22, 23, 24, 25)

# One vectorised pass over Y instead of nine comparisons per sample.
codes_found, occurrences = np.unique(Y, return_counts=True)
class_counts = dict(zip(codes_found.tolist(), occurrences.tolist()))

# The single-letter counters are kept for the cell that prints them;
# a code absent from Y counts as 0.
a, b, c, d, e, f, g, h, j = (class_counts.get(code, 0) for code in class_codes)

In [None]:
# Show the nine per-class sample counts on one line, space separated.
print(*(a, b, c, d, e, f, g, h, j))

In [None]:
# Sanity check: print the training indices of each fold of a 5-fold
# stratified split.
# Dedicated names are used on purpose: this cell used to overwrite kf,
# X_train, Y_train, X_test and Y_test, which other cells rely on. A fixed
# seed makes the printed folds identical from one run to the next.
kf_check = RepeatedStratifiedKFold(n_splits=5, n_repeats=1, random_state=42)
for fold_train_idx, _fold_test_idx in kf_check.split(X, np.ravel(Y)):
    print(fold_train_idx)

In [None]:
# Print the quality table stored for each cross-validation fold.
for fold_report in list_report:
    print(fold_report)

In [None]:
# Aggregate the cross-validation results over all folds and plot them.

# compute mean of cm
array_cm = np.array(list_cm)
mean_cm = array_cm.mean(axis=0)

# compute mean and std of overall accuracy
array_accuracy = np.array(list_accuracy)
mean_accuracy = array_accuracy.mean()
std_accuracy = array_accuracy.std()

# compute mean and std of classification report
# (every fold report must have the same shape for the stacking to work)
array_report = np.array(list_report)
mean_report = array_report.mean(axis=0)
std_report = array_report.std(axis=0)
a_report = list_report[0]  # only used for its row / column labels
mean_df_report = pd.DataFrame(mean_report, index=a_report.index,
                              columns=a_report.columns)
std_df_report = pd.DataFrame(std_report, index=a_report.index,
                             columns=a_report.columns)

# Display confusion matrix
# The tick labels are the sorted classes of the whole sample set. Taking them
# from Y_predict used a leftover of the last fold only, which misses any class
# that fold never predicted and then no longer matches the matrix axes.
plots.plot_cm(mean_cm, np.unique(Y))
plt.savefig(out_matrix, bbox_inches='tight')

# Display class metrics
fig, ax = plt.subplots(figsize=(10, 7))
ax = mean_df_report.T.plot.bar(ax=ax, yerr=std_df_report.T, zorder=2)
ax.set_ylim(0.5, 1)  # NOTE: scores below 0.5 are cut off by this axis range
_ = ax.text(1.5, 0.95, 'OA : {:.2f} +- {:.2f}'.format(mean_accuracy,
                                                      std_accuracy),
            fontsize=14)
ax.set_title('Class quality estimation')

# custom : cuteness
# background color
ax.set_facecolor('ivory')
# labels
x_label = ax.get_xlabel()
ax.set_xlabel(x_label, fontdict={'fontname': 'Sawasdee'}, fontsize=14)
y_label = ax.get_ylabel()
ax.set_ylabel(y_label, fontdict={'fontname': 'Sawasdee'}, fontsize=14)
# borders
for side in ("top", "right", "bottom", "left"):
    ax.spines[side].set_visible(False)
ax.tick_params(axis='x', colors='darkslategrey', labelsize=14)
ax.tick_params(axis='y', colors='darkslategrey', labelsize=14)
# grid
ax.minorticks_on()
ax.yaxis.grid(which='major', color='darkgoldenrod', linestyle='--',
              linewidth=0.5, zorder=1)
ax.yaxis.grid(which='minor', color='darkgoldenrod', linestyle='-.',
              linewidth=0.3, zorder=1)
# Save through the figure handle rather than pyplot's "current figure".
fig.savefig(out_qualite, bbox_inches='tight')

In [None]:
## Train the final model

# Random Forest classifier, same hyper-parameters as in the cross-validation
rfc = RandomForestClassifier(
    max_depth = 50,
    oob_score = True,
    max_samples = 0.75,
    class_weight = 'balanced',
    n_jobs = -1,
    random_state = 42  # reproducible forest
)

# Fit on every available sample. X_train / Y_train are loop leftovers: their
# content depends on which cell ran last, so the final model must not use them.
# np.ravel gives scikit-learn the 1-D label vector it expects.
rfc.fit(X, np.ravel(Y))

In [None]:
# Save the classification map.
# The previous call used three names (save_classification, model, images)
# that are defined nowhere in this notebook; the map is now produced from the
# fitted classifier `rfc` and the image already loaded in `image_3b`.
out_map = 'results/data/classif/carte_essences_echelle_pixel.tif'
os.makedirs(os.path.dirname(out_map), exist_ok=True)

nb_rows, nb_cols, nb_bands = image_3b.shape
classif_map = np.zeros((nb_rows, nb_cols, 1), dtype='uint8')

# Predict block of rows by block of rows so the whole image never has to be
# flattened into a single feature table.
# NOTE(review): every pixel is classified, nodata areas included -- add a
# mask here if the image has one.
rows_per_block = 256
for row_start in range(0, nb_rows, rows_per_block):
    block = image_3b[row_start:row_start + rows_per_block]
    block_pred = rfc.predict(block.reshape(-1, nb_bands))
    classif_map[row_start:row_start + block.shape[0], :, 0] = \
        block_pred.reshape(block.shape[0], nb_cols)

# NOTE(review): assumes libsigma's rw.write_image(out_filename, array,
# data_set=...) copies the georeferencing from `data_set` -- confirm against
# read_and_write.py.
rw.write_image(out_map, classif_map, data_set=rw.open_image(path_image_3b))

In [None]:
## Folder clean-up
# Hands the temporary working folder to supprimer_dossier_non_vide (imported
# from my_function; presumably deletes the folder together with its content).
# NOTE(review): out_matrix and out_qualite are saved inside output_dir, so the
# figures would be removed as well -- confirm this is intended.
supprimer_dossier_non_vide(output_dir)