In [1]:
from sklearn.preprocessing import StandardScaler
import os
import pandas as pd
from sklearn import datasets
import plotly as plt
import plotly.express as px
from features import *
from clustering import *
from utils import *
from constant import  PATH_OUTPUT, MODEL_CLUSTERING, PATH_DATA, PATH_DATA_ALL

from sklearn.cluster import KMeans

from sklearn.pipeline import Pipeline

from skimage.transform import resize

In [2]:
from images import load_images_from_folder

# Load the working image set from the "code_test" sub-folder of the data root.
# The loader hands back the images, their ground-truth labels and the folder names.
folder_path = f"{PATH_DATA_ALL}/code_test"
images, labels_true, folder_names = load_images_from_folder(folder_path)

# Number of loaded images and the channel count used for them.
taille, nombre_de_canaux = len(images), 3

In [3]:

# Assumes `images` is the list of RGB images loaded earlier in the notebook.
# Convert every image to the HSV colour space.
descriptors_hsv = convert_color_space(images, "HSV") # or "Lab"

In [4]:
# descriptors_sift = compute_sift_descriptors(images)
# Step 1: extract SIFT features from the HSV-converted images and report how many entries came back.
sift_descriptors = extract_sift_features(descriptors_hsv)
print(f"Taille de sift_descriptors: {len(sift_descriptors)}")
# Step 2: build the feature vectors with a Bag of Features over the SIFT output (n_clusters=20).
descriptors_sift = create_bag_of_features(sift_descriptors, n_clusters=20)

Taille de sift_descriptors: 42


In [5]:
# Use the HSV-converted images as the input of the descriptor computations that follow.
images_to_use = descriptors_hsv

In [6]:

# Compute the HOG descriptors of the selected images with the project helper.
descriptors_hog = compute_hog_descriptors(images_to_use)

In [7]:
# Compute the gray-level histogram descriptors of the selected images with the project helper.
descriptors_hist = compute_gray_histograms(images_to_use)

**5. Conversion des données de clustering au format requis pour la visualisation avec Streamlit.**

**TODO :**
- Dans le fichier `utils.py`, implémenter la fonction `conversion_3d` afin de convertir un vecteur de dimension n en un vecteur de dimension 3 pour la visualisation.
- Lien : https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html

In [8]:
# Sanity check: show the array shape of each descriptor set (HOG, HSV images, SIFT bag of features).
# NOTE(review): `np` is not imported explicitly in this notebook; presumably it is
# provided by one of the star imports - confirm or add `import numpy as np`.
for descriptor_set in (descriptors_hog, descriptors_hsv, descriptors_sift):
    print(np.array(descriptor_set).shape)

(42, 8192)
(42, 256, 256, 3)
(42, 20)


**3. Application de RBM et KMEANS**

In [5]:
from sklearn.preprocessing import StandardScaler

# Run the Stacked RBM + K-Means pipeline on every (colour space, descriptor)
# combination, export one clustering workbook per combination, and gather the
# metrics of all runs into a single workbook.
images_to_use = [images, convert_color_space(images, "HSV")]
descs0 = ["", "HSV"]  # colour-space tags; "" means the images as loaded
descs = ["hist", "hog", "sift"]
# Human-readable descriptor names used to label each run in the metrics.
descriptor_labels = {"hist": "HISTOGRAM", "hog": "HOG", "sift": "SIFT"}
list_dict = []

# Create the export folder up front so the to_excel calls below cannot fail
# on a missing directory.
os.makedirs(PATH_OUTPUT, exist_ok=True)

for d0 in range(len(descs0)):
    descriptors_hog = compute_hog_descriptors(images_to_use[d0])
    descriptors_hist = compute_gray_histograms(images_to_use[d0])
    sift_descriptors = extract_sift_features(images_to_use[d0])
    descriptors_sift = create_bag_of_features(sift_descriptors, n_clusters=20)
    # The order of this list must match the order of `descs`.
    descriptors = [descriptors_hist, descriptors_hog, descriptors_sift]
    for d in range(len(descs)):
        # Label the run with the combination actually being evaluated. The label
        # used to be hard-coded to "HSV et HISTOGRAM" for every run, which made
        # the reported metrics indistinguishable from one another.
        if descs0[d0]:
            name_descriptor = f"{descs0[d0]} et {descriptor_labels[descs[d]]}"
        else:
            name_descriptor = descriptor_labels[descs[d]]

        # Fit the stacked RBMs on the current descriptor set, then re-encode it.
        stacked_rbm = StackedRBM(n_components_list=[256, 128], n_iter=10, learning_rate=0.01, batch_size=10)
        stacked_rbm.fit(descriptors[d])
        transformed_images = stacked_rbm.transform(descriptors[d])

        # Pass the RBM features through conversion_3d before clustering.
        # NOTE(review): despite the variable name, no scaling is applied here (the
        # StandardScaler that used to be created at this point was never used).
        # The name is kept because a later cell reads it.
        transformed_images_scaled = conversion_3d(transformed_images)

        # Cluster with K-Means (fixed seed for reproducibility).
        kmeans = KMeans(n_clusters=20, random_state=42)
        clusters = kmeans.fit_predict(transformed_images_scaled)

        metric = show_metric(labels_true, clusters, transformed_images_scaled, bool_show=True, name_descriptor=name_descriptor, name_model = "Stacked RBM", bool_return=True)
        list_dict.append(metric)

        # Visualisation export: standardise the *raw* descriptors, pass them through
        # conversion_3d and attach the true labels plus the K-Means labels found above.
        scaler = StandardScaler()
        descriptors_norm = scaler.fit_transform(descriptors[d])
        x_3d_norm = conversion_3d(descriptors_norm)

        df = create_df_to_export(x_3d_norm, labels_true, kmeans.labels_)

        # Save the per-combination clustering data.
        df.to_excel(PATH_OUTPUT+f"/save_clustering_{descs0[d0]}_{descs[d]}_rbm_kmeans.xlsx")
        print(f"save_clustering_{descs0[d0]}_{descs[d]}_rbm_kmeans.xlsx")

# One row per (colour space, descriptor) run.
df_metric = pd.DataFrame(list_dict)
df_metric.to_excel(PATH_OUTPUT+"/save_metric.xlsx")

[BernoulliRBM] Iteration 1, pseudo-likelihood = -4318.04, time = 3.44s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -8.70, time = 0.79s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -6.61, time = 0.65s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -5.60, time = 0.65s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -44227.75, time = 0.65s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -4.78, time = 0.99s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -4.56, time = 0.95s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -80590.83, time = 0.69s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -3.33, time = 0.67s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -4.35, time = 0.68s
[BernoulliRBM] Iteration 1, pseudo-likelihood = -185.34, time = 0.50s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -179.08, time = 0.70s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -177.30, time = 0.82s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -176.26, time = 0.76s
[BernoulliRBM] Iteration 5, 

  return fit_method(estimator, *args, **kwargs)


########## Métrique descripteur : HSV et HISTOGRAM
Adjusted Mutual Information: 0.10265030070770784
Silhouette Score: 0.9047619
save_clustering__hist_rbm_kmeans.xlsx
[BernoulliRBM] Iteration 1, pseudo-likelihood = -5604.76, time = 5.86s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -5563.10, time = 4.80s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -5464.07, time = 2.64s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -5458.39, time = 2.94s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -5418.81, time = 2.27s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -5401.26, time = 2.51s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -5356.28, time = 2.32s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -5265.82, time = 2.90s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -5214.11, time = 2.77s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -5295.01, time = 2.05s
[BernoulliRBM] Iteration 1, pseudo-likelihood = -89.84, time = 0.15s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -78.41

  return fit_method(estimator, *args, **kwargs)


########## Métrique descripteur : HSV et HISTOGRAM
Adjusted Mutual Information: 0.14812854818154125
Silhouette Score: 0.9047619
save_clustering__sift_rbm_kmeans.xlsx
[BernoulliRBM] Iteration 1, pseudo-likelihood = -11.97, time = 0.49s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -6.64, time = 0.48s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -4.07, time = 0.69s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -2.83, time = 0.58s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -2.21, time = 0.51s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -1.71, time = 0.56s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -730081.05, time = 0.42s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -1.34, time = 0.52s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -1.15, time = 0.58s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -1.07, time = 0.47s
[BernoulliRBM] Iteration 1, pseudo-likelihood = -184.67, time = 0.24s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -180.12, time = 0.51s
[Bernou

  return fit_method(estimator, *args, **kwargs)


########## Métrique descripteur : HSV et HISTOGRAM
Adjusted Mutual Information: 0.14812854818154125
Silhouette Score: 0.9047619
save_clustering_HSV_hist_rbm_kmeans.xlsx
[BernoulliRBM] Iteration 1, pseudo-likelihood = -5631.89, time = 8.09s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -5584.06, time = 10.32s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -5526.12, time = 11.11s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -5496.78, time = 10.16s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -5456.93, time = 9.72s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -5429.12, time = 7.09s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -5411.58, time = 1.33s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -5319.39, time = 6.13s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -5303.60, time = 15.81s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -5283.76, time = 2.61s
[BernoulliRBM] Iteration 1, pseudo-likelihood = -89.17, time = 0.05s
[BernoulliRBM] Iteration 2, pseudo-likelihood =

  return fit_method(estimator, *args, **kwargs)


########## Métrique descripteur : HSV et HISTOGRAM
Adjusted Mutual Information: 0.12455613933161724
Silhouette Score: 0.88095236
save_clustering_HSV_sift_rbm_kmeans.xlsx


In [10]:
print("\n\n##### Résultat ######")
# metric_hist = show_metric(labels_true, clusters, transformed_images_scaled, bool_show=True, name_descriptor="HISTOGRAM", bool_return=True)

# NOTE(review): `clusters` and `transformed_images_scaled` are not computed in this
# cell; they are whatever the kernel last assigned (the RBM/K-Means loop cell
# overwrites them on every iteration). The "HSV et HISTOGRAM" label is therefore
# only correct if that was the last combination processed - confirm before
# trusting this metric.
metric_hsv_hist_rbm = show_metric(labels_true, clusters, transformed_images_scaled, bool_show=True, name_descriptor="HSV et HISTOGRAM", name_model = "Stacked RBM", bool_return=True)
# NOTE(review): the disabled lines below would load clustering exports, not metrics,
# into the metric_* names - verify the intent before re-enabling them.
# metric_hsv_hog_rbm = pd.read_excel("output/save_clustering_hsv_hog_rbm_kmeans.xlsx")
# metric_hsv_sift_rbm = pd.read_excel("output/save_clustering_hsv_sift_rbm_kmeans.xlsx")
# metric_hist_rbm = pd.read_excel("output/save_clustering_hist_rbm_kmeans.xlsx")
# metric_hog_rbm = pd.read_excel("output/save_clustering_hog_rbm_kmeans.xlsx")
# metric_sift_rbm = pd.read_excel("output/save_clustering_sift_rbm_kmeans.xlsx")



##### Résultat ######
########## Métrique descripteur : HSV_HISTOGRAM
Adjusted Mutual Information: -0.00973927291987954
Silhouette Score: 1.0


**5. Conversion des données de clustering au format requis pour la visualisation avec Streamlit.**

**TODO :**
- Dans le fichier `utils.py`, implémenter la fonction `conversion_3d` afin de convertir un vecteur de dimension n en un vecteur de dimension 3 pour la visualisation.
- Lien : https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html

In [11]:
# Standardise each RBM descriptor set, pass it through conversion_3d and build the
# dataframes used for the visualisation export.
# NOTE(review): none of the `descriptors_*_rbm` inputs are assigned anywhere in the
# visible notebook, so this cell raises NameError on a fresh kernel - confirm where
# they are supposed to come from.
# NOTE(review): every dataframe below reuses the same `kmeans.labels_` (the last
# K-Means fit left in the kernel) - confirm this is intended for all descriptor sets.
scaler = StandardScaler()
descriptors_hsv_hist_rbm_norm = scaler.fit_transform(descriptors_hsv_hist_rbm)
descriptors_hsv_hog_rbm_norm = scaler.fit_transform(descriptors_hsv_hog_rbm)
descriptors_hsv_sift_rbm_norm = scaler.fit_transform(descriptors_hsv_sift_rbm)
descriptors_hsv_rbm_norm = scaler.fit_transform(descriptors_hsv_rbm)
descriptors_hist_rbm_norm = scaler.fit_transform(descriptors_hist_rbm)
descriptors_hog_rbm_norm = scaler.fit_transform(descriptors_hog_rbm)
descriptors_sift_rbm_norm = scaler.fit_transform(descriptors_sift_rbm)
descriptors_rbm_norm = scaler.fit_transform(descriptors_rbm)

# Conversion to the 3D format used for the visualisation.
x_3d_hsv_hist_rbm_norm = conversion_3d(descriptors_hsv_hist_rbm_norm)
x_3d_hsv_hog_rbm_norm = conversion_3d(descriptors_hsv_hog_rbm_norm)
x_3d_hsv_sift_rbm_norm = conversion_3d(descriptors_hsv_sift_rbm_norm)
x_3d_hsv_rbm_norm = conversion_3d(descriptors_hsv_rbm_norm)
x_3d_hist_rbm_norm = conversion_3d(descriptors_hist_rbm_norm)
x_3d_hog_rbm_norm = conversion_3d(descriptors_hog_rbm_norm)
x_3d_sift_rbm_norm = conversion_3d(descriptors_sift_rbm_norm)
x_3d_rbm_norm = conversion_3d(descriptors_rbm_norm)

# Dataframes holding the 3D points, the true labels and the K-Means labels.
# (The former `df_hist` was built from `x_3d_hist`, whose definition is commented
# out, so it has been dropped; the duplicated `df_hist_rbm` line is removed too.)
df_hsv_hist_rbm = create_df_to_export(x_3d_hsv_hist_rbm_norm, labels_true, kmeans.labels_)
df_hsv_hog_rbm = create_df_to_export(x_3d_hsv_hog_rbm_norm, labels_true, kmeans.labels_)
df_hsv_sift_rbm = create_df_to_export(x_3d_hsv_sift_rbm_norm, labels_true, kmeans.labels_)
df_hsv_rbm = create_df_to_export(x_3d_hsv_rbm_norm, labels_true, kmeans.labels_)
df_hist_rbm = create_df_to_export(x_3d_hist_rbm_norm, labels_true, kmeans.labels_)
df_hog_rbm = create_df_to_export(x_3d_hog_rbm_norm, labels_true, kmeans.labels_)
df_sift_rbm = create_df_to_export(x_3d_sift_rbm_norm, labels_true, kmeans.labels_)
df_3d_rbm = create_df_to_export(x_3d_rbm_norm, labels_true, kmeans.labels_)

# Create the output folder if needed (no error when it already exists).
os.makedirs(PATH_OUTPUT, exist_ok=True)

# Save the data: the file name refers to the HSV + histogram RBM features, so
# export the matching dataframe.
df_hsv_hist_rbm.to_excel(PATH_OUTPUT+"/save_clustering_hsv_hist_rbm_kmeans.xlsx")




In [14]:
# Gather the per-descriptor metrics into one table and export it.
# NOTE(review): apart from `metric_hsv_hist_rbm`, none of the metric_* names listed
# here are assigned in the visible notebook (their assignments are commented out),
# so this cell fails on a fresh kernel. It also writes to the same
# "save_metric.xlsx" path as the RBM/K-Means loop cell - confirm which one should win.
list_dict = [metric_hsv_hist_rbm, metric_hsv_hog_rbm, metric_hsv_sift_rbm, metric_hsv_rbm, metric_hist_rbm, metric_hog_rbm, metric_sift_rbm, metric_rbm]
df_metric = pd.DataFrame(list_dict)
df_metric.to_excel(PATH_OUTPUT+"/save_metric.xlsx")

SyntaxError: invalid syntax (1554332066.py, line 2)