# Importations

In [2]:
from PIL import Image
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from torchvision.transforms import ToTensor, Resize
import numpy as np

## Loading the dataset

In [3]:
dataset_path = "./img_dataset"

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

dataset = datasets.ImageFolder(root=dataset_path, transform=preprocess)

In [4]:
train_dataset, test_dataset = random_split(
    dataset=dataset,
    lengths=[int(0.8*len(dataset)), int(0.2*len(dataset))+1]
)

In [5]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

## Vectorization of the images

In [6]:
# Transformer une image RGB en vecteur
def image_to_vector(image, size=(32, 32)):
    transform = transforms.Compose([Resize(size), ToTensor()])
    tensor_image = transform(image).permute(1, 2, 0).numpy()  # Convertir en numpy
    return tensor_image.flatten()

In [7]:
from skimage.feature import hog

def extract_hog_features(image):
    # Convertir l'image en niveaux de gris si nécessaire
    gray_image = np.mean(image, axis=2) if image.shape[-1] == 3 else image
    features, _ = hog(gray_image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)
    return features

## Learning with KMeans

In [9]:
from sklearn.cluster import KMeans
from PIL import Image

# Appliquer KMeans sur des vecteurs d'images
kmeans = KMeans(n_clusters=5, random_state=0)

# Convertir les images en vecteurs
features = [image_to_vector(transforms.ToPILImage()(img)) for img, _ in dataset]
clusters = kmeans.fit_predict(features)

# Afficher la précision
print(f'KMeans accuracy : {accuracy_score(clusters, [label for _, label in dataset])}')

KMeans accuracy : 0.13786764705882354


## Learning with Random Forest

In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Préparation des données
X = [extract_hog_features(np.array(transforms.ToPILImage()(img))) for img, labels in dataset]  # Extraits des caractéristiques
y = [label for _, label in dataset]  # Étiquettes correspondantes

# Diviser en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Entraîner un modèle simple
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)

# Prédire et évaluer
accuracy = clf.score(X_test, y_test)
print(f"Random Forest accuracy : {100*accuracy:.2f}%")


Random Forest accuracy : 30.28%
