In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path

from skimage.io import imread
from skimage.transform import resize
from skimage.color import rgb2gray
from skimage.feature import hog

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report

import joblib


In [2]:
# Root folder of the egg image dataset.
# NOTE: absolute path on the author's machine -- adjust before running elsewhere.
DATA_ROOT = Path(r'C:\Users\cheic\Documents\M2 IA MathInfo\DeepL\PROJET FINAL DPL')

# Sub-folders holding the original and the augmented images respectively.
original_dir = DATA_ROOT.joinpath('Original Images(Eggs)')
aug_dir = DATA_ROOT.joinpath('Augmented_Images(Eggs)')

def gather_paths(base_dir):
    """Collect labelled image paths from the class sub-folders of ``base_dir``.

    Looks for the sub-folders ``'Good Eggs'`` and ``'Bad Eggs'`` directly under
    ``base_dir`` and lists the image files (.jpg, .jpeg, .png, .bmp, matched
    case-insensitively) found at the top level of each.

    Parameters
    ----------
    base_dir : str or pathlib.Path
        Folder expected to contain the two class sub-folders.

    Returns
    -------
    list of dict
        One ``{'path': str, 'label': 'good' | 'bad'}`` record per image, with
        good eggs first and each class sorted by path. Empty when ``base_dir``
        does not exist; a missing class sub-folder is skipped.
    """
    label_by_folder = {'Good Eggs': 'good', 'Bad Eggs': 'bad'}
    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}

    base_dir = Path(base_dir)  # also accept plain string paths
    rows = []
    if not base_dir.exists():
        return rows

    for folder, label in label_by_folder.items():
        class_dir = base_dir / folder
        if not class_dir.is_dir():
            continue

        # glob() yields entries in filesystem order; sorting makes the row
        # order (and any seeded split computed from it) reproducible.
        for img_path in sorted(class_dir.glob('*')):
            # Skip sub-directories that merely carry an image-like suffix.
            if not img_path.is_file():
                continue
            if img_path.suffix.lower() not in image_suffixes:
                continue

            rows.append({'path': str(img_path), 'label': label})

    return rows

# Merge the listings of the original and the augmented images into one table.
rows = gather_paths(original_dir) + gather_paths(aug_dir)

# Fail fast with an explicit message: an empty listing would otherwise surface
# as an opaque KeyError on the 'label' column just below.
if not rows:
    raise FileNotFoundError(
        f"No images found under {original_dir} or {aug_dir}; check DATA_ROOT."
    )

df = pd.DataFrame(rows)
# Integer target used by the classifier: 0 = good egg, 1 = bad egg.
df['label_enc'] = df['label'].map({'good': 0, 'bad': 1})

print("Nombre total d'images :", len(df))
df.head()


Nombre total d'images : 7000


Unnamed: 0,path,label,label_enc
0,C:\Users\cheic\Documents\M2 IA MathInfo\DeepL\...,good,0
1,C:\Users\cheic\Documents\M2 IA MathInfo\DeepL\...,good,0
2,C:\Users\cheic\Documents\M2 IA MathInfo\DeepL\...,good,0
3,C:\Users\cheic\Documents\M2 IA MathInfo\DeepL\...,good,0
4,C:\Users\cheic\Documents\M2 IA MathInfo\DeepL\...,good,0


In [3]:
# Inputs are image file paths (decoded later inside the pipeline); targets are
# the integer-encoded labels.
X, y = df['path'].to_numpy(), df['label_enc'].to_numpy()

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [4]:
class HOGExtractor(BaseEstimator, TransformerMixin):
    """Scikit-learn transformer mapping image file paths to HOG feature vectors.

    Parameters
    ----------
    img_size : tuple of int, default=(128, 128)
        Output shape every image is resized to before HOG extraction, so that
        all feature vectors have the same length.
    """

    def __init__(self, img_size=(128, 128)):
        self.img_size = img_size

    def fit(self, X, y=None):
        """Do nothing (the extractor has no learned state) and return ``self``."""
        return self

    def _load_gray(self, img_path):
        """Read one image and return it resized to ``img_size`` as a 2-D grayscale array.

        ``rgb2gray`` expects exactly three channels, so other layouts are
        normalised first: an alpha channel (RGBA, or gray + alpha) is
        discarded and single-channel images are used as-is.
        """
        img = imread(img_path)

        if img.ndim == 3 and img.shape[-1] in (2, 4):
            img = img[..., :-1]  # drop the trailing alpha channel
        if img.ndim == 3 and img.shape[-1] == 1:
            img = img[..., 0]  # (H, W, 1) -> (H, W)

        img = resize(img, self.img_size)
        # Already-grayscale images skip the colour conversion.
        return rgb2gray(img) if img.ndim == 3 else img

    def transform(self, X):
        """Compute HOG descriptors for every image path in ``X``.

        Parameters
        ----------
        X : iterable of str or path-like
            Image files to read.

        Returns
        -------
        numpy.ndarray
            One row of HOG features per input path, in input order.

        Errors raised while reading a file are propagated unchanged; skipping
        an unreadable image would silently misalign features and labels.
        """
        features = [
            hog(
                self._load_gray(img_path),
                orientations=9,
                pixels_per_cell=(8, 8),
                cells_per_block=(2, 2),
                block_norm='L2-Hys'
            )
            for img_path in X
        ]

        return np.array(features)


In [5]:
# End-to-end model: image paths -> HOG features -> standardisation ->
# PCA (150 components) -> RBF-kernel SVM.
pipeline = Pipeline([
    ('hog', HOGExtractor()),
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=150, random_state=42)),
    # probability=True adds an internally cross-validated calibration step
    # that shuffles the data; seeding it keeps predict_proba reproducible,
    # consistent with the seeded PCA and train/test split.
    ('svm', SVC(kernel='rbf', probability=True, random_state=42))
])


In [6]:
# Fit every pipeline step on the training paths and their encoded labels.
pipeline.fit(X=X_train, y=y_train)


OSError: Could not find a backend to open `C:\Users\cheic\Documents\M2 IA MathInfo\DeepL\PROJET FINAL DPL\Augmented_Images(Eggs)\Bad Eggs\Aug_Bad_Egg (1000).jpg`` with iomode `r`.
Based on the extension, the following plugins might add capable backends:
  pyav:  pip install imageio[pyav]

In [None]:
# Predict the held-out set and print per-class precision / recall / F1.
y_pred = pipeline.predict(X_test)

# target_names follow the label encoding: 0 -> 'Œuf sain', 1 -> 'Œuf défectueux'.
print(classification_report(
    y_true=y_test, y_pred=y_pred, target_names=['Œuf sain', 'Œuf défectueux']
))


In [None]:
def test_single_image(img_path, pipeline):
    """Classify one image file and display it with the predicted label.

    Parameters
    ----------
    img_path : str or path-like
        Image file to classify; it is passed to the pipeline and read again
        for display.
    pipeline : fitted estimator
        Must expose ``predict``, ``predict_proba`` and ``classes_``.

    Returns
    -------
    None
        The result is shown as a matplotlib figure whose title holds the
        label and the probability assigned to that label.
    """
    pred = pipeline.predict([img_path])[0]
    proba = pipeline.predict_proba([img_path])[0]

    # predict_proba columns follow pipeline.classes_. For an SVC with
    # probability=True the calibrated probabilities can disagree with
    # predict(), so report the probability of the class actually named in the
    # title instead of max(proba), which may belong to the other class.
    confidence = proba[list(pipeline.classes_).index(pred)]

    label = "Œuf sain" if pred == 0 else "Œuf défectueux"

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(imread(img_path))
    ax.axis('off')
    ax.set_title(f"{label}\nConfiance : {confidence:.2f}")
    plt.show()


In [None]:
# Demo on a single screenshot.
# NOTE: absolute path on the author's machine -- replace with a local image.
test_single_image(img_path=r'C:\Users\cheic\Pictures\Screenshots\1.png', pipeline=pipeline)


In [None]:
# Persist the fitted pipeline in the current working directory.
# The pickle refers to HOGExtractor, which is defined in this notebook, so that
# class must also be defined in whatever session loads the file.
joblib.dump(value=pipeline, filename='egg_quality_pipeline.pkl')
print("Modèle sauvegardé sous : egg_quality_pipeline.pkl")
