In [1]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from joblib import dump

In [2]:
# Directory holding the training images (relative to the notebook location).
prefix_dir = '../dataset/TRAIN/'

# Accumulators filled by the loading cell: one flattened image vector and one
# integer class label per successfully read file.
images, labels = [], []

In [3]:
%%time
for file in os.listdir(prefix_dir):
    image_path = os.path.join(prefix_dir, file)
    image = cv2.imread(image_path)

    if image is not None:
        # Convertir les images en niveaux de gris et les redimensionner en 256x256
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (256, 256))

        # Aplatir l'image en un vecteur 1D
        image_flattened = image.flatten()

        # Déterminer le label de l'image
        if "virus" in file.lower():
            label = 1
        elif "bacteria" in file.lower():
            label = 2
        else:
            label = 0

        images.append(image_flattened)
        labels.append(label)
    else:
        print(f"Image not loaded correctly: {image_path}")


CPU times: total: 44.2 s
Wall time: 1min 10s


In [4]:
%%time
from sklearn.preprocessing import StandardScaler
# Convertir les listes en tableaux numpy
images = np.array(images)
labels = np.array(labels)


CPU times: total: 109 ms
Wall time: 271 ms


In [5]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified -- consider stratify=labels if the classes are imbalanced.
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, random_state=42)

# Standardize the features with statistics learned on the training split only,
# then apply the same transform to the validation split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Hyperparameters and optimization
from sklearn.model_selection import GridSearchCV

# Candidate values for C (inverse regularization strength).
C_VALUES = [0.001, 0.01, 0.1, 1, 10, 100]

# Hyperparameter grid, split into sub-grids so that only valid solver/penalty
# pairs are evaluated: 'newton-cg', 'lbfgs' and 'sag' support the L2 penalty only,
# while 'liblinear' and 'saga' support both L1 and L2. A single flat grid would
# schedule unsupported combinations whose fits all fail.
# NOTE(review): 'liblinear' only handles multiclass problems one-vs-rest and newer
# scikit-learn releases deprecate that usage -- confirm against the installed version.
param_grid = [
    {'C': C_VALUES, 'penalty': ['l2'], 'solver': ['newton-cg', 'lbfgs', 'sag']},
    {'C': C_VALUES, 'penalty': ['l1', 'l2'], 'solver': ['liblinear', 'saga']},
]

# max_iter is a convergence budget rather than a model hyperparameter, so it is
# fixed at the top of the previously searched range instead of multiplying the grid.
grid_search = GridSearchCV(LogisticRegression(max_iter=7000), param_grid, cv=5)

In [None]:
# Train the model: run the cross-validated grid search on the standardized training split.
grid_search.fit(X_train_scaled, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

# Predict the labels of the validation set with the best estimator found.
y_pred = grid_search.best_estimator_.predict(X_val_scaled)

accuracy = accuracy_score(y_val, y_pred)
print("Accuracy:", accuracy)

# The model was trained on standardized features, so the fitted scaler must be
# persisted as well: new images have to go through the same transform before
# being passed to the saved model.
dump(scaler, 'logreg_scaler.joblib')
dump(grid_search.best_estimator_, 'logreg_model.joblib')