In [None]:
import os
import numpy as np
from PIL import Image
import pandas as pd
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping

# Locations of the training labels (CSV) and of the training images.
train_labels_path = r'C:\Users\ryann\Desktop\Nouveau dossier\Projet ML\y_train.csv'
train_images_directory = r'C:\Users\ryann\Desktop\Nouveau dossier\Projet ML\train_data_unlabeled\train_data_unlabeled'

# One row per training image; the 'target' column holds the class label.
train_labels_df = pd.read_csv(train_labels_path)
labels = train_labels_df['target'].values

# Row i of the label file is paired with the file named img_<i+1>.png.
train_file_paths = [
    os.path.join(train_images_directory, f'img_{row_idx+1}.png')
    for row_idx in train_labels_df.index
]

def load_and_preprocess_image(image_path):
    """Load one image from disk and return it as a VGG16-ready array.

    The image is converted to RGB, resized to 224x224 and passed through
    VGG16's ``preprocess_input``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float array of shape (224, 224, 3) as produced by
        ``preprocess_input``.
    """
    # The context manager closes the underlying file as soon as the pixel
    # data has been copied by convert(), instead of relying on garbage
    # collection.
    with Image.open(image_path) as source:
        rgb = source.convert('RGB').resize((224, 224))
    # Convert to float explicitly so preprocess_input works on a float array
    # rather than on the raw uint8 pixels.
    img_array = np.array(rgb, dtype=np.float32)
    return preprocess_input(img_array)

def data_generator_with_augmentation(file_paths, labels, batch_size, num_classes=4):
    """Yield endless batches of augmented images with one-hot labels.

    The (path, label) pairs are reshuffled at the start of every pass over
    the data. Each batch is loaded with ``load_and_preprocess_image``, run
    through random geometric augmentation and yielded as ``(x, y)``.

    Args:
        file_paths: Sequence of image file paths.
        labels: Sequence of integer class labels, aligned with ``file_paths``.
        batch_size: Maximum number of images per batch (the last batch of a
            pass may be smaller).
        num_classes: Number of classes used for one-hot encoding.

    Yields:
        Tuples ``(x, y)`` where ``x`` is the augmented image batch and ``y``
        the one-hot encoded labels.

    Raises:
        ValueError: On first iteration, if the inputs are empty, differ in
            length, or ``batch_size`` is smaller than 1.
    """
    # Validate up front: with no data (or a negative batch size) the endless
    # loop below would spin forever without ever yielding, and zip() would
    # silently drop unmatched paths/labels.
    if len(file_paths) != len(labels):
        raise ValueError(
            f"file_paths and labels differ in length: {len(file_paths)} != {len(labels)}"
        )
    if len(file_paths) == 0:
        raise ValueError("file_paths is empty; nothing to generate")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # No preprocessing_function here: load_and_preprocess_image already
    # applies VGG16's preprocess_input, and applying it a second time would
    # subtract the channel means twice and undo the RGB->BGR swap.
    datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
    )

    dataset = list(zip(file_paths, labels))
    while True:
        # New random order for every pass over the data.
        np.random.shuffle(dataset)
        for start in range(0, len(dataset), batch_size):
            batch = dataset[start:start + batch_size]
            batch_images = np.array(
                [load_and_preprocess_image(filepath) for filepath, _ in batch]
            )
            batch_labels = to_categorical(
                np.array([label for _, label in batch]), num_classes=num_classes
            )
            # flow() iterates forever; only one augmented version of this
            # batch is wanted, so take a single item from it.
            x, y = next(datagen.flow(batch_images, batch_labels, batch_size=batch_size))
            yield x, y

# Transfer learning: ImageNet-pretrained VGG16 convolutional base, frozen so
# that only the new classification head is trained.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

# Classification head: flatten the feature maps, one hidden dense layer, then
# a 4-way softmax.
x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras versions.
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# fit() below receives no validation data, so 'val_loss' never exists and the
# callback could never trigger. Monitor the training loss instead; switch back
# to 'val_loss' if a held-out validation set is passed to fit().
early_stopping = EarlyStopping(monitor='loss', patience=10)

batch_size = 32
train_gen = data_generator_with_augmentation(train_file_paths, labels, batch_size)
# At least one step per epoch, even when there are fewer samples than
# batch_size (integer division alone would give 0).
steps_per_epoch = max(1, len(train_file_paths) // batch_size)

model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=20, callbacks=[early_stopping])


In [None]:
# Directory whose files are loaded as test images for prediction.
test_images_dir = "C:/Users/ryann/Desktop/Nouveau dossier/Projet ML/test_data_unlabeled/test_data_unlabeled"
# CSV template read below; its 'target' column is overwritten with predictions.
submission_example_path = "C:/Users/ryann/Desktop/Nouveau dossier/Projet ML/submission_example.csv"

def prep_test_images(image_path):
    """Load one test image and prepare it for the VGG16-based model.

    The image is converted to RGB, resized to 224x224 and passed through
    VGG16's ``preprocess_input``. This is the transform the training loader
    (``load_and_preprocess_image``) uses; scaling to [0, 1] instead would
    feed the model inputs unlike those it was trained on.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float array of shape (224, 224, 3) as produced by
        ``preprocess_input``.
    """
    # Close the file deterministically once the pixel data has been copied.
    with Image.open(image_path) as source:
        img = source.convert('RGB').resize((224, 224))
    return preprocess_input(np.array(img, dtype=np.float32))

def _image_number(filename):
    """Return the integer N from a file name shaped like ``<prefix>_N.<ext>``."""
    return int(filename.split('_')[1].split('.')[0])


# Process the test files in numeric order (img_2 before img_10).
test_filenames = sorted(os.listdir(test_images_dir), key=_image_number)
test_images = [
    prep_test_images(os.path.join(test_images_dir, filename))
    for filename in test_filenames
]

# Class probabilities for every test image, then the most likely class.
predictions = model.predict(np.array(test_images))
predicted_classes = predictions.argmax(axis=1)

# Fill the submission template with the predicted classes and save it.
submission_path = "C:/Users/ryann/Desktop/Nouveau dossier/Projet ML/submission.csv"
submission_df = pd.read_csv(submission_example_path)
submission_df['target'] = predicted_classes
submission_df.to_csv(submission_path, index=False)

print(f"Saved submission file at '{submission_path}'")