In [3]:
import os
import numpy as np
from PIL import Image
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Locations of the label file and of the folder holding the training images.
train_labels_path = r'C:\Users\ryann\Desktop\Nouveau dossier\Projet ML\y_train.csv'
train_images_directory = r'C:\Users\ryann\Desktop\Nouveau dossier\Projet ML\train_data_unlabeled\train_data_unlabeled'

# Label table; its index drives which image files are looked up below.
train_labels_df = pd.read_csv(train_labels_path)

# One image path per label row: index value i maps to file 'img_<i+1>.png'.
# NOTE(review): this presumably relies on the default 0..n-1 index produced by
# read_csv -- confirm the image numbering really starts at 1.
train_file_paths = []
for idx in train_labels_df.index:
    train_file_paths.append(os.path.join(train_images_directory, f'img_{idx+1}.png'))

def load_images_and_labels(file_paths, labels_df, target_size=(224, 224), num_classes=4):
    """Load images from disk and pair them with one-hot encoded labels.

    Each image is opened, converted to RGB, resized to ``target_size`` and
    passed through the VGG16 ``preprocess_input`` function. Labels are read
    from the ``'target'`` column of ``labels_df`` and one-hot encoded.
    Images and labels are paired purely by position.

    Args:
        file_paths: Iterable of image file paths, one per row of ``labels_df``.
            Presumably ordered like the rows of ``labels_df`` -- verify
            against the caller.
        labels_df: DataFrame with a ``'target'`` column of integer class ids.
        target_size: ``(width, height)`` passed to ``Image.resize``.
            Defaults to ``(224, 224)``.
        num_classes: Number of classes for the one-hot encoding. Defaults to 4.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is the stacked array of
        preprocessed images and ``labels`` is the one-hot label matrix
        returned by ``to_categorical``.

    Raises:
        ValueError: If the number of file paths differs from the number of
            label rows.
    """
    # Materialise so that len() works even if a generator was passed in.
    file_paths = list(file_paths)
    labels = labels_df['target'].values

    # Fail early: a silent mismatch would only surface later, as a confusing
    # shape error during the train/validation split or training.
    if len(file_paths) != len(labels):
        raise ValueError(
            f'Got {len(file_paths)} image paths but {len(labels)} labels; '
            'they must match one-to-one.'
        )

    images = []
    for path in file_paths:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(path) as source:
            rgb = source.convert('RGB').resize(target_size)
        # VGG16-specific preprocessing
        images.append(preprocess_input(np.array(rgb)))

    return np.array(images), to_categorical(labels, num_classes=num_classes)

# Read every training image into memory together with its one-hot label.
images, labels = load_images_and_labels(train_file_paths, train_labels_df)

# Hold out 30% of the samples for validation (fixed seed for a repeatable split).
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.3,
    random_state=42,
)

# Transfer learning: ImageNet-pretrained VGG16 convolutional base, kept frozen,
# topped with a small fully connected classifier for the 4 target classes.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
base_model.trainable = False  # only the new head below is trained

x = Flatten()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train, stopping once validation loss has not improved for 10 epochs and
# rolling back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Turn predicted probabilities and one-hot targets back into class indices.
y_pred = model.predict(X_val)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_val.argmax(axis=1)

# Per-class evaluation on the validation split.
print(confusion_matrix(y_true, y_pred_classes))
print(classification_report(y_true, y_pred_classes))


KeyboardInterrupt: 

In [1]:
import matplotlib.pyplot as plt
# Bar chart of how many training samples fall into each target class.
# Requires `train_labels_df` from the data-loading cell to exist in the kernel.
class_counts = train_labels_df['target'].value_counts()
ax = class_counts.plot(kind='bar')
ax.set_title('Distribution des Classes')
ax.set_xlabel('Classe')
ax.set_ylabel('Nombre d\'observations')
plt.show()

NameError: name 'train_labels_df' is not defined