# 1-Import

In [75]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from time import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize, LabelEncoder
from sklearn.utils import class_weight, shuffle
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

from tensorflow.keras.models import Model, Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16
from keras.models import Sequential
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import to_categorical
from keras.saving import load_model

#from plot_keras_history import show_history, plot_history

import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# For normalization
import cv2
from skimage.exposure import match_histograms

# os.environ["TF_KERAS"]='1'
#print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# 2- Data collection

In [76]:
# Data collection
data = pd.read_csv('/kaggle/input/UBC-OCEAN/test.csv')
#data['label'] = 0
data['image_path'] = [''.join(['/kaggle/input/UBC-OCEAN/test_thumbnails/', str(x), '_thumbnail.png']) if ''.join([str(x), '_thumbnail.png']) in os.listdir('/kaggle/input/UBC-OCEAN/test_thumbnails') else ''.join(['/kaggle/input/UBC-OCEAN/test_images/', str(x), '.png']) for x in data['image_id']]

#Test
# Loading data (test)
df = pd.read_csv('/kaggle/input/UBC-OCEAN/train.csv')
df['image_path'] = [''.join(['/kaggle/input/UBC-OCEAN/train_thumbnails/', str(x), '_thumbnail.png']) if ''.join([str(x), '_thumbnail.png']) in os.listdir('/kaggle/input/UBC-OCEAN/train_thumbnails') else ''.join(['/kaggle/input/UBC-OCEAN/train_images/', str(x), '.png']) for x in df['image_id']]

X_df = df.drop(columns='label')
y_df = df['label']

X,X_test,y,y_test = train_test_split(X_df, y_df, train_size=0.1, stratify = df['label'])

data = pd.DataFrame(X, columns = list(df.columns).remove('label'))[0:10]
data['label'] = y                   

# 3-Model preparation

In [77]:
# Fonction de création du modèle
def create_model_fct(nb_lab) :
    weights_path = "/kaggle/input/weights/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5" # ATTENTION : activer hors connexion
    #weights_path = 'imagenet'
    # Charger le modèle VGG16 pré-entraîné
    #base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    model0 = VGG16(include_top=False, weights=weights_path, input_shape=(224, 224, 3)) # ATTENTION : activer hors connexion
    
    # Layer non entraînables = on garde les poids du modèle pré-entraîné
    for layer in model0.layers:
        layer.trainable = False

    # Récupérer la sortie de ce réseau
    x = model0.output
    # Compléter le modèle
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(nb_lab, activation='softmax')(x)

    # Définir le nouveau modèle
    model = Model(inputs=model0.input, outputs=predictions)
    # compilation du modèle 
    model.compile(loss="categorical_crossentropy", optimizer='rmsprop', metrics=["accuracy"])

    print(model.summary())
    
    return model

In [78]:
le_4 = LabelEncoder()
label_dict_4 = {'HGSC': 0, 'Others': 1}
le_4.fit_transform(list(label_dict_4.keys()))

le_5 = LabelEncoder()
label_dict_5 = {'CC': 0, 'EC': 1, 'Others': 1}
le_5.fit_transform(list(label_dict_5.keys()))

le_6 = LabelEncoder()
label_dict_6 = {'LGSC': 0, 'MC': 1}
le_6.fit_transform(list(label_dict_6.keys()))

array([0, 1])

In [79]:
# Model instanciation
with tf.device('/gpu:1'): 
    model_4 = create_model_fct(2)
    model_5 = create_model_fct(3)
    model_6 = create_model_fct(2)
    
    model_4.load_weights('/kaggle/input/model-weights/model_best_weights_4.h5')
    model_5.load_weights('/kaggle/input/model-weights/model_best_weights_5.h5')
    model_6.load_weights('/kaggle/input/model-weights/model_best_weights_6.h5')   

Model: "model_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_21 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0  

# 4-Predictions

In [80]:
def predict_data(df, model_path, lex):
    # Charger le modèle pré-entraîné
    # Assurez-vous d'ajuster le chemin vers votre modèle
    #model = load_model("/kaggle/input/weights-4/model_best_weights_4.h5")
    model = load_model(model_path)
    data_ = df.copy()

    # Charger et prétraiter les images pour la prédiction
    image_paths = data_['image_path']  # Remplacez 'your_dataframe' par le nom réel de votre DataFrame
    predictions_list = []

    for img_path in image_paths:
        # Charger l'image
        img = image.load_img(img_path, target_size=(224, 224))

        # Prétraiter l'image pour l'entrée dans le modèle
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        # Faire la prédiction
        prediction = model.predict(img_array)

        # Ajouter la prédiction à la liste
        predictions_list.append(prediction)

    # Concaténer toutes les prédictions dans un seul tableau numpy
    y_pred = np.concatenate(predictions_list, axis=0)

    # Retrouver les labels categorielles
    y_pred_cat = lex.inverse_transform(np.argmax(y_pred, axis=1))

    # Mettre à jour le df data avec une colonne pred
    data_.loc[:, 'pred'] = y_pred_cat

    return data_

In [81]:
data_copy = data.copy()
data_4 = predict_data(data, "/kaggle/input/model-weights/model_best_weights_4.h5", le_4)
data_4_copy = data_4.copy()
data_5 = predict_data(data_4_copy.loc[data_4_copy['pred']=='Others',:], "/kaggle/input/model-weights/model_best_weights_5.h5", le_5)
data_5_copy = data_5.copy()
data_6 = predict_data(data_5_copy.loc[data_5_copy['pred']=='Others',:], "/kaggle/input/model-weights/model_best_weights_6.h5", le_6)    



ValueError: need at least one array to concatenate

In [None]:
submit_init = pd.concat([data_4,data_5,data_6])
submit_init = submit_init.loc[submit_init['pred']!='Others',:]
submit = pd.DataFrame()
submit['image_id'] = submit_init['image_id']
submit['label'] = submit_init['pred']

In [None]:
result = submit_init

In [None]:
result[30:70]

# 5-Performances (test)

print('4/6-building the basic confusion matrix')
# Obtenez les indices des classes prédites et réelles pour les échantillons disponibles
y_val_indices = result['label']
y_pred_indices = result['pred']

# Générer la matrice de confusion
cm = confusion_matrix(y_val_indices, y_pred_indices)

# Afficher la matrice de confusion
print(cm)

# Afficher le rapport de classification
print("\n5/6-building the classification report")
print(classification_report(y_val_indices, y_pred_indices))

print('6/6-building the sns confusion matrix')
# Finding the matching categorical labels for the numerical labels
list_cat_labels = sorted([x for x in set(y_val_indices)|set(y_pred_indices)])

# Proceding with sns
df_cm = pd.DataFrame(cm, index=list_cat_labels, columns=list_cat_labels)

plt.figure(figsize=(6, 4))
ax = sns.heatmap(df_cm, annot=True, cmap="Reds")

# Ajouter des étiquettes aux axes
ax.set_xlabel("Prediction")
ax.set_ylabel("Actual")

plt.show()

# 6-Submit

In [None]:
submit.head()

In [None]:
submit.shape

In [None]:
submit.to_csv('submission.csv',index=False)