In [None]:
import pandas as pd
import numpy as np
import os

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

In [None]:
# Step 1. Load Data
# Every split lives under the same Kaggle input root; the CSVs sit at the top
# level and the images sit in a "<split>/<split>" folder.
DATA_ROOT = '/kaggle/input/bttai-nybg-2024'

df = pd.read_csv(f'{DATA_ROOT}/BTTAIxNYBG-train.csv', header=0)        # training metadata
tDf = pd.read_csv(f'{DATA_ROOT}/BTTAIxNYBG-test.csv', header=0)        # test metadata
vDf = pd.read_csv(f'{DATA_ROOT}/BTTAIxNYBG-validation.csv', header=0)  # validation metadata

train_image_directory = f'{DATA_ROOT}/BTTAIxNYBG-train/BTTAIxNYBG-train'
# Fix: the test and validation folders were previously nested under
# "BTTAIxNYBG-train/", which does not match the test path used by submit().
# NOTE(review): the validation layout is assumed to mirror train/test - confirm.
test_image_directory = f'{DATA_ROOT}/BTTAIxNYBG-test/BTTAIxNYBG-test'
validation_image_directory = f'{DATA_ROOT}/BTTAIxNYBG-validation/BTTAIxNYBG-validation'

In [None]:
# Step 2. Data Preprocessing
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def load_and_preprocess_image(image_data, target_size=(224, 224)):
    """Rescale an image to [0, 1] and standardize each channel.

    The per-channel mean/std are the commonly used ImageNet statistics
    (mean 0.485/0.456/0.406, std 0.229/0.224/0.225).

    Args:
        image_data: Array-like image with the colour channels on the last
            axis and pixel values in the 0-255 range.
        target_size: Unused; kept only so existing callers keep working.
            No resizing is performed here.

    Returns:
        A float32 NumPy array with the same shape as ``image_data``.

    Note:
        The input is divided by 255 here, so do not combine this function
        with ``rescale=1./255`` on the same generator or the scaling is
        applied twice.
    """
    # Fix: the previous version used an un-imported `Normalization` layer
    # (NameError on first call) and had 0.299 instead of 0.229 as the red
    # channel standard deviation.
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    scaled = np.asarray(image_data, dtype=np.float32) / 255.0  # Rescale to [0, 1]
    return (scaled - channel_mean) / channel_std

datagen = ImageDataGenerator()

# Show how many training rows each class label has.
print(df['classLabel'].value_counts())

# Random augmentation settings, used for the training generator only.
# https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
# https://towardsdatascience.com/exploring-image-data-augmentation-with-keras-and-tensorflow-a8162d89b844
augmentation_settings = dict(
    rotation_range=40,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training images: rescaled to [0, 1] and randomly augmented.
# (load_and_preprocess_image is intentionally not wired in as preprocessing_function.)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_settings)

# Validation images: rescaled only, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Convert dataframe to a format suitable for the model training
# https://vijayabhaskar96.medium.com/tutorial-on-keras-flow-from-dataframe-1fd4493d237c
def df_to_dataset(dataframe, image_directory, datagen, batch_size=32):
    """Create a batch iterator from a metadata dataframe and an image folder.

    Args:
        dataframe: Table with an 'imageFile' column (file names) and a
            'classLabel' column (targets).
        image_directory: Folder the file names are resolved against.
        datagen: Object exposing ``flow_from_dataframe`` (e.g. an
            ImageDataGenerator).
        batch_size: Number of images per batch.

    Returns:
        Whatever ``datagen.flow_from_dataframe`` returns, configured for
        224x224 images and categorical labels.
    """
    flow_options = {
        'dataframe': dataframe,
        'directory': image_directory,
        'x_col': 'imageFile',
        'y_col': 'classLabel',
        'target_size': (224, 224),
        'batch_size': batch_size,
        # Multiclass targets; change this if the task is not multiclass.
        'class_mode': 'categorical',
    }
    return datagen.flow_from_dataframe(**flow_options)

# Build the batch iterators: augmented training data and rescale-only validation data
train_dataset = df_to_dataset(
    dataframe=df,
    image_directory=train_image_directory,
    datagen=train_datagen,
    batch_size=32,
)
validation_dataset = df_to_dataset(
    dataframe=vDf,
    image_directory=validation_image_directory,
    datagen=validation_datagen,
    batch_size=32,
)

# This function will plot images in the form of a grid with 1 row and 5 columns where images are placed in each column.
def plotImages(images_arr):
    """Display images side by side in a one-row, five-column figure.

    Images and axes are paired with ``zip``, so at most five images are
    drawn and any unused axes stay empty.
    """
    figure, axis_grid = plt.subplots(nrows=1, ncols=5, figsize=(20,20))
    for picture, axis in zip(images_arr, axis_grid.flatten()):
        axis.imshow(picture)
    plt.tight_layout()
    plt.show()

# Request the first image of batch 0 five times; each request goes through the
# augmenting generator again (presumably yielding five variants of one image).
augmented_images = list(train_dataset[0][0][0] for _ in range(5))
plotImages(augmented_images)

In [None]:
# Step 3. Modeling
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# ResNet50 backbone with ImageNet weights and without its original classifier
base_model = ResNet50(include_top=False, weights='imagenet')

# Custom classification head:
# global average pooling -> 1024-unit ReLU layer -> 10-way softmax
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(10, activation='softmax')(x)

# Compile the model
#model = Model(inputs=base_model.input, outputs=predictions)
#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Step 4. Training with validation (currently disabled)
# The fit call below is wrapped in a string literal, so it never runs.
# It refers to `model`, which is only created in commented-out code in Step 3.
'''
model.fit(
    train_dataset,
    epochs=4,
    validation_data = validation_dataset)


'''

In [None]:
# Saving and reloading the baseline model (currently disabled).
# The code below is wrapped in a string literal, so nothing is saved or loaded;
# `loaded_model` therefore does not exist unless this is re-enabled.
'''
model.save('/kaggle/working/my_Resnet_V2.h5')

from tensorflow.keras.models import load_model
loaded_model = load_model('/kaggle/working/my_Resnet_V2.h5')

'''

In [None]:
#loaded_model

In [None]:
# Step 5. Save as csv
def submit(model,
           test_csv="/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-test.csv",
           test_dir='/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-test/BTTAIxNYBG-test',
           output_path='/kaggle/working/submission.csv',
           target_size=(224, 224),
           batch_size=32):
    """Predict a class for every test image and write the submission CSV.

    Args:
        model: Trained model exposing ``predict``; it must return one row of
            class scores per test image.
        test_csv: CSV with at least 'uniqueID' and 'imageFile' columns.
        test_dir: Folder containing the files named in 'imageFile'.
        output_path: Where the two-column submission CSV is written.
        target_size: Size the images are resized to before prediction.
        batch_size: Number of images per prediction batch.

    Returns:
        The test dataframe with an added 'classID' column.

    Raises:
        ValueError: If the number of predictions differs from the number of
            rows in ``test_csv``.
    """
    test_data = pd.read_csv(test_csv)

    # Same rescaling as the validation generator; no augmentation at test time.
    test_datagen = ImageDataGenerator(rescale=1./255)

    test_generator = test_datagen.flow_from_dataframe(
        dataframe=test_data,
        directory=test_dir,
        x_col="imageFile",
        y_col=None,
        target_size=target_size,
        batch_size=batch_size,
        class_mode=None,
        shuffle=False,  # keep predictions aligned with the dataframe rows
    )

    test_preds = model.predict(test_generator)

    # Fail with a clear message instead of pandas' generic length error,
    # e.g. when the generator skipped files it could not find in test_dir.
    if len(test_preds) != len(test_data):
        raise ValueError(
            f"Got {len(test_preds)} predictions for {len(test_data)} test rows; "
            f"check that every 'imageFile' exists in {test_dir}"
        )

    # NOTE(review): the argmax index is written directly as classID; this
    # assumes the training generator's class ordering matches the
    # competition's classID numbering - confirm.
    test_data['classID'] = test_preds.argmax(axis=-1)
    test_data[['uniqueID', 'classID']].to_csv(output_path, index=False)

    return test_data

In [None]:
#submit(loaded_model).head()

Last Update : 041324 + 041424

How to enhance the model


1. Fine-tuning the pre-trained model ✓
2. Data augmentation
3. Regularization (L2)
4. Learning rate tweaking
5. Increase model complexity
6. Ensemble learning
7. Data Cleaning
8. Class imbalance handling

In [None]:
# Step further : Fine-Tuning (freeze the layers of pretrained model, and unfreeze)

from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
import tensorflow as tf



# Assemble the full model: ResNet50 backbone + custom classification head
model2 = Model(inputs=base_model.input, outputs=predictions)

# Start from a fully frozen backbone (the head layers stay trainable)
for layer in base_model.layers:
    layer.trainable = False

# Fine-tuning: unfreeze only the layers from FINE_TUNE_AT onward, so the early
# layers keep their pretrained weights.
# Fix: this used to slice `[:100]`, which unfroze the FIRST 100 layers instead
# of the layers starting at that index.
FINE_TUNE_AT = 100
for layer in model2.layers[FINE_TUNE_AT:]:
    layer.trainable = True

# Compile once, after the trainable flags are final. An earlier compile with the
# default Adam settings was removed: nothing was trained between the two compiles.
model2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),  # Lower learning rate for fine-tuning
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Fit the model
history = model2.fit(
    train_dataset,
    epochs=10,  # Increase epochs for fine-tuning
    validation_data=validation_dataset
)

# Save the fine-tuned model
model2.save('/kaggle/working/fine_tuned_ResNet50.h5')

In [None]:
# Write the submission CSV with the fine-tuned model and preview the first rows
submit(model2).head()

last update : 041424
    
    85.67% --> 75% --> 96% (epoch 10)
    
   Hopefully 98%