In [1]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image

# Define directories
train_dir = '/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-train/BTTAIxNYBG-train'
test_dir = '/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-test/BTTAIxNYBG-test'
validation_dir = '/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-validation/BTTAIxNYBG-validation'

# Load datasets
train_df = pd.read_csv('/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-train.csv')
test_df = pd.read_csv('/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-test.csv')
validate_df = pd.read_csv('/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-validation.csv')

# Data augmentation configuration for training
train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    rescale=1./255
)

# Note: No augmentation for validation and test data, only rescaling
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Convert dataframe to a format suitable for the model training
def df_to_dataset(dataframe, datagen, directory, batch_size=32):
    return datagen.flow_from_dataframe(
        dataframe=dataframe,
        directory=directory,
        x_col='imageFile',  # Column in dataframe that contains the filenames
        y_col='classLabel',  # Column in dataframe that contains the class/label
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical'  # Change this if not a multiclass classification
    )
# Create datasets for training, validation, and testing
train_dataset = df_to_dataset(train_df, train_datagen, train_dir)
validation_dataset = df_to_dataset(validate_df, validation_datagen, validation_dir)

# This setup is now ready for training with model.fit using the train_dataset and validation_dataset


2024-03-22 18:56:41.581130: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-22 18:56:41.581290: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-22 18:56:41.745037: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 81946 validated image filenames belonging to 10 classes.
Found 10244 validated image filenames belonging to 10 classes.


In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import MobileNetV2

# load MobileNetV2 model, pretrained on ImageNet, without top layer
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# freeze base model
base_model.trainable = False


model = Sequential([
    base_model,
    # Convert features to vectors
    tf.keras.layers.GlobalAveragePooling2D(),
    # Add a dense layer for classification
    Dense(1024, activation='relu'),
    # Final layer with softmax activation for multi-class classification
    Dense(10, activation='softmax')
])


model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [3]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True),
    ModelCheckpoint('best_model.h5.keras', monitor='val_loss', save_best_only=True)
]


history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=3,
    callbacks=callbacks
)

Epoch 1/3


  self._warn_if_super_not_called()


[1m2561/2561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3713s[0m 1s/step - accuracy: 0.8580 - loss: 0.4160 - val_accuracy: 0.8945 - val_loss: 0.3031
Epoch 2/3
[1m2561/2561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3799s[0m 1s/step - accuracy: 0.9184 - loss: 0.2268 - val_accuracy: 0.9082 - val_loss: 0.2571
Epoch 3/3
[1m2561/2561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3686s[0m 1s/step - accuracy: 0.9273 - loss: 0.2016 - val_accuracy: 0.9206 - val_loss: 0.2322
Restoring model weights from the end of the best epoch: 3.


In [4]:
validation_loss, validation_accuracy = model.evaluate(validation_dataset)
print(f'Validation Loss: {validation_loss}')
print(f'Validation Accuracy: {validation_accuracy}')

[1m321/321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 899ms/step - accuracy: 0.9171 - loss: 0.2290
Validation Loss: 0.2287425845861435
Validation Accuracy: 0.9206364750862122


In [5]:
test_dataset = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=test_dir,
    x_col='imageFile',  
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,  
    shuffle=False
)

Found 30690 validated image filenames.


In [6]:
predictions = model.predict(test_dataset)
predicted_class_indices = np.argmax(predictions, axis=1)

[1m960/960[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m866s[0m 900ms/step


In [7]:
labels = (train_dataset.class_indices)
labels = dict((v,k) for k,v in labels.items())
predicted_class_ids = [labels[v] for v in predicted_class_indices]

submission_df = pd.DataFrame({'uniqueID': test_df['uniqueID'], 'classID': predicted_class_indices})
submission_df.to_csv('submission.csv', index=False)