In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten,
                                     Dense, Dropout, BatchNormalization)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

In [3]:
import os
import json
import numpy as np
import pandas as pd
TRAIN_IMAGES_path = 'data/raw_data/public_training_set_release_2.0/images'
TRAIN_LABELS_path = 'data/raw_data/public_training_set_release_2.0/annotations.json'
VALID_IMAGES_path = 'data/raw_data/public_validation_set_2.0/images'
VALID_LABELS_path = 'data/raw_data/public_validation_set_2.0/annotations.json'
TEST_IMAGES_path = 'data/raw_data/public_test_release_2.0/images'
TEST_LABELS_path = 'data/raw_data/public_test_release_2.0/annotations.json'

TRAIN_IMAGES = os.listdir(TRAIN_IMAGES_path)
VALID_IMAGES = os.listdir(VALID_IMAGES_path)
TRAIN_LABELS = None
VALID_LABELS = None
with open(TRAIN_LABELS_path, 'r') as f:
    TRAIN_LABELS = json.load(f)
with open(VALID_LABELS_path, 'r') as f:
    VALID_LABELS = json.load(f)

def create_dataframe(annotations_path):
    with open(annotations_path, 'r') as file:
        data = json.load(file)

    images = pd.DataFrame(data['images']).rename(columns={'id': 'image_id'})[['image_id', 'file_name', 'width', 'height']]

    categories = pd.DataFrame(data['categories'])[['id', 'name']]
    categories.rename(columns={'id': 'category_id'}, inplace=True)

    usecols = ['image_id', 'category_id']
    annotations = pd.DataFrame(data['annotations'])[usecols]

    dataframe = annotations.merge(categories, on='category_id').merge(images, on='image_id')[['file_name', 'name']]
    return dataframe
train_df = create_dataframe(TRAIN_LABELS_path)
y_train = train_df['name']
valid_df = create_dataframe(VALID_LABELS_path)
y_valid = valid_df['name']

print(train_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76491 entries, 0 to 76490
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  76491 non-null  object
 1   name       76491 non-null  object
dtypes: object(2)
memory usage: 1.2+ MB
None


In [4]:

train_datagen = ImageDataGenerator(
    rescale=1./255,              # Normalize pixel values
    rotation_range=20,           # Random rotation
    width_shift_range=0.2,       # Horizontal shift
    height_shift_range=0.2,      # Vertical shift
    shear_range=0.2,             # Shear transformation
    zoom_range=0.2,              # Zoom
    horizontal_flip=True,        # Horizontal flip
    fill_mode='nearest'          # Fill mode for missing pixels
)
test_datagen = ImageDataGenerator(rescale=1./255)


In [5]:
# Create generators
BATCH_SIZE = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224

train_generator = train_datagen.flow_from_dataframe(
        train_df,
        directory=TRAIN_IMAGES_path,
        x_col='file_name',
        y_col='name',
        target_size=(IMG_HEIGHT, IMG_HEIGHT), 
        batch_size=BATCH_SIZE,
        class_mode='categorical')

valid_generator = train_datagen.flow_from_dataframe(
        valid_df,
        directory=VALID_IMAGES_path,
        x_col='file_name',
        y_col='name',
        target_size=(IMG_HEIGHT, IMG_HEIGHT), 
        batch_size=BATCH_SIZE,
        class_mode='categorical')

# Get the number of classes
num_classes = len(train_generator.class_indices)
print(f"Number of classes: {num_classes}")


Found 76491 validated image filenames belonging to 498 classes.
Found 1830 validated image filenames belonging to 406 classes.
Number of classes: 498


In [6]:
from functools import partial
from tensorflow import keras

model = keras.applications.resnet50.ResNet50(weights="imagenet")
model.summary()



Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 112, 112, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                       

In [None]:
tf.random.set_seed(42)
np.random.seed(42)
from keras import layers, models

# model = keras.models.Sequential([
#     keras.layers.Conv2D(32, kernel_size=3, padding="same", activation="relu", input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
#     keras.layers.Conv2D(64, kernel_size=3, padding="same", activation="relu"),
#     keras.layers.MaxPool2D(),
#     keras.layers.Flatten(),
#     keras.layers.Dropout(0.25),
#     keras.layers.Dense(128, activation="relu"),
#     keras.layers.Dropout(0.5),
#     keras.layers.Dense(498, activation="softmax")
# ])
base_model = keras.applications.resnet50.ResNet50(
    weights="imagenet", include_top=False, input_shape=(224, 224, 3)
)
x = layers.GlobalAveragePooling2D()(base_model.output)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.5)(x)  # Optional
predictions = layers.Dense(num_classes, activation="softmax")(x)
model = models.Model(inputs=base_model.input, outputs=predictions)

model.compile(
    optimizer="nadam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)
model.fit(train_generator, epochs=10)
model.evaluate(valid_generator)


Epoch 1/10
  26/2391 [..............................] - ETA: 5:28:58 - loss: 6.0332 - accuracy: 0.0529