In [None]:
import os
import random
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from tensorflow.python.keras.utils.np_utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv2D, BatchNormalization, Dropout, Dense, Activation, MaxPooling2D, Flatten
from tensorflow.keras import Sequential
from tensorflow.config.threading import set_inter_op_parallelism_threads, set_intra_op_parallelism_threads


# Limit TensorFlow's thread pools: the same cap is applied to the threads used
# inside a single op (intra-op) and to ops executed concurrently (inter-op).
_TF_THREADS = 4
set_intra_op_parallelism_threads(_TF_THREADS)
set_inter_op_parallelism_threads(_TF_THREADS)

In [None]:
# Read the three dataset index files in one pass:
#   meta     - read from Meta.csv (its row count is used to size the class set)
#   train_df - read from Train.csv
#   test_df  - read from Test.csv
meta, train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './dataset/Meta.csv',
        './dataset/Train.csv',
        './dataset/Test.csv',
    )
)

In [None]:
# One class per row of Meta.csv, plus one extra "unknown" class that
# load_images assigns to the flickr background images.
num_classes = meta.shape[0] + 1  # +1 for the "unknown" class
print(f"Num classes: {num_classes}")

IMG_SIZE = (30, 30)  # (width, height) handed to cv2.resize by load_images
# NOTE(review): BATCH_SIZE is not referenced by the training cell, which
# hard-codes batch_size=32 - confirm which value is intended.
BATCH_SIZE = 64
SEED = 12
FLICKR_SAMPLE_SIZE = 200  # Number of flickr images to use

# Seed the global RNGs that load_images relies on (random.shuffle for the
# flickr sample, np.random.shuffle for the dataset order) so that a
# Restart & Run All reproduces the same data. TensorFlow's own RNG
# (weight initialisation, dropout) is not seeded here.
random.seed(SEED)
np.random.seed(SEED)

def _load_gray_image(path, target_size):
    """Read one image file and return it as a normalised grayscale array.

    The image is resized to ``target_size`` (width, height), converted from
    BGR to grayscale, given a trailing channel axis and divided by 255.
    Returns ``None`` (after printing a message) if OpenCV cannot read it.
    """
    img = cv2.imread(path)
    if img is None:
        print(f"No image found in: {path}")
        return None

    img = cv2.resize(img, target_size)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # cv2.resize takes (width, height) but the resulting array is laid out
    # as (height, width), hence the swapped indices.
    img = img.reshape(target_size[1], target_size[0], 1)
    return img / 255.0


def load_images(df, img_size=(30, 30), num_classes=None):
    """Load the images listed in ``df`` plus a sample of flickr "unknown" images.

    Args:
        df: DataFrame with a ``Path`` column (relative to ``./dataset``) and a
            ``ClassId`` column holding integer labels.
        img_size: ``(width, height)`` every image is resized to.
        num_classes: Total number of classes including the trailing "unknown"
            class. When ``None`` it is derived from ``df`` as the largest
            ``ClassId`` + 2, so ids missing from ``df`` do not shrink the
            label width. Pass it explicitly to keep several calls consistent.

    Returns:
        ``(X, y_categorical)`` where ``X`` is a float32 array of shape
        ``(N, height, width, 1)`` and ``y_categorical`` is the result of
        ``to_categorical(y, num_classes)``. Rows are shuffled.

    Raises:
        ValueError: if a loaded ``ClassId`` is not below ``num_classes - 1``,
            i.e. it would collide with (or exceed) the "unknown" label.

    Side effects:
        Prints a message for every missing/unreadable file and a summary, and
        consumes the global ``random`` and ``np.random`` generators.

    NOTE(review): every call draws its own random flickr sample, so two calls
    (e.g. for train and test data) may pick some of the same files.
    """
    base_dir = "./dataset"
    flickr_dir = "./flickr30k_images"
    target_size = (int(img_size[0]), int(img_size[1]))

    if num_classes is None:
        # Size the label space from the largest id rather than the number of
        # distinct ids, so a dataframe that lacks some classes still works.
        num_classes = int(df["ClassId"].max()) + 2 if len(df) else 1
    unknown_class = num_classes - 1  # last index is reserved for "unknown"

    X = []
    y = []

    # Load regular dataset images
    for rel_path, class_id in zip(df["Path"], df["ClassId"]):
        path = os.path.normpath(os.path.join(base_dir, rel_path))

        if not os.path.exists(path):
            print(f"File does not exist: {path}")
            continue

        img = _load_gray_image(path, target_size)
        if img is None:
            continue

        X.append(img)
        y.append(class_id)
    regular_count = len(X)

    # Load flickr images as "unknown" class
    if os.path.exists(flickr_dir):
        flickr_images = [
            os.path.join(root, file)
            for root, _dirs, files in os.walk(flickr_dir)
            for file in files
            if file.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]
        # os.walk order depends on the filesystem; sort first so that a seeded
        # shuffle selects the same files on every machine.
        flickr_images.sort()
        random.shuffle(flickr_images)

        for flickr_path in flickr_images[:FLICKR_SAMPLE_SIZE]:
            img = _load_gray_image(flickr_path, target_size)
            if img is None:
                continue

            X.append(img)
            y.append(unknown_class)
    # Count what was actually loaded; the directory may be missing, hold fewer
    # than FLICKR_SAMPLE_SIZE files, or contain unreadable ones.
    flickr_count = len(X) - regular_count

    # Convert to numpy arrays. The reshape is a no-op for loaded images and
    # keeps the (N, height, width, 1) layout when nothing was loaded.
    X = np.array(X, dtype=np.float32).reshape(-1, target_size[1], target_size[0], 1)
    y = np.array(y, dtype=np.int64)

    if regular_count and y[:regular_count].max() >= unknown_class:
        raise ValueError(
            f"ClassId {int(y[:regular_count].max())} does not fit below the "
            f"'unknown' class index {unknown_class}; num_classes={num_classes} is too small"
        )

    # Shuffle the entire dataset
    indices = np.random.permutation(len(X))
    X = X[indices]
    y = y[indices]

    # Convert to categorical
    y_categorical = to_categorical(y, num_classes)

    print(f"Loaded {len(X)} images total ({regular_count} regular + {flickr_count} flickr)")
    print(f"Class distribution: {np.unique(y, return_counts=True)}")

    return X, y_categorical

In [None]:
# Pass the shared num_classes explicitly. Left unset, load_images derives the
# label width from each dataframe separately, so the train and test one-hot
# encodings could differ from each other and from the model's output layer.
X, y = load_images(train_df, img_size=IMG_SIZE, num_classes=num_classes)
X_test, y_test = load_images(test_df, img_size=IMG_SIZE, num_classes=num_classes)


# Hold out 20% of the training data for validation; SEED fixes the split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=SEED)

print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"X_val:   {X_val.shape}, y_val:   {y_val.shape}")
print(f"X_test:  {X_test.shape}, y_test:  {y_test.shape}")

In [None]:
def _conv_bn_relu(filters, **conv_kwargs):
    """Return [Conv2D(3x3, 'same'), BatchNormalization, ReLU] as a list of layers."""
    return [
        Conv2D(filters, (3, 3), padding='same', **conv_kwargs),
        BatchNormalization(),
        Activation('relu'),
    ]


# Images are stored as (height, width, 1) arrays while IMG_SIZE is
# (width, height); deriving the input shape from IMG_SIZE keeps the model in
# sync with the configuration instead of repeating the literal 30x30.
input_shape = (IMG_SIZE[1], IMG_SIZE[0], 1)

model = Sequential([
    # 1
    *_conv_bn_relu(64, input_shape=input_shape),
    *_conv_bn_relu(64),
    MaxPooling2D(2, 2),
    Dropout(0.20),

    # 2
    *_conv_bn_relu(128),
    *_conv_bn_relu(128),
    MaxPooling2D(2, 2),
    Dropout(0.25),

    # 3
    *_conv_bn_relu(256),
    MaxPooling2D(2, 2),
    Dropout(0.25),

    # 4: classifier head with one softmax output per class (incl. "unknown")
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights; the checkpoint callback writes the best model to model.keras.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
checkpoint = ModelCheckpoint("model.keras", save_best_only=True)

# NOTE(review): the config cell defines BATCH_SIZE = 64 but training uses 32 -
# confirm which is intended before switching to the constant.
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stop, checkpoint]
)

model.summary()

In [None]:
# Score the trained model on the held-out test set.
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")

# Collapse softmax outputs and one-hot labels back to integer class ids.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)

print(classification_report(y_true, y_pred_classes))

# Training curves: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
curve_panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)

for ax, (train_key, val_key, panel_title, y_label) in zip(axes, curve_panels):
    ax.plot(history.history[train_key], label='train')
    ax.plot(history.history[val_key], label='val')
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

fig.tight_layout()