In [1]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import numpy as np


In [2]:
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

train_df = pd.read_csv("../data/train.csv")
val_df = pd.read_csv("../data/validation.csv")
test_df = pd.read_csv("../data/test.csv")
test_df['label'] = 0

train_df["image_id"] = train_df["image_id"].apply(lambda x: os.path.join("../data/train", f"{x}.png"))
val_df["image_id"] = val_df["image_id"].apply(lambda x: os.path.join("../data/validation", f"{x}.png"))
test_df["image_id"] = test_df["image_id"].apply(lambda x: os.path.join("../data/test", f"{x}.png"))

train_df['label'] = train_df['label'].astype(str)
val_df['label'] = val_df['label'].astype(str)


In [3]:
train_gen = ImageDataGenerator(rescale=1./255, rotation_range=15, horizontal_flip=True)
val_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)

train_data = train_gen.flow_from_dataframe(
    train_df, x_col="image_id", y_col="label",
    target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode="sparse")

val_data = val_gen.flow_from_dataframe(
    val_df, x_col="image_id", y_col="label",
    target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode="sparse")

test_data = test_gen.flow_from_dataframe(
    test_df, x_col="image_id", y_col="label",
    target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode=None, shuffle=False)


Found 12500 validated image filenames belonging to 5 classes.
Found 1250 validated image filenames belonging to 5 classes.
Found 6500 validated image filenames.


In [4]:
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(*IMG_SIZE, 3)),
    layers.MaxPooling2D(2, 2),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(5, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
history = model.fit(train_data, epochs=10, validation_data=val_data)


Epoch 1/10


  self._warn_if_super_not_called()


[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 207ms/step - accuracy: 0.4306 - loss: 1.3009 - val_accuracy: 0.6824 - val_loss: 0.8004
Epoch 2/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 212ms/step - accuracy: 0.6843 - loss: 0.7894 - val_accuracy: 0.7072 - val_loss: 0.7120
Epoch 3/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 205ms/step - accuracy: 0.7553 - loss: 0.6509 - val_accuracy: 0.7528 - val_loss: 0.6356
Epoch 4/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 204ms/step - accuracy: 0.7773 - loss: 0.5797 - val_accuracy: 0.7760 - val_loss: 0.5436
Epoch 5/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 202ms/step - accuracy: 0.7880 - loss: 0.5520 - val_accuracy: 0.6816 - val_loss: 0.8830
Epoch 6/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 205ms/step - accuracy: 0.8022 - loss: 0.5109 - val_accuracy: 0.8040 - val_loss: 0.5232
Epoch 7/10
[1m391/39

In [6]:
pred_probs = model.predict(test_data)
pred_labels = np.argmax(pred_probs, axis=1)

submission = pd.DataFrame({
    "image_id": test_df["image_id"].apply(lambda x: os.path.splitext(os.path.basename(x))[0]),
    "label": pred_labels
})
submission.to_csv("submission_cnn.csv", index=False)


[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 44ms/step
