Import libraries and define constants

In [4]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score
import numpy as np

IMG_SIZE = (224, 224)
BATCH_SIZE = 32

Load and preprocess CSV data

In [None]:

train_df = pd.read_csv("data/train.csv")
val_df = pd.read_csv("data/validation.csv")
test_df = pd.read_csv("data/test.csv")
test_df['label'] = 0  # Adding label column

# Path instead of id
train_df["image_id"] = train_df["image_id"].apply(lambda x: os.path.join("data/train", f"{x}.png"))
val_df["image_id"] = val_df["image_id"].apply(lambda x: os.path.join("data/validation", f"{x}.png"))
test_df["image_id"] = test_df["image_id"].apply(lambda x: os.path.join("data/test", f"{x}.png"))

# Convert labels to string
train_df['label'] = train_df['label'].astype(str)
val_df['label'] = val_df['label'].astype(str)


Set up image data generators

In [None]:
# normalizes the pixel values from [0, 255] to [0, 1], which helps neural networks train better.
train_gen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
val_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)

train_data = train_gen.flow_from_dataframe(
    train_df, x_col="image_id", y_col="label",
    target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode="sparse")

val_data = val_gen.flow_from_dataframe(
    val_df, x_col="image_id", y_col="label",
    target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode="sparse")

test_data = test_gen.flow_from_dataframe(
    test_df, x_col="image_id", y_col="label",
    target_size=IMG_SIZE, batch_size=BATCH_SIZE,
    class_mode=None, shuffle=False)


Found 12500 validated image filenames belonging to 5 classes.
Found 1250 validated image filenames belonging to 5 classes.
Found 6500 validated image filenames.


Build the model using MobileNetV2 base and compile

In [None]:

# Model: MobileNetV2
base_model = MobileNetV2(include_top=False, input_shape=(*IMG_SIZE, 3), weights='imagenet')
base_model.trainable = False

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(5, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


Train the model

In [None]:

history = model.fit(train_data, epochs=5, validation_data=val_data)

  self._warn_if_super_not_called()


Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m329s[0m 833ms/step - accuracy: 0.5233 - loss: 1.1861 - val_accuracy: 0.7456 - val_loss: 0.6727
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 532ms/step - accuracy: 0.7395 - loss: 0.6923 - val_accuracy: 0.7896 - val_loss: 0.5775
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 534ms/step - accuracy: 0.7735 - loss: 0.5915 - val_accuracy: 0.7968 - val_loss: 0.5506
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 541ms/step - accuracy: 0.8044 - loss: 0.5217 - val_accuracy: 0.8136 - val_loss: 0.5094
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 542ms/step - accuracy: 0.8190 - loss: 0.4696 - val_accuracy: 0.8184 - val_loss: 0.5079


Make predictions on the test set and save predictions to submission file

In [None]:

pred_probs = model.predict(test_data)
pred_labels = np.argmax(pred_probs, axis=1)

submission = pd.DataFrame({
    "image_id": test_df["image_id"].apply(lambda x: os.path.splitext(os.path.basename(x))[0]),
    "label": pred_labels
})
submission.to_csv("submission.csv", index=False)
print("✅ Fisierul 'submission.csv' a fost salvat.")


[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 748ms/step
✅ Fișierul 'submission.csv' a fost salvat.


Save the trained model and variables

In [11]:
import pickle
import json
model.save("mobilenetv2_model.h5")

with open("project_data.pkl", "wb") as f:
    pickle.dump({
        "train_df": train_df,
        "val_df": val_df,
        "test_df": test_df,
        "pred_labels": pred_labels
    }, f)

with open("training_history.json", "w") as f:
    json.dump(history.history, f)

print("✅ Totul a fost salvat.")




✅ Totul a fost salvat.
