<a href="https://colab.research.google.com/github/Demiarge/kaggle-platesv2/blob/main/kaggle_compi_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Kaggle input and unzipping data to use properly

In [2]:
# install Kagglee
!pip install kaggle

# Upload Kaggle API key
from google.colab import files
files.upload()

# Move API key to the app location
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the competition data
!kaggle competitions download -c platesv2

# Unzip the data
!unzip platesv2.zip -d /content/temp
!rm -rf platesv2.zip #dlt

# Unzip main data
!cd /content/temp/
!ls
!unzip /content/temp/plates.zip -d /content/platesv2
!rm -rf /content/temp/

# move data in data folder
!mv /content/platesv2/plates/ /content/data/
!rm -rf /content/platesv2/



Saving kaggle.json to kaggle.json
Downloading platesv2.zip to /content
 77% 36.0M/47.0M [00:00<00:00, 177MB/s]
100% 47.0M/47.0M [00:00<00:00, 184MB/s]
Archive:  platesv2.zip
  inflating: /content/temp/plates.zip  
  inflating: /content/temp/sample_submission.csv  
sample_data  temp
Archive:  /content/temp/plates.zip
   creating: /content/platesv2/plates/
  inflating: /content/platesv2/plates/.DS_Store  
   creating: /content/platesv2/__MACOSX/
   creating: /content/platesv2/__MACOSX/plates/
  inflating: /content/platesv2/__MACOSX/plates/._.DS_Store  
   creating: /content/platesv2/plates/test/
  inflating: /content/platesv2/plates/test/0071.jpg  
  inflating: /content/platesv2/plates/test/0717.jpg  
  inflating: /content/platesv2/plates/test/0703.jpg  
  inflating: /content/platesv2/plates/test/0065.jpg  
  inflating: /content/platesv2/plates/test/0059.jpg  
  inflating: /content/platesv2/plates/test/0515.jpg  
  inflating: /content/platesv2/plates/test/0273.jpg  
  inflating: /content

In [None]:
# Additional cell for use

!ls -la

# Import library

In [3]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import shutil


# data traning


In [4]:
# Paths
train_dir = '/content/data/train'
test_dir = '/content/data/test'

cleaned_dir = os.path.join(train_dir, 'cleaned')
dirty_dir = os.path.join(train_dir, 'dirty')


In [5]:
# data generator
# Define parameters
img_size = 224
batch_size = 16

# Train data generator
train_datagen = ImageDataGenerator(
    rescale=1.0/255,         # Normalize images to [0, 1]
    rotation_range=20,       # Random rotation
    width_shift_range=0.2,   # Random horizontal shift
    height_shift_range=0.2,  # Random vertical shift
    shear_range=0.2,         # Random shearing
    zoom_range=0.2,          # Random zoom
    horizontal_flip=True,    # Flip images horizontally
    validation_split=0.2     # Reserve 20% for validation
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'  # Training subset
)

val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation'  # Validation subset
)

# Test data generator
test_datagen = ImageDataGenerator(rescale=1.0/255)

test_generator = test_datagen.flow_from_directory(
    test_dir,  # Dummy directory for test images
    target_size=(img_size, img_size),
    batch_size=1,
    class_mode=None,
    shuffle=False  # Keep the order for submission
)


Found 32 images belonging to 2 classes.
Found 8 images belonging to 2 classes.
Found 0 images belonging to 0 classes.


# Model

In [6]:
# Load Pretrained ResNet50
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))

# Add custom layers for binary classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)  # Binary classification

model = Model(inputs=base_model.input, outputs=output)

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
# Train the model
epochs = 10
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_data=val_generator,
    validation_steps=val_generator.samples // batch_size,
    epochs=epochs
)


Epoch 1/10


  self._warn_if_super_not_called()


KeyboardInterrupt: 

In [None]:
# tuning  optional
# Unfreeze some layers in the base model
for layer in base_model.layers[-30:]:
    layer.trainable = True

# Recompile the model with a lower learning rate
model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

# Retrain the model
fine_tune_epochs = 5
history_fine = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_data=val_generator,
    validation_steps=val_generator.samples // batch_size,
    epochs=fine_tune_epochs
)


# Evaluate and submission

In [None]:
# Predict on test data
predictions = model.predict(test_generator, steps=test_generator.samples)

# Convert probabilities to binary labels (0 = dirty, 1 = cleaned)
predicted_labels = (predictions > 0.5).astype(int).ravel()

# Save predictions in the required format
import pandas as pd

# Generate submission file
image_ids = [f"{i:04d}.jpg" for i in range(test_generator.samples)]
submission = pd.DataFrame({'id': image_ids, 'label': predicted_labels})
submission.to_csv('submission.csv', index=False)


In [None]:
from google.colab import files
files.download('submission.csv')
