In [15]:
import os
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Input, Conv2D, MaxPooling2D, Dropout, Rescaling
from tensorflow.keras import mixed_precision
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
# Switch the Keras global dtype policy to 'mixed_float16'. This is a process-wide
# setting, so it is applied here, before any layers are created further down.
mixed_precision.set_global_policy('mixed_float16')

In [16]:
# Dataset location and the loader settings shared by the training and validation splits.
data_dir = 'dataset'
IMAGE_SIZE = (128, 128)        # every image is resized to this (height, width)
BATCH_SIZE = 32
SPLIT_SEED = 123               # fixes the shuffle so the train/validation split is reproducible
VALIDATION_FRACTION = 0.2      # share of the files held out for validation

# subset='both' indexes the directory once and returns the training and validation
# datasets together, so the seed and split fraction cannot drift apart between two
# separately configured loader calls.
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    labels='inferred',         # class labels come from the sub-directory names
    label_mode='int',
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE,
    shuffle=True,
    seed=SPLIT_SEED,
    validation_split=VALIDATION_FRACTION,
    subset='both',
)

Found 24998 files belonging to 2 classes.
Using 19999 files for training.
Found 24998 files belonging to 2 classes.
Using 4999 files for validation.


In [17]:
# Binary image classifier: pixel rescaling, three Conv2D + MaxPooling2D stages,
# then a small dense head ending in a single sigmoid unit.
model = Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first layer (the form Keras recommends for Sequential).
    Input(shape=(128, 128, 3)),
    Rescaling(1./255),  # map 0-255 pixel values into the 0-1 range

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # The notebook enables the 'mixed_float16' policy; the output layer is pinned to
    # float32 so the sigmoid probability (and the loss computed from it) is not
    # produced in half precision, as the mixed-precision guide recommends.
    Dense(1, activation='sigmoid', dtype='float32'),
])

In [18]:
# Training configuration: Adam optimizer, binary cross-entropy loss,
# and accuracy as the metric reported during fit/evaluate.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [20]:
# Environment report: TensorFlow version and the devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
all_devices = tf.config.list_physical_devices()

print("TensorFlow version:", tf.__version__)
print("Num GPUs Available:", len(gpu_devices))
print(all_devices)

TensorFlow version: 2.19.0
Num GPUs Available: 1
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [26]:
from PIL import Image
import os

# Remove image files that cannot be loaded, so training does not abort mid-epoch.
#
# NOTE: train_ds / val_ds are built from a file listing taken when the loading cell
# ran. If this cell removes anything, re-run the dataset-loading cell afterwards
# (ideally run this cell *before* it) so the datasets do not point at deleted files.

data_dir = 'dataset'

# Extensions picked up by image_dataset_from_directory; files with any other
# extension are ignored by the loader, so they are left untouched here.
IMAGE_EXTENSIONS = ('.bmp', '.gif', '.jpeg', '.jpg', '.png')


def _is_loadable_image(path):
    """Return True when both Pillow and TensorFlow's image decoder accept ``path``.

    Pillow's ``verify()`` alone is not sufficient: the training failure in this
    notebook is raised by TensorFlow's ``DecodeImage`` op, so the file is also
    decoded with ``tf.io.decode_image`` to exercise that same op.

    Args:
        path: Path of the image file to check.

    Returns:
        bool: False if either check raises, True otherwise.
    """
    try:
        # The context manager closes the file handle, which also keeps the
        # subsequent os.remove() from failing on platforms that lock open files.
        with Image.open(path) as img:
            img.verify()
        tf.io.decode_image(tf.io.read_file(path), channels=3, expand_animations=False)
    except Exception as err:  # deliberate best-effort: any failure means "unusable"
        print(f"Cannot load {path}: {type(err).__name__}")
        return False
    return True


removed_paths = []
for class_folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, class_folder)
    if not os.path.isdir(folder_path):
        continue  # stray file sitting next to the class folders
    for fname in sorted(os.listdir(folder_path)):
        fpath = os.path.join(folder_path, fname)
        if not (os.path.isfile(fpath) and fname.lower().endswith(IMAGE_EXTENSIONS)):
            continue
        if not _is_loadable_image(fpath):
            print(f"Removing corrupted file: {fpath}")
            os.remove(fpath)
            removed_paths.append(fpath)

print(f"Removed {len(removed_paths)} unreadable file(s).")


In [21]:
# train_ds / val_ds are already batched (batch_size=32 was given to the loader), and
# Model.fit must not be given batch_size for dataset inputs, so it is omitted here.
# The returned History is kept so per-epoch loss/accuracy stay available afterwards.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
)

Epoch 1/10
[1m155/625[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m41s[0m 88ms/step - accuracy: 0.5221 - loss: 0.7023

Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9


[1m263/625[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m31s[0m 87ms/step - accuracy: 0.5400 - loss: 0.6936

Corrupt JPEG data: 65 extraneous bytes before marker 0xd9


[1m275/625[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m30s[0m 87ms/step - accuracy: 0.5418 - loss: 0.6927

Corrupt JPEG data: 239 extraneous bytes before marker 0xd9


[1m278/625[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m30s[0m 87ms/step - accuracy: 0.5423 - loss: 0.6925

2025-10-08 20:21:10.246049: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 9381684588561613605
2025-10-08 20:21:10.246162: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 16752398282538157906


InvalidArgumentError: Graph execution error:

Detected at node decode_image/DecodeImage defined at (most recent call last):
<stack traces unavailable>
Detected at node decode_image/DecodeImage defined at (most recent call last):
<stack traces unavailable>
2 root error(s) found.
  (0) INVALID_ARGUMENT:  Input size should match (header_size + row_size * abs_height) but they differ by 2
	 [[{{node decode_image/DecodeImage}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_4]]
  (1) INVALID_ARGUMENT:  Input size should match (header_size + row_size * abs_height) but they differ by 2
	 [[{{node decode_image/DecodeImage}}]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_multi_step_on_iterator_13651]