<a href="https://colab.research.google.com/github/RiteshTripathi123/Image-Classification/blob/main/CNNImageClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Setup and load data

In [None]:
!pip install tensorflow opencv-python matplotlib
!pip install -q tensorflow-datasets

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import os

In [None]:
# Ask TensorFlow to grow GPU memory on demand for every GPU it can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device, True)

In [None]:
# Load cats_vs_dogs through TFDS as (image, label) pairs and keep the dataset metadata.
ds, ds_info = tfds.load('cats_vs_dogs', as_supervised=True, with_info=True)

In [None]:
print(ds_info)

Removing corrupt images

In [None]:
import cv2
from PIL import Image

In [None]:
import zipfile
import os

# Archive to unpack (located in the TFDS downloads folder).
zip_path = "/root/tensorflow_datasets/downloads/cats_vs_dogs/down.micr.com_down_3_E_1_3E1C-ECDB-4869-83t5dL0AqEqZkh827kQD8ImFN3e1ro0VHHaobmSQAzSvk.zip"

# Folder that receives the unpacked files; created if it is missing.
extract_to = "/root/ImageClassification"
os.makedirs(extract_to, exist_ok=True)

# Unpack every member of the archive into the destination folder.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_to)

print("Zip extracted to:", extract_to)

In [None]:
# Folder handed to the cleanup and dataset-loading steps below; they treat each
# sub-directory of it as one class.
data_dir ='/root/ImageClassification/PetImages'

In [None]:
def _remove_file(file_path, filename):
    """Delete ``file_path``; report (instead of raising) on failure.

    Returns True when the file was removed, False otherwise.
    """
    try:
        os.remove(file_path)
        return True
    except OSError as rm_error:
        print(f"Could not remove: {filename}, error: {rm_error}")
        return False


def thorough_image_cleanup(data_dir):
    """Delete non-image and unreadable files from every class folder of ``data_dir``.

    Each sub-directory of ``data_dir`` is treated as one class folder. Inside a
    class folder, every regular file is checked:

    * files whose extension is not .jpg/.jpeg/.png/.bmp are deleted;
    * files rejected by ``validate_image`` are deleted.

    Entries that are not regular files (e.g. nested folders) are left alone and
    are not counted. A file that cannot be deleted is reported and skipped so
    that one bad entry does not abort the whole cleanup.

    Args:
        data_dir: Path of the dataset root folder.

    Returns:
        False if ``data_dir`` is not an existing directory, True otherwise.
    """
    if not os.path.isdir(data_dir):
        print(f"Directory {data_dir} does not exist!")
        return False

    valid_extensions = {'.jpg', '.jpeg', '.png', '.bmp'}
    removed_count = 0
    total_processed = 0

    # Sorted listings make the log output reproducible between runs.
    for class_name in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        print(f"Processing {class_name} folder...")

        for filename in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, filename)

            # os.remove cannot delete directories, so only regular files are examined.
            if not os.path.isfile(file_path):
                continue
            total_processed += 1

            # Extension check is case-insensitive.
            _, ext = os.path.splitext(filename.lower())
            if ext not in valid_extensions:
                print(f"Removing non-image file: {filename}")
                if _remove_file(file_path, filename):
                    removed_count += 1
                continue

            if validate_image(file_path, filename):
                continue

            if _remove_file(file_path, filename):
                removed_count += 1
                print(f"Removed corrupted file: {filename}")

    print(f"Processed {total_processed} files, removed {removed_count} corrupted files")
    return True

def validate_image(file_path, filename):
    """Return True when the file passes every readability check, False otherwise.

    Checks run in this order: file size (at least 100 bytes), PIL ``verify``,
    PIL full load, minimum dimensions (10 px per side), OpenCV read, and a
    TensorFlow decode + resize. Images whose PIL mode is not RGB, L or RGBA are
    converted to RGB and written back to ``file_path`` along the way.

    Args:
        file_path: Path of the file to check.
        filename: Name used in the printed diagnostics.
    """
    accepted_modes = ('RGB', 'L', 'RGBA')

    try:
        # Anything under 100 bytes is rejected outright.
        if os.path.getsize(file_path) < 100:
            print(f"File too small: {filename}")
            return False

        # First pass: structural check by PIL.
        with Image.open(file_path) as probe:
            probe.verify()

        # Second pass on a fresh handle: force a full decode of the pixel data.
        with Image.open(file_path) as picture:
            picture.load()

            width, height = picture.size
            if min(width, height) < 10:
                print(f"Image too small: {filename}")
                return False

            # Rewrite images in other colour modes as RGB, in place.
            if picture.mode not in accepted_modes:
                print(f"Converting {picture.mode} to RGB: {filename}")
                picture.convert('RGB').save(file_path)

        # OpenCV signals an unreadable file by returning None.
        if cv2.imread(file_path) is None:
            print(f"OpenCV cannot read: {filename}")
            return False

        # Finally decode and resize with TensorFlow, forcing evaluation via .numpy().
        try:
            raw_bytes = tf.io.read_file(file_path)
            decoded = tf.image.decode_image(raw_bytes, channels=3)
            resized = tf.image.resize(decoded, [256, 256])
            tf.reduce_mean(resized).numpy()
        except Exception as tf_error:
            print(f"TensorFlow cannot decode: {filename}, error: {tf_error}")
            return False

    except Exception as e:
        print(f"General error with {filename}: {e}")
        return False

    return True

print("Image validation functions ready!")

In [None]:
print("Starting image cleanup...")
cleanup_success = thorough_image_cleanup(data_dir)

if not cleanup_success:
    print("Cleanup failed!")
else:
    # Report how many files are left in each class folder, and overall.
    final_total = 0
    for class_name in os.listdir(data_dir):
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue
        count = sum(
            1
            for entry in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, entry))
        )
        final_total += count
        print(f"{class_name}: {count} images remaining")

    print(f"Total valid images: {final_total}")

In [None]:
IMG_SIZE = 256
BATCH_SIZE = 16


def create_normalized_dataset_optimized(data_dir):
    """Build a batched, shuffled image dataset from ``data_dir`` with pixels divided by 255.

    Images are resized to IMG_SIZE x IMG_SIZE (bilinear) and grouped into
    batches of BATCH_SIZE. No caching is applied.

    Args:
        data_dir: Folder passed to ``image_dataset_from_directory``.

    Returns:
        The mapped dataset, or None if anything raised while building it
        (the error is printed).
    """
    def _scale_to_unit_range(image, label):
        # Cast to float32 and divide by 255; labels pass through untouched.
        return tf.cast(image, tf.float32) / 255.0, label

    try:
        loader_options = dict(
            validation_split=None,
            subset=None,
            image_size=(IMG_SIZE, IMG_SIZE),
            batch_size=BATCH_SIZE,
            shuffle=True,
            seed=123,
            interpolation='bilinear',
        )
        batches = tf.keras.utils.image_dataset_from_directory(data_dir, **loader_options)
        return batches.map(_scale_to_unit_range)
    except Exception as e:
        print(f"Normalized dataset creation failed: {e}")
        return None

# Create normalized dataset
data2 = create_normalized_dataset_optimized(data_dir)

Load the data pipeline and inspect a sample batch

In [None]:
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Reuse data_dir instead of repeating the literal path, so both datasets always
# read from the same folder. Library defaults are used for batch and image size.
data = tf.keras.utils.image_dataset_from_directory(data_dir)

In [None]:
# Iterator over `data` that yields each batch as NumPy arrays.
data_iterator=data.as_numpy_iterator()

In [None]:
# Pull one batch from the iterator.
batch = next(data_iterator)

In [None]:
len(batch)

In [None]:
#Class 0 = cat
#Class 1 = dog
batch[1]

In [None]:
# Show the first four images of the batch side by side, titled with their labels.
fig, ax = plt.subplots(ncols=4, figsize=(20, 20))
for axis, img, label in zip(ax, batch[0][:4], batch[1]):
    axis.imshow(img.astype(int))
    axis.title.set_text(label)

Preprocessing the data

In [None]:
# Divide pixel values by 255, leaving labels unchanged. This copy is only used for
# the inspection cells that follow; the train/val/test split is taken from data2.
data1=data.map(lambda x,y:(x/255 ,y))

In [None]:
# Despite its name, this holds a single batch pulled from data1, not an iterator.
scaled_iterator=data1.as_numpy_iterator().next()

In [None]:
scaled_iterator[0].max()

In [None]:
# Show the first four scaled images side by side, titled with their labels.
fig, ax = plt.subplots(ncols=4, figsize=(20, 20))
for axis, img, label in zip(ax, scaled_iterator[0][:4], scaled_iterator[1]):
    axis.imshow(img)
    axis.title.set_text(label)

In [None]:
len(data)

In [None]:
len(data1)

In [None]:
# Split sizes are counted in batches (len of a batched dataset is its batch count).
# The test split takes whatever is left after train and validation, so the three
# sizes always add up to exactly len(data2): no batch is dropped and none is
# requested beyond the end.
total_batches = len(data2)
train_size = int(total_batches * .7)
val_size = int(total_batches * .2)
test_size = total_batches - train_size - val_size

In [None]:
# Carve consecutive runs of batches out of data2: train first, then validation, then test.
# NOTE(review): data2 is created with shuffle=True. If the underlying pipeline
# reshuffles on every pass, the boundaries between these splits may not be stable
# from one epoch to the next - confirm, and consider a fixed split if so.
train = data2.take(train_size)
held_out = data2.skip(train_size)
val = held_out.take(val_size)
test = held_out.skip(val_size).take(test_size)

Building model

In [None]:
train_size + val_size + test_size

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Flatten,Dense,Dropout
from tensorflow.keras.regularizers import l2

In [None]:
def _conv_relu(filters, **extra):
    """3x3, stride-1, same-padded ReLU convolution with L2(0.001) kernel regularisation."""
    return Conv2D(filters, (3,3), strides=(1,1), padding='same', activation='relu',
                  kernel_regularizer=l2(0.001), **extra)


# Three conv/pool stages followed by a dense head with a single sigmoid output.
model = Sequential([
    _conv_relu(16, input_shape=(256,256,3)),
    MaxPooling2D(),

    _conv_relu(32),
    MaxPooling2D(),
    Dropout(0.3),

    _conv_relu(16),
    MaxPooling2D(),

    Flatten(),
    Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.5),

    Dense(1, activation='sigmoid'),
])


In [None]:
# Adam optimiser, binary cross-entropy loss, accuracy reported during training.
model.compile(
    optimizer='adam',
    loss=tf.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
model.summary()

In [None]:
# Folder handed to the TensorBoard callback below; created up front if missing.
log_dir ="/root/ImageClassification/logs"
os.makedirs(log_dir,exist_ok=True)

In [None]:
# Despite its name, this is the whole list of training callbacks, not just TensorBoard:
#   - EarlyStopping: patience of 5 epochs, restoring the best weights when it stops
#   - TensorBoard: writes logs to log_dir
#   - ReduceLROnPlateau: multiplies the learning rate by 0.5 after a patience of 3 epochs
tensorboard_callback=[tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
                      tf.keras.callbacks.TensorBoard(log_dir),
                      tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5)]

In [None]:
# tensorboard_callback is already a list of callbacks, so it is passed as-is
# rather than wrapped in a second list.
hist = model.fit(
    train,
    epochs=20,
    validation_data=val,
    callbacks=tensorboard_callback,
)

In [None]:
hist.history

Evaluation

In [None]:
# Training vs. validation loss per epoch.
fig = plt.figure()
for history_key, line_colour, legend_label in (
    ('loss', 'cyan', 'Loss'),
    ('val_loss', 'orange', 'Validation_Loss'),
):
    plt.plot(hist.history[history_key], color=line_colour, label=legend_label)
plt.suptitle('Loss', fontsize='25')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
fig = plt.figure()
plt.plot(hist.history['accuracy'], color='cyan', label='Accuracy')
plt.plot(hist.history['val_accuracy'], color='orange', label='Validation_Accuracy')
# Title now matches the plotted quantity (it previously read 'Loss').
plt.suptitle('Accuracy', fontsize='25')
plt.legend(loc='upper left')
plt.show()

In [None]:
from tensorflow.keras.metrics import Precision,BinaryAccuracy,Recall

In [None]:
# Streaming metrics that are filled batch by batch during test-set evaluation.
pre, re, acc = Precision(), Recall(), BinaryAccuracy()

In [None]:
# Start from a clean state so re-running this cell does not accumulate on top of
# the previous run's counts.
for metric in (pre, re, acc):
    metric.reset_state()

# Unpack each test batch directly; the loop no longer reuses the name
# scaled_iterator, which an earlier cell uses for a preview batch.
for X, y in test.as_numpy_iterator():
    # verbose=0 avoids printing one progress bar per batch.
    yhat = model.predict(X, verbose=0)
    pre.update_state(y, yhat)
    re.update_state(y, yhat)
    acc.update_state(y, yhat)

In [None]:
print(f'Precision:{pre.result().numpy()},Recall:{re.result().numpy()},Accuracy:{acc.result().numpy()}')

Testing

In [None]:
# cv2.imread returns None (no exception) when the file cannot be read, and it
# returns pixels in BGR order. Fail loudly on a missing file and convert to RGB,
# which is what matplotlib displays and what the training pipeline fed the model.
test_image_path = '/content/dogtest.jpeg'
img_bgr = cv2.imread(test_image_path)
if img_bgr is None:
    raise FileNotFoundError(f'Could not read test image: {test_image_path}')
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
plt.imshow(img)
plt.show()

In [None]:
# Resize the test image to 256x256 and display the result.
resize = tf.image.resize(img, (256, 256))
resized_pixels = resize.numpy().astype(int)
plt.imshow(resized_pixels)
plt.show()

In [None]:
# Scale to [0, 1], add a leading batch axis, and run the model on the single image.
single_image_batch = np.expand_dims(resize / 255, axis=0)
yhat = model.predict(single_image_batch)

In [None]:
yhat

In [None]:
# Outputs above 0.5 are reported as Dog, everything else as Cat.
predicted_label = 'Dog' if yhat > 0.5 else 'Cat'
print(f'Predicted class is {predicted_label}')