# Explore here - Problem Statement | Background


### Import Libraries

In [None]:
import os
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.preprocessing import image

In [None]:
import tensorflow as tf

# Show which TensorFlow release this kernel is running.
tf_version = tf.__version__
print(tf_version)

2.15.0


In [None]:
pip install --upgrade tensorflow
pip install --upgrade keras

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the dataset paths used in this
# notebook live under this mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


### Step 1: Loading the images from the mounted Google Drive folder

In [None]:
# Dataset locations on the mounted drive, followed by a sanity check that both
# folders are actually present.
import os

train_dir = '/content/drive/MyDrive/Colab _Notebooks/4Geeks/Unsupervised/Image_classification/train'
test_dir = '/content/drive/MyDrive/Colab _Notebooks/4Geeks/Unsupervised/Image_classification/test/test1'
# Alternative test location (parent folder):
#'/content/drive/MyDrive/Colab _Notebooks/4Geeks/Unsupervised/Image_classification/test'

# One boolean per folder: True when the path exists and is a directory.
train_dir_exists, test_dir_exists = (
    os.path.isdir(data_dir) for data_dir in (train_dir, test_dir)
)

# Display both flags as the cell output.
train_dir_exists, test_dir_exists

(True, True)

In [None]:
# Define image dimensions and batch size
img_width, img_height = 200, 200
batch_size = 10

# Generators for training and test data; the only preprocessing is multiplying
# every pixel value by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Load and preprocess training data. Each subfolder of train_dir is treated as
# one class.
print("Loading training data...")
trdata = train_datagen.flow_from_directory(
    train_dir,
    # Keras expects target_size as (height, width). The two values are equal
    # today, but the correct order keeps this right if they ever diverge.
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',  # or 'categorical' for multi-class
    shuffle=True
)
print(f"Found {trdata.samples} images belonging to {trdata.num_classes} classes for training.")

Loading training data...
Found 24405 images belonging to 2 classes.
Found 24405 images belonging to 2 classes for training.


In [None]:
# Verify and preprocess test data
print("\nVerifying test directory...")

# Stays None when the test images cannot be loaded, so later cells can check it
# instead of failing with a NameError.
tsdata = None

if not os.path.exists(test_dir) or not os.listdir(test_dir):
    print("Test directory is missing or empty. Please check the path and contents.")
else:
    # List first few files in the test directory to verify
    test_files = os.listdir(test_dir)
    print("First 5 files in test directory:", test_files[:5])

    # Check for image file extensions (str.endswith accepts a tuple of suffixes)
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
    image_files = [file for file in test_files if file.lower().endswith(image_extensions)]
    print(f"Found {len(image_files)} image files in test directory.")

    if len(image_files) == 0:
        print("No supported image files found. Please check the formats and structure.")
    else:
        print("Supported image files found. Proceeding with loading test data...")
        # flow_from_directory only collects images that sit inside subfolders
        # of the directory it is given. The test images sit directly in
        # test_dir, so pointing it there finds 0 images. Point it at the parent
        # folder and select test_dir's own folder name as the only "class".
        test_root, test_subdir = os.path.split(os.path.normpath(test_dir))
        tsdata = test_datagen.flow_from_directory(
            test_root,
            classes=[test_subdir],
            target_size=(img_height, img_width),  # Keras order is (height, width)
            batch_size=batch_size,
            class_mode=None,  # Set class_mode to None for unlabeled data
            shuffle=False  # keep file order stable so predictions map to filenames
        )
        print(f"Found {len(tsdata.filenames)} images for testing.")


Verifying test directory...
First 5 files in test directory: ['9053.jpg', '9070.jpg', '9056.jpg', '9069.jpg', '9063.jpg']
Found 12459 image files in test directory.
Supported image files found. Proceeding with loading test data...
Found 0 images belonging to 0 classes.
Found 0 images for testing.


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# Settings shared by every convolution: 3x3 kernels, "same" padding, ReLU, and
# He-uniform weight initialization.
conv_settings = dict(kernel_size=(3, 3), padding="same", activation="relu", kernel_initializer='he_uniform')

# (filters, number of conv layers) for each convolutional stage, in order.
conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

# Feature extractor: every conv is followed by batch normalization, and every
# stage ends with 2x2 max pooling plus dropout to reduce overfitting.
feature_layers = []
for stage_filters, stage_depth in conv_stages:
    for _ in range(stage_depth):
        if not feature_layers:
            # The very first layer also declares the input image shape.
            conv_layer = Conv2D(input_shape=(200, 200, 3), filters=stage_filters, **conv_settings)
        else:
            conv_layer = Conv2D(filters=stage_filters, **conv_settings)
        feature_layers += [conv_layer, BatchNormalization()]
    feature_layers += [MaxPooling2D(pool_size=(2, 2), strides=(2, 2)), Dropout(0.25)]

# Classifier head: two wide dense layers with heavier dropout, then a 2-way softmax.
classifier_layers = [
    Flatten(),
    Dense(units=4096, activation="relu", kernel_initializer='he_uniform'),
    Dropout(0.5),
    Dense(units=4096, activation="relu", kernel_initializer='he_uniform'),
    Dropout(0.5),
    Dense(units=2, activation="softmax"),
]

# Assemble the network in the same order the layers were created.
model = Sequential()
for layer in feature_layers + classifier_layers:
    model.add(layer)

# Adam with a small learning rate; integer class labels are scored against the
# 2-way softmax output.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer structure and parameter counts.
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 200, 200, 64)      1792      
                                                                 
 batch_normalization (Batch  (None, 200, 200, 64)      256       
 Normalization)                                                  
                                                                 
 conv2d_1 (Conv2D)           (None, 200, 200, 64)      36928     
                                                                 
 batch_normalization_1 (Bat  (None, 200, 200, 64)      256       
 chNormalization)                                                
                                                                 
 max_pooling2d (MaxPooling2  (None, 100, 100, 64)      0         
 D)                                                              
                                                        

In [None]:
# Training the model
#
# The test images are unlabeled (loaded with class_mode=None), so they cannot
# produce a validation loss or accuracy. Hold out a fraction of the labeled
# training folder for validation and train on the remainder.
validation_fraction = 0.2
split_datagen = ImageDataGenerator(rescale=1./255, validation_split=validation_fraction)
split_options = dict(
    target_size=(img_height, img_width),  # Keras order is (height, width)
    batch_size=batch_size,
    class_mode='binary',
)
train_split = split_datagen.flow_from_directory(
    train_dir, subset='training', shuffle=True, **split_options
)
val_split = split_datagen.flow_from_directory(
    train_dir, subset='validation', shuffle=False, **split_options
)

print("Starting training...")
history = model.fit(train_split,
                    epochs=5,
                    validation_data=val_split)

# Evaluating the model on the held-out labeled images
print("Training completed.")
print("Evaluating model...")
val_loss, val_accuracy = model.evaluate(val_split)
print(f"Validation loss: {val_loss:.4f} - validation accuracy: {val_accuracy:.4f}")
# Since tsdata is unlabeled, direct evaluation on it is not possible. Use
# model.predict(tsdata) and assess those predictions manually.

Starting training...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training completed.
Evaluating model...


In [None]:
from keras.models import Model  # 'model' is expected to be a Keras model


# Destination of the saved model, including its subfolder
file_path = "models/vgg16_1.h5"

# Create the destination folder when it is not there yet
save_dir = os.path.dirname(file_path)
save_dir_missing = not os.path.exists(save_dir)
if save_dir_missing:
    os.makedirs(save_dir)

# Write the model to disk through Keras' own save routine
model.save(file_path)

print(f"Model saved successfully to {file_path}")

  saving_api.save_model(


Model saved successfully to models/vgg16_1.h5


In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Save checkpoints under the same relative "models/" folder the other cells
# use, not under the filesystem root.
checkpoint_path = "models/vgg16_1.h5"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Both callbacks monitor val_accuracy, which needs labeled validation data. The
# test images are unlabeled, so hold out a fraction of the labeled training
# folder for validation and train on the remainder.
split_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
split_options = dict(
    target_size=(img_height, img_width),  # Keras order is (height, width)
    batch_size=batch_size,
    class_mode='binary',
)
train_split = split_datagen.flow_from_directory(
    train_dir, subset='training', shuffle=True, **split_options
)
val_split = split_datagen.flow_from_directory(
    train_dir, subset='validation', shuffle=False, **split_options
)

# Keep only the best full model (not just weights) as judged by val_accuracy.
checkpoint = ModelCheckpoint(checkpoint_path, monitor="val_accuracy", verbose=1, save_best_only=True, save_weights_only=False, mode="auto")
# NOTE(review): with patience=3 and epochs=3 early stopping cannot trigger;
# raise epochs or lower patience if early stopping is wanted.
early = EarlyStopping(monitor="val_accuracy", patience=3, verbose=1, mode="auto")
hist = model.fit(train_split, steps_per_epoch=100, validation_data=val_split, validation_steps=10, epochs=3, callbacks=[checkpoint, early])

Epoch 1/3



Epoch 2/3



Epoch 3/3





In [None]:
from pickle import dump

# Define the path where the model will be saved, including the subfolder
file_path = "models/vgg16_1.h5"

# Make sure the destination folder exists before writing
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save through Keras so the .h5 file really contains an HDF5 model. Pickling the
# model into this path would replace the HDF5 file with a pickle payload under
# an .h5 name and leave the file handle open.
model.save(file_path)