In [27]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import os
import numpy as np
from tensorflow.keras.preprocessing import image
import zipfile

In [30]:
# Path to your zip file
zip_file_path = 'Dataset.zip' # Please replace with your own relative path

# Destination directory where you want to extract the images
extract_dir = 'Dataset_unzipped' # Please create a file and again replace with your own relative path

# Unzip the dataset
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

# Check if the dataset is extracted properly
print(f"Files extracted to: {os.listdir(extract_dir)}")

Files extracted to: ['Dataset']


In [32]:
# Update the base_dir to include the 'Dataset' folder
base_dir = os.path.join(extract_dir, 'Dataset')

# Check the contents of 'Dataset' to ensure it contains the necessary subdirectories
print(f"Subdirectories inside Dataset: {os.listdir(base_dir)}")


Subdirectories inside Dataset: ['Danny Devito', 'Nicholas Cage']


In [33]:
# Use ImageDataGenerator for data augmentation (optional)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=60,          # Rotate images randomly up to 60 degrees
    width_shift_range=0.4,      # Horizontally shift images up to 40% of width
    height_shift_range=0.4,     # Vertically shift images up to 40% of height
    shear_range=0.4,            # Shear transformation
    zoom_range=0.4,             # Zoom in/out
    horizontal_flip=True,       # Flip the image horizontally
    fill_mode='nearest',        # Fill missing pixels with nearest
    validation_split=0.2        # Use 20% of data for validation
)

# Load the images for training and validation
train_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),    # Resize all images to 150x150
    batch_size=32,             # Batch size for training
    class_mode='binary',       # Binary classification (Nicolas Cage vs Danny DeVito)
    subset='training'          # Use this for training data
)

validation_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),    # Resize all images to 150x150
    batch_size=32,             # Batch size for validation
    class_mode='binary',       # Binary classification
    subset='validation'        # Use this for validation data
)


Found 288 images belonging to 2 classes.


Found 70 images belonging to 2 classes.


In [19]:
# Build the CNN model
model = models.Sequential()

# First convolutional layer
model.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D(2, 2))

# Second convolutional layer
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D(2, 2))

# Third convolutional layer
model.add(layers.Conv2D(256, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D(2, 2))

# Fourth convolutional layer
model.add(layers.Conv2D(256, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D(2, 2))

# Flatten the output from the conv layers
model.add(layers.Flatten())

# Fully connected layer with Dropout and L2 regularization
model.add(layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dropout(0.5))  # Drop 50% of the nodes to prevent overfitting

# Output layer (1 neuron for binary classification with sigmoid)
model.add(layers.Dense(1, activation='sigmoid'))

# Compile the model with Adam optimizer and lower learning rate
model.compile(optimizer=Adam(learning_rate=0.0001),  # Lower learning rate
              loss='binary_crossentropy',            # Loss function for binary classification
              metrics=['accuracy'])

# Print the model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
# Early stopping to prevent overtraining and restore the best model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


In [21]:
#Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    epochs=50,  # Train for up to 50 epochs
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size,
    callbacks=[early_stopping]  # Use early stopping to stop training if validation loss doesn't improve
)

Epoch 1/50


  self._warn_if_super_not_called()


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.4676 - loss: 1.6648 - val_accuracy: 0.5625 - val_loss: 1.5979
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.5000 - val_loss: 1.6114
Epoch 3/50


  self.gen.throw(typ, value, traceback)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 0.5390 - loss: 1.5847 - val_accuracy: 0.5156 - val_loss: 1.5316
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.3333 - val_loss: 1.5638
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 923ms/step - accuracy: 0.5579 - loss: 1.5099 - val_accuracy: 0.6562 - val_loss: 1.4506
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.8333 - val_loss: 1.4438
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 929ms/step - accuracy: 0.6100 - loss: 1.4333 - val_accuracy: 0.5312 - val_loss: 1.3904
Epoch 8/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.5000 - val_loss: 1.4413
Epoch 9/50
[1m9/9[0m [32m━━

In [22]:
# Evaluate the model on validation data
loss, accuracy = model.evaluate(validation_generator)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 256ms/step - accuracy: 0.7165 - loss: 1.0214
Validation Accuracy: 71.43%


It looks like the model is achieving a validation accuracy of 71.43% with a training accuracy of 71.65% and a validation loss of 1.0214. This is a solid performance, though it suggests that there's room for improvement in terms of both accuracy and reducing loss

In [26]:
# Load and preprocess a new image for testing
img_path = 'Test_Image.jpg'  # Please replace with the path to your test image
img = image.load_img(img_path, target_size=(150, 150))  # Resize image to 150x150

# Convert the image to an array
img_array = image.img_to_array(img)

# Expand dimensions to match the shape of a batch (batch size, height, width, channels)
img_array = np.expand_dims(img_array, axis=0)

# Normalize the pixel values (same normalization applied during training)
img_array = img_array / 255.0

# Predict the class (returns a probability between 0 and 1)
prediction = model.predict(img_array)

# Interpret the prediction result
if prediction[0] > 0.5:
    print("This is Danny DeVito!")
else:
    print("This is Nicolas Cage!")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
This is Danny DeVito!


Well... it wasnt Danny Devito.

I regret making it sound so enthusatic...