In [1]:
# Part 1
# You will essentially build your classifier as follows:
# 1. Import libraries, modules, and packages you will need. Make sure to import the *preprocess_input* function from <code>keras.applications.vgg16</code>.
# 2. Use a batch size of 100 images for both training and validation.
# 3. Construct an ImageDataGenerator for the training set and another one for the validation set. VGG16 was originally trained on 224 × 224 images, so make sure to address that when defining the ImageDataGenerator instances.
# 4. Create a sequential model using Keras. Add VGG16 model to it and dense layer.
# 5. Compile the mode using the adam optimizer and the categorical_crossentropy loss function.
# 6. Fit the model on the augmented data using the ImageDataGenerators.

In [2]:
# Steps to Build the Image Classifier using VGG16:
# First, import the required libraries, including the VGG16 model, preprocess_input, and others for handling data and defining the model

In [3]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.applications import VGG16, ResNet50
from keras.applications.vgg16 import preprocess_input as preprocess_input_vgg
from keras.applications.resnet50 import preprocess_input as preprocess_input_resnet
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam

In [4]:
# download the data from unzip file for google colabs
# Step 1: Download the zip file using wget
!wget https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0321EN/data/concrete_data_week4.zip

# Step 2: Unzip the file
!unzip concrete_data_week4.zip

# Step 3: Verify the contents of the unzipped folder
!ls

--2024-10-01 09:11:54--  https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0321EN/data/concrete_data_week4.zip
Resolving s3-api.us-geo.objectstorage.softlayer.net (s3-api.us-geo.objectstorage.softlayer.net)... 67.228.254.196
Connecting to s3-api.us-geo.objectstorage.softlayer.net (s3-api.us-geo.objectstorage.softlayer.net)|67.228.254.196|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 261483817 (249M) [application/zip]
Saving to: ‘concrete_data_week4.zip.1’

Archive:  concrete_data_week4.zip
replace concrete_data_week4/valid/positive/16679_1.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: concrete_data_week4  concrete_data_week4.zip  concrete_data_week4.zip.1  __MACOSX  sample_data


In [5]:
# Step 2: Prepare the Data Generators
# VGG16 was trained on images of size 224x224. We will use this target size for both models.

In [27]:
# Initialize ImageDataGenerators for training and validation
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input_vgg)
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input_vgg)

# Path to your dataset directories
train_dir = '/content/concrete_data_week4/train'
valid_dir = '/content/concrete_data_week4/valid'

# Load the training and validation data
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(224, 224),
                                                    batch_size=100,
                                                    class_mode='categorical')

valid_generator = valid_datagen.flow_from_directory(valid_dir,
                                                    target_size=(224, 224),
                                                    batch_size=100,
                                                    class_mode='categorical')


Found 30001 images belonging to 2 classes.
Found 9501 images belonging to 2 classes.


In [28]:
# Step 3: Build the VGG16-based Classifier
# Load VGG16 without the top layers
vgg16_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Build a Sequential model
vgg16_model = Sequential()

# Add VGG16 base model
vgg16_model.add(vgg16_base)

# Flatten and add a Dense layer for classification
vgg16_model.add(Flatten())
vgg16_model.add(Dense(256, activation='relu'))
vgg16_model.add(Dense(train_generator.num_classes, activation='softmax'))

# Compile the model
vgg16_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])


In [29]:
# Step 4: Train the VGG16 Model
# Train the VGG16 model
vgg16_model.fit(train_generator,
                validation_data=valid_generator,
                epochs=1,
                steps_per_epoch=train_generator.samples // train_generator.batch_size,
                validation_steps=valid_generator.samples // valid_generator.batch_size)


  self._warn_if_super_not_called()


[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m489s[0m 2s/step - accuracy: 0.7458 - loss: 29.1825 - val_accuracy: 0.9866 - val_loss: 0.0641


<keras.src.callbacks.history.History at 0x78c665128ca0>

In [9]:
# NB: the long runtime issue was I was having was due to google colabs on CPU once i changed to Gpu the runtime droped from 5 hours to 5 minutes
 # Part 2
# In this part, you will evaluate your deep learning models on a test data. For this part, you will need to do the following:
# 1. Load your saved model that was built using the ResNet50 model.
# 2. Construct an ImageDataGenerator for the test set. For this ImageDataGenerator instance, you only need to pass the directory of the test images, target size, and the **shuffle** parameter and set it to False.
# 3. Use the **evaluate_generator** method to evaluate your models on the test data, by passing the above ImageDataGenerator as an argument. You can learn more about **evaluate_generator** [here](https://keras.io/models/sequential/).
# 4. Print the performance of the classifier using the VGG16 pre-trained model.
# 5. Print the performance of the classifier using the ResNet pre-trained model.

In [30]:
# Step 5: Build the ResNet50-based Classifier
# To compare, let’s load and compile a ResNet50-based classifier.
# Load ResNet50 without the top layers
resnet50_base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Build a Sequential model
resnet50_model = Sequential()

# Add ResNet50 base model
resnet50_model.add(resnet50_base)

# Flatten and add a Dense layer for classification
resnet50_model.add(Flatten())
resnet50_model.add(Dense(256, activation='relu'))
resnet50_model.add(Dense(train_generator.num_classes, activation='softmax'))

# Compile the model
resnet50_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])


In [31]:
# Step 6: Evaluate the Models on the Test Data
# Path to your test dataset
test_dir = '/content/concrete_data_week4/test'

# Create an ImageDataGenerator for the test set (no data augmentation, just preprocessing)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input_vgg)

# Load the test data
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(224, 224),
                                                  batch_size=100,
                                                  class_mode='categorical',
                                                  shuffle=False)

# Evaluate the VGG16 model
# Workaround to mimic evaluate_generator (if required by the project):
vgg16_performance = vgg16_model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print(f"VGG16 Test Accuracy: {vgg16_performance[1]}")

resnet50_performance = resnet50_model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print(f"ResNet50 Test Accuracy: {resnet50_performance[1]}")

Found 500 images belonging to 2 classes.
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 413ms/step - accuracy: 0.9884 - loss: 0.0297
VGG16 Test Accuracy: 0.9879999756813049
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 384ms/step - accuracy: 0.7733 - loss: 0.6364
ResNet50 Test Accuracy: 0.6439999938011169


In [32]:
# Just putting it in a better layout:
print(f"VGG16: {(vgg16_performance[1]*100)} % Test Accuracy")
print(f"ResNet50: {(resnet50_performance[1]*100)} % Test Accuracy")

VGG16: 98.7999975681305 % Test Accuracy
ResNet50: 64.3999993801117 % Test Accuracy


In [15]:
# Part 3
# In this model, you will predict whether the images in the test data are images of cracked concrete or not. You will do the following:

# 1. Use the **predict_generator** method to predict the class of the images in the test data,
# by passing the test data ImageDataGenerator instance defined in the previous part as an argument.
# You can learn more about the **predict_generator** method [here](https://keras.io/models/sequential/).
# 2. Report the class predictions of the first five images in the test set. You should print something list this:
# Positive
# Negative
# Positive
# Positive
# Negative

In [33]:
# Make predictions on the test data
predictions = vgg16_model.predict_generator(test_generator)

AttributeError: 'Sequential' object has no attribute 'predict_generator'

In [34]:
# using a newer version of Keras, the predict_generator method has been deprecated and removed.
# updated predict method, which works with generators in the same way and achieves the same result.
# Make predictions on the test data using the updated 'predict' method
predictions = vgg16_model.predict(test_generator)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 415ms/step


In [38]:
# Get the class indices from the test generator
class_indices = test_generator.class_indices
class_labels = {v: k for k, v in class_indices.items()}  # Reverse the class indices to get label names

# Convert predictions into class indices (0 or 1)
predicted_classes = predictions.argmax(axis=-1)

# Convert class indices into human-readable labels
predicted_labels = [class_labels[idx] for idx in predicted_classes]

# Print the predictions and probabilities for the first five images in the test set
print("Predictions and Probabilities for the first five images:")
for i in range(5):
    label = predicted_labels[i]
    probabilities = predictions[i]
    print(f"Image {i+1}: {label.capitalize()} | Probabilities: Negative: {probabilities[0]:.4f}, Positive: {probabilities[1]:.4f}")



Predictions and Probabilities for the first five images:
Image 1: Negative | Probabilities: Negative: 1.0000, Positive: 0.0000
Image 2: Negative | Probabilities: Negative: 1.0000, Positive: 0.0000
Image 3: Negative | Probabilities: Negative: 1.0000, Positive: 0.0000
Image 4: Negative | Probabilities: Negative: 1.0000, Positive: 0.0000
Image 5: Negative | Probabilities: Negative: 0.9994, Positive: 0.0006


In [39]:
vgg16_model.summary()
