In [58]:
#Import necessary libraries to perform binary image classification
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score
import os
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [59]:
#Path to the dataset in our system
path = "C:/Users/panug/Downloads/pizza_not_pizza"

#Data augmentation helps making the model more robust and generalizing it.
#Imagedatagenerator is used for data augmentation and preprocessing.

## Creating image data generators for the train sets
train_datagen = ImageDataGenerator(rescale=1./255,rotation_range=30, 
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

#load training dataset
train_set = train_datagen.flow_from_directory(os.path.join(path, 'train'),
                                              target_size=(150,150),
                                              batch_size=32,

                                              class_mode='binary')

#Creating image data generators for the val sets
val_datagen = ImageDataGenerator(rescale=1./255)

#load validation dataset
val_set = val_datagen.flow_from_directory(os.path.join(path, 'val'),
                                          target_size=(150,150),
                                          batch_size=32,
                                          class_mode='binary')

#Creating image data generators for the test sets
test_datagen = ImageDataGenerator(rescale=1./255)

#load test dataset
test_set = test_datagen.flow_from_directory(os.path.join(path, 'test'),
                                            target_size=(150,150),
                                            batch_size=32,
                                            class_mode='binary')

Found 1376 images belonging to 2 classes.
Found 294 images belonging to 2 classes.
Found 296 images belonging to 2 classes.


In [60]:
#create the cnn model
model = Sequential([
    #The convolutional layers
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(110, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    #Fully connected layer s and dense layers
    Flatten(),
    Dense(400, activation='relu'),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid') #output layer(sigmoid is used for binary classification)
]) 

In [61]:
#print the built cnn model summary 
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_32 (Conv2D)          (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_32 (MaxPoolin  (None, 74, 74, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_33 (Conv2D)          (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_33 (MaxPoolin  (None, 36, 36, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_34 (Conv2D)          (None, 34, 34, 110)       63470     
                                                                 
 max_pooling2d_34 (MaxPoolin  (None, 17, 17, 110)     

In [62]:
# Compile the model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [63]:
# Train the model
#no.of epochs
cnn = model.fit(train_set,epochs=20, validation_data=val_set)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [64]:
#evaluate the train,validation and test loss and accuracies respectively
train_loss, train_accuracy = model.evaluate(train_set)
val_loss, validation_accuracy = model.evaluate(val_set)
test_loss, test_accuracy = model.evaluate(test_set)



In [65]:
# Print the accuracies
print('Train accuracy:', train_accuracy)
print('Validation accuracy:', validation_accuracy)
print('Test accuracy:', test_accuracy)

Train accuracy: 0.8001453280448914
Validation accuracy: 0.7823129296302795
Test accuracy: 0.7466216087341309


In [66]:
#calculating the precision values
from sklearn.metrics import precision_score, recall_score

#predicting the results
y_pred = model.predict(test_set)

#rounding the result values to 1 if >0.5 and 0 if <0.5
y_pred = (y_pred > 0.5).astype(int)
y_true = test_set.classes


#calculate precision value
precision = precision_score(y_true, y_pred)

#calculate recall value
recall = recall_score(y_true, y_pred)

#calculate f1 score
f1cnn = f1_score(y_true, y_pred)

print('F1 score value for cnn: ',f1cnn)
print('Precision value for cnn: ', precision)
print('Recall value for cnn: ', recall)

F1 score value for cnn:  0.5889212827988339
Precision value for cnn:  0.517948717948718
Recall value for cnn:  0.6824324324324325


In [68]:
import keras
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

#consider the fully connected choosing to extract the features(dense_20)
layer_name = model.layers[-3].name
print(layer_name)

output_layer = model.get_layer(layer_name)
#Extract the features from the fully connected layer
train_svm = keras.models.Model(inputs=model.input, outputs=output_layer.output)

#Extract the features from the validation set as asked in the question
X_train = train_svm.predict(val_set)
y_train = val_set.classes

##Train the non-linear svm on the current train dataset
svm = SVC(kernel='rbf', C=1.0, gamma='scale')
svm.fit(X_train, y_train)

test_datagen = ImageDataGenerator(rescale=1./255)
test_set = test_datagen.flow_from_directory(os.path.join(data_path, 'test'),
                                            target_size=(150,150),
                                            batch_size=32,
                                            class_mode='binary')
#Like in the question,extract the features from the same fully connected layer and use as test dataset
X_test = train_svm.predict(test_set)
y_test = test_set.classes

# Evaluate SVM classifier on test set
y_pred = svm.predict(X_test)
print(classification_report(y_test, y_pred))

#evaluate the metrics
test_accuracy=accuracy_score(y_test,y_pred)
f1score=f1_score(y_test, y_pred)
recall=recall_score(y_test, y_pred)
#print the obtained test accuracy
print("Test accuracy",test_accuracy)
print("f1score",f1score)
print("recall",recall)


dense_20
Found 296 images belonging to 2 classes.
              precision    recall  f1-score   support

           0       0.56      0.62      0.59       148
           1       0.58      0.51      0.54       148

    accuracy                           0.57       296
   macro avg       0.57      0.57      0.57       296
weighted avg       0.57      0.57      0.57       296

Test accuracy 0.5675675675675675
f1score 0.5428571428571428
recall 0.5135135135135135
