In [1]:
'''
PROBLEM STATEMENT:
Implement a support vector machine (SVM) to classify images of cats and dogs from the Kaggle dataset.
'''

'\nPROBLEM STATEMENT:\nImplement a support vector machine (SVM) to classify images of cats and dogs from the Kaggle dataset.\n'

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

import cv2
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import preprocess_input

from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

from sklearn.metrics import classification_report, accuracy_score
import random

In [3]:
# Download location of the filtered cats-vs-dogs dataset (a zip archive)
URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'

In [4]:
# Download the archive into the local Keras cache; extract=True also unpacks it.
# The path returned here is displayed in the next cell.
path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=URL, extract=True)

In [5]:
path_to_zip

'C:\\Users\\ASUS\\.keras\\datasets\\cats_and_dogs_extracted'

In [6]:
# Define the dataset root relative to the download location instead of
# hard-coding a machine-specific absolute path, so the notebook runs anywhere.
# Depending on the Keras version, get_file(..., extract=True) returns either the
# extraction directory or the path of the downloaded archive; handle both.
extract_root = path_to_zip if os.path.isdir(path_to_zip) else os.path.dirname(path_to_zip)
PATH = os.path.join(extract_root, 'cats_and_dogs_filtered')

In [7]:
PATH

'C:\\Users\\ASUS\\.keras\\datasets\\cats_and_dogs_extracted\\cats_and_dogs_filtered'

In [8]:
## Directory Exploration

In [9]:
os.listdir(PATH)

['train', 'validation', 'vectorize.py']

In [10]:
# Class sub-folders of the training split (path built from PATH, not hard-coded)
os.listdir(os.path.join(PATH, 'train'))

['cats', 'dogs']

In [11]:
# Class sub-folders of the validation split (path built from PATH, not hard-coded)
os.listdir(os.path.join(PATH, 'validation'))

['cats', 'dogs']

In [12]:
# File names of the training cat images (path built from PATH, not hard-coded)
os.listdir(os.path.join(PATH, 'train', 'cats'))

['cat.0.jpg',
 'cat.1.jpg',
 'cat.10.jpg',
 'cat.100.jpg',
 'cat.101.jpg',
 'cat.102.jpg',
 'cat.103.jpg',
 'cat.104.jpg',
 'cat.105.jpg',
 'cat.106.jpg',
 'cat.107.jpg',
 'cat.108.jpg',
 'cat.109.jpg',
 'cat.11.jpg',
 'cat.110.jpg',
 'cat.111.jpg',
 'cat.112.jpg',
 'cat.113.jpg',
 'cat.114.jpg',
 'cat.115.jpg',
 'cat.116.jpg',
 'cat.117.jpg',
 'cat.118.jpg',
 'cat.119.jpg',
 'cat.12.jpg',
 'cat.120.jpg',
 'cat.121.jpg',
 'cat.122.jpg',
 'cat.123.jpg',
 'cat.124.jpg',
 'cat.125.jpg',
 'cat.126.jpg',
 'cat.127.jpg',
 'cat.128.jpg',
 'cat.129.jpg',
 'cat.13.jpg',
 'cat.130.jpg',
 'cat.131.jpg',
 'cat.132.jpg',
 'cat.133.jpg',
 'cat.134.jpg',
 'cat.135.jpg',
 'cat.136.jpg',
 'cat.137.jpg',
 'cat.138.jpg',
 'cat.139.jpg',
 'cat.14.jpg',
 'cat.140.jpg',
 'cat.141.jpg',
 'cat.142.jpg',
 'cat.143.jpg',
 'cat.144.jpg',
 'cat.145.jpg',
 'cat.146.jpg',
 'cat.147.jpg',
 'cat.148.jpg',
 'cat.149.jpg',
 'cat.15.jpg',
 'cat.150.jpg',
 'cat.151.jpg',
 'cat.152.jpg',
 'cat.153.jpg',
 'cat.154.jpg',
 '

In [13]:
# Number of training cat images (path built from PATH, not hard-coded)
len(os.listdir(os.path.join(PATH, 'train', 'cats')))

1000

In [14]:
# Number of training dog images (path built from PATH, not hard-coded)
len(os.listdir(os.path.join(PATH, 'train', 'dogs')))

1000

In [15]:
# Number of validation cat images (path built from PATH, not hard-coded)
len(os.listdir(os.path.join(PATH, 'validation', 'cats')))

500

In [16]:
# Number of validation dog images (path built from PATH, not hard-coded)
len(os.listdir(os.path.join(PATH, 'validation', 'dogs')))

500

In [17]:
# Build the paths to the train and validation directories

In [18]:
# Folder holding the training images, one sub-folder per class ('cats', 'dogs')
train_dir = os.path.join(PATH, 'train')

In [19]:
train_dir

'C:\\Users\\ASUS\\.keras\\datasets\\cats_and_dogs_extracted\\cats_and_dogs_filtered\\train'

In [20]:
# Folder holding the validation images, one sub-folder per class ('cats', 'dogs')
validation_dir = os.path.join(PATH, 'validation')

In [21]:
validation_dir

'C:\\Users\\ASUS\\.keras\\datasets\\cats_and_dogs_extracted\\cats_and_dogs_filtered\\validation'

In [22]:
# Function to load, resize, normalize images
def preprocess_images(folder):
    """Load every image under ``folder/cats`` and ``folder/dogs`` as VGG16 input.

    Each readable image is converted from OpenCV's BGR channel order to RGB,
    resized to 224x224 and passed through VGG16's ``preprocess_input``.
    Files that ``cv2.imread`` cannot decode (it returns ``None``) are skipped.

    Parameters
    ----------
    folder : str
        Directory containing the ``cats`` and ``dogs`` sub-directories.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The stacked preprocessed images and the matching integer labels
        (0 for cats, 1 for dogs), in the same order.
    """
    images = []
    labels = []
    # enumerate() yields the label directly: 'cats' -> 0, 'dogs' -> 1
    for label, category in enumerate(['cats', 'dogs']):
        path = os.path.join(folder, category)
        # os.listdir() order is arbitrary; sort so the sample order is reproducible
        for filename in sorted(os.listdir(path)):
            img = cv2.imread(os.path.join(path, filename))
            if img is None:
                # Not a decodable image (e.g. a stray non-image file): skip it
                continue
            # cv2.imread returns BGR, but preprocess_input expects RGB input
            # (it performs the RGB->BGR flip itself). Without this conversion the
            # network would see images with red and blue swapped.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (224, 224))  # Resize to 224x224 pixels for VGG16
            img = preprocess_input(img)  # Normalize using VGG16 preprocessing
            images.append(img)
            labels.append(label)
    return np.array(images), np.array(labels)


In [23]:
# Preprocess train and validation images
# Labels follow preprocess_images: 0 = cat, 1 = dog
X_train, y_train = preprocess_images(train_dir)
X_val, y_val = preprocess_images(validation_dir)

In [24]:
# Load VGG16 pretrained on ImageNet to use as a feature extractor.
# include_top=False leaves out the fully connected classifier head, so only the
# convolutional blocks are built.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Expose the output of the 'block5_pool' layer as the feature tensor
model = Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)

In [25]:
model.summary()

In [None]:
# Function to extract features
def extract_features(data, model):
    """Feed ``data`` to ``model`` and hand back the result of ``model.predict``."""
    return model.predict(data)

# Extract features for train and validation datasets
# Each call runs every image through the network; the progress log below shows
# this takes minutes on the machine that produced the saved output.
train_features = extract_features(X_train, model)
val_features = extract_features(X_val, model)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 2s/step
[1m20/32[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m27s[0m 2s/step

In [None]:
model.summary()

In [None]:
# Flatten each sample's feature map into one row (samples x features)
n_train, n_val = train_features.shape[0], val_features.shape[0]
train_features_flat = train_features.reshape(n_train, -1)
val_features_flat = val_features.reshape(n_val, -1)

In [None]:
# Apply PCA
# random_state pins the randomized SVD solver that scikit-learn may select for
# an input of this size, so the components (and every downstream score) are
# identical on each Restart & Run All.
pca = PCA(n_components=100, random_state=42)  # Adjust the number of components as needed
# Fit on the training features only, then apply the same projection to validation
train_pca = pca.fit_transform(train_features_flat)
val_pca = pca.transform(val_features_flat)

In [None]:
# Create a pipeline with scaler and SVC
# StandardScaler standardizes the PCA components before they reach the
# sigmoid-kernel SVC.
svc_pipeline = make_pipeline(StandardScaler(), SVC(kernel='sigmoid'))
# Fit scaler and classifier together on the training features only
svc_pipeline.fit(train_pca, y_train)

In [None]:
# Predict on validation set
val_preds = svc_pipeline.predict(val_pca)

In [None]:
# Report per-class metrics, then the overall validation accuracy
report = classification_report(y_val, val_preds)
print(report)
print(f'Validation Accuracy: {accuracy_score(y_val, val_preds) * 100:.2f}%')

In [None]:
# Randomly select some validation samples to visualize predictions
num_samples = 10
# Seeded local RNG: the same samples are shown on every Restart & Run All
rng = random.Random(42)
random_indices = rng.sample(range(len(X_val)), num_samples)

# X_val holds VGG16-preprocessed arrays: preprocess_input subtracted these
# per-channel means, so the values fall outside the range imshow can render
# (floats are clipped to [0, 1]). Add the means back and cast to uint8 for display.
vgg16_channel_means = np.array([103.939, 116.779, 123.68], dtype=np.float32)

fig, axes = plt.subplots(2, 5, figsize=(15, 6))
for ax, idx in zip(axes.flat, random_indices):
    display_img = np.clip(X_val[idx] + vgg16_channel_means, 0, 255).astype(np.uint8)
    # NOTE(review): preprocess_input reverses the channel axis, so a BGR array
    # from cv2.imread ends up in RGB order here and can be shown as is. If the
    # loader feeds RGB into preprocess_input instead, show display_img[..., ::-1].
    ax.imshow(display_img)
    true_label = 'Cat' if y_val[idx] == 0 else 'Dog'
    pred_label = 'Cat' if val_preds[idx] == 0 else 'Dog'
    ax.set_title(f'True: {true_label}\nPred: {pred_label}')
    ax.axis('off')
plt.tight_layout()
plt.show()