# Computer Vision Project

For our Computer Vision (CV) project, we utilized an image dataset obtained from [Kaggle](https://www.kaggle.com/datasets/pavansanagapati/images-dataset/data). The dataset includes:

- Cats: 200 images
- Dogs: 100 images
- Horses: 150 images

This dataset serves as the foundation for our multi-class image classification task, where we aim to design a comprehensive pipeline using Python along with libraries like NumPy, OpenCV, TensorFlow, etc. The pipeline covers data preparation, feature extraction using both low-level and high-level techniques, dimensionality reduction, and classification using Support Vector Machines (SVM) to achieve accurate classification results.


# Data Preparation with OpenCV and NumPy

# 1) Data Augmentation

We balanced the distribution of images across classes by augmenting the dataset, ensuring each category has sufficient data for training. Using OpenCV, we implemented image augmentation techniques like rotation and flipping to create diverse variations of images, addressing class imbalances effectively. Additionally, we applied common image preprocessing techniques such as normalization, histogram equalization, and denoising to enhance image quality before feature extraction. These strategies collectively improved the dataset's quality and balance, making it suitable for computer vision tasks like classification or object detection.

In [2]:
import os
import cv2
import numpy as np
from random import choice

data_folder = 'C:/Users/irsha/Downloads/CV/data/'
folders = ['cats', 'dogs', 'horses']
target_count = 200

# performing image augmentation
def augment_images(image_folder, target_count):
    images = os.listdir(image_folder)
    num_images = len(images)
    if num_images >= target_count:
        return  
    num_augmentations = target_count - num_images
    for i in range(num_augmentations):
        image_name = choice(images)
        image_path = os.path.join(image_folder, image_name)
        image = cv2.imread(image_path)
        
        if image is not None:
    
            angle = np.random.randint(-15, 15)
            (h, w) = image.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1.0) #rotation of the images
            rotated_image = cv2.warpAffine(image, M, (w, h))
            
            flip_code = choice([-1, 0, 1])
            flipped_image = cv2.flip(rotated_image, flip_code) #flipping of the images
            
            scale_factor = np.random.uniform(0.9, 1.1)
            scaled_image = cv2.resize(flipped_image, None, fx=scale_factor, fy=scale_factor) #random scaling of pictures

            scaled_image = cv2.resize(scaled_image, (w, h)) #resizing the images to original size

            cv2.imwrite(os.path.join(image_folder, f'augmented_{i}.jpg'), scaled_image)
            images.append(f'augmented_{i}.jpg')

def preprocess_image(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    normalized_image = cv2.normalize(gray_image, None, 0, 255, cv2.NORM_MINMAX)
    equalized_image = cv2.equalizeHist(normalized_image)
    denoised_image = cv2.fastNlMeansDenoising(equalized_image, h=10)
    return denoised_image

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    augment_images(folder_path, target_count)
    print(f'Image augmentation complete for "{folder}". Total images: {len(os.listdir(folder_path))}')

    # loop to preprocess all images
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        image = cv2.imread(img_path)
        if image is not None:
            preprocessed_image = preprocess_image(image)
            cv2.imwrite(img_path, preprocessed_image)
    print(f'Image preprocessing complete for "{folder}".')

print('Data preparation process complete for all folders.')


Image augmentation complete for "cats". Total images: 200
Image preprocessing complete for "cats".
Image augmentation complete for "dogs". Total images: 200
Image preprocessing complete for "dogs".
Image augmentation complete for "horses". Total images: 200
Image preprocessing complete for "horses".
Data preparation process complete for all folders.


# 2) Feature Extraction with OpenCV and TensorFlow

Low-level features are extracted using Histogram of Oriented Gradients (HOG) and Local Binary Patterns (LBP), while high-level features are obtained using a pre-trained deep Convolutional Neural Network (CNN). These features are then combined and subjected to dimensionality reduction before training a Support Vector Machine (SVM) classifier. 
 HOG captures edge and gradient structure information by computing gradients in localized portions of an image. This makes it highly effective for object detection and recognition tasks where shape and texture are crucial.
  LBP is a texture descriptor that characterizes the local texture of an image by thresholding the neighborhood of each pixel and converting the result into a binary number. It is particularly useful for texture classification.

In [35]:
import os
import cv2
import numpy as np
from skimage.feature import local_binary_pattern

data_folder = 'C:/Users/irsha/Downloads/CV/data/'
folders = ['cats', 'dogs', 'horses']

def compute_hog_features(image):
    win_size = (64, 64)
    block_size = (16, 16)
    block_stride = (8, 8)
    cell_size = (8, 8)
    nbins = 9

    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    h = hog.compute(image)
    return h

def compute_lbp_features(image):
    radius = 3
    n_points = 8 * radius #number of sampling points
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        
        if image is not None:
            hog_features = compute_hog_features(image)
            lbp_features = compute_lbp_features(image)
            
            print(f'HOG features shape for {img_name}: {hog_features.shape}')
            print(f'LBP features shape for {img_name}: {lbp_features.shape}')


HOG features shape for cat.1.jpg: (1481760,)
LBP features shape for cat.1.jpg: (26,)
HOG features shape for cat.10.jpg: (5239080,)
LBP features shape for cat.10.jpg: (26,)
HOG features shape for cat.100.jpg: (4171860,)
LBP features shape for cat.100.jpg: (26,)
HOG features shape for cat.101.jpg: (952560,)
LBP features shape for cat.101.jpg: (26,)
HOG features shape for cat.102.jpg: (2222640,)
LBP features shape for cat.102.jpg: (26,)
HOG features shape for cat.103.jpg: (3977820,)
LBP features shape for cat.103.jpg: (26,)
HOG features shape for cat.104.jpg: (1778112,)
LBP features shape for cat.104.jpg: (26,)
HOG features shape for cat.105.jpg: (469224,)
LBP features shape for cat.105.jpg: (26,)
HOG features shape for cat.106.jpg: (2958228,)
LBP features shape for cat.106.jpg: (26,)
HOG features shape for cat.107.jpg: (3492720,)
LBP features shape for cat.107.jpg: (26,)
HOG features shape for cat.108.jpg: (3298680,)
LBP features shape for cat.108.jpg: (26,)
HOG features shape for cat.10

Utilize pre-trained deep Convolutional Neural Networks (CNNs) from TensorFlow's Keras API for
feature extraction. Describe how you would extract deep CNN features from images.

To extract deep Convolutional Neural Network (CNN) features from images, 

- Choose a Pre-trained CNN Model: Select a pre-trained CNN model suitable. Common choices include VGG16, VGG19, ResNet, Inception, etc. These models are trained on large-scale datasets like ImageNet and have learned to extract high-level features from images.

- Load the Pre-trained Model: Use TensorFlow's Keras API to load the pre-trained model. This is done with a single line of code using functions like VGG16() or ResNet50().

- Prepare the Image: Load the image using a suitable image loading function such as tensorflow.keras.preprocessing.image.load_img(). Resize the image to match the input size expected by the model (e.g., 224x224 pixels for VGG16).

- Preprocess the Image: Convert the image to an array using tensorflow.keras.preprocessing.image.img_to_array(). Expand the dimensions of the image array to create a batch of size 1. Preprocess the image data to match the input preprocessing used during the training of the CNN model. This typically involves normalization.

- Extract Features: Pass the preprocessed image through the loaded CNN model using model.predict(). This will extract features from the image at one of the intermediate layers of the CNN, depending on your choice. The earlier layers capture low-level features like edges and textures, while deeper layers capture higher-level features.

- Use Extracted Features: The output of model.predict() will be a feature vector representing the image in the feature space of the CNN. This vector can be used as input to identify tasks such as classification, object detection, or image retrieval.


In [6]:
import os
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np

data_folder = 'C:/Users/soban/Desktop/CV/data'
folders = ['cats', 'dogs', 'horses']

def extract_cnn_features(img_path, model):
    img = image.load_img(img_path, target_size=(224, 224)) #resizing the image to the target size of (224,224) pixels since it is the input size expected by the model
    img_data = image.img_to_array(img) 
    img_data = np.expand_dims(img_data, axis=0) #expanding the dimensions of the image array size
    img_data = preprocess_input(img_data)
    features = model.predict(img_data) #passing the preprocessed image to the model using predict, this will generate features corresponding to the input image
    return features

# for this code we are using pre-trained CNN Model known as VGG-16
base_model = VGG16(weights='imagenet')  #This line loads the pre-trained VGG16 model from Keras with weights pre-trained on the ImageNet dataset.
#When weights parameter is set to 'imagenet', it initializes the model with weights that were trained on ImageNet, a large dataset with millions of images across thousands of classes.

model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output) #this line is used to output the activations of the layer named fc1.
#this layer represents the first fully connected layer of vgg16 that contains high level features learned by the network.

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        features = extract_cnn_features(img_path, model)
        
        print(f'CNN features shape for {img_name}: {features.shape}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635ms/step
CNN features shape for cat.1.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 393ms/step
CNN features shape for cat.10.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 388ms/step
CNN features shape for cat.100.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 399ms/step
CNN features shape for cat.101.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 394ms/step
CNN features shape for cat.102.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 406ms/step
CNN features shape for cat.103.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 391ms/step
CNN features shape for cat.104.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 409ms/step
CNN features shape for cat.105.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 473ms/step
CNN features shape for cat.161.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 430ms/step
CNN features shape for cat.162.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 411ms/step
CNN features shape for cat.163.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 438ms/step
CNN features shape for cat.164.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431ms/step
CNN features shape for cat.165.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 491ms/step
CNN features shape for cat.166.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464ms/step
CNN features shape for cat.167.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472ms/step
CNN features shape for cat.168.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 496ms/step
CNN features shape for augmented_15.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 470ms/step
CNN features shape for augmented_16.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 481ms/step
CNN features shape for augmented_17.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step
CNN features shape for augmented_18.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 478ms/step
CNN features shape for augmented_19.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 485ms/step
CNN features shape for augmented_2.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464ms/step
CNN features shape for augmented_20.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 494ms/step
CNN features shape for augmented_21.jpg: (1, 4096)
[1m1/1[

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 488ms/step
CNN features shape for dog.46.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 490ms/step
CNN features shape for dog.47.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 486ms/step
CNN features shape for dog.48.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 487ms/step
CNN features shape for dog.49.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 498ms/step
CNN features shape for dog.5.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459ms/step
CNN features shape for dog.50.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 498ms/step
CNN features shape for dog.51.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459ms/step
CNN features shape for dog.52.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 515ms/step
CNN features shape for augmented_18.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 499ms/step
CNN features shape for augmented_19.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 495ms/step
CNN features shape for augmented_2.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 532ms/step
CNN features shape for augmented_20.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 483ms/step
CNN features shape for augmented_21.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482ms/step
CNN features shape for augmented_22.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 492ms/step
CNN features shape for augmented_23.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 500ms/step
CNN features shape for augmented_24.jpg: (1, 4096)
[1m1/1[

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 491ms/step
CNN features shape for horse-123.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 476ms/step
CNN features shape for horse-124.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 522ms/step
CNN features shape for horse-125.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 490ms/step
CNN features shape for horse-126.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 504ms/step
CNN features shape for horse-127.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 518ms/step
CNN features shape for horse-128.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 473ms/step
CNN features shape for horse-129.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 505ms/step
CNN features shape for horse-13.jpg: (1, 4096)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━

# 3) Dimensionality Reduction with NumPy and scikit-learn

PCA is effective in reducing the feature space while retaining as much variance as possible. However, it does not explicitly consider class labels, so it may not preserve class discriminative information optimally.
LDA explicitly models the class labels and aims to find a lower-dimensional space where the classes are well-separated. It often outperforms PCA in preserving class discriminative information.
QDA can capture more complex relationships between features and class labels compared to LDA. It may be more effective when the underlying data distributions are non-linear or when classes have different covariance structures.
In summary, while PCA is effective in reducing the feature space, it may not preserve class discriminative information optimally compared to LDA and QDA. LDA explicitly considers class labels and is effective when classes are well-separated linearly. QDA extends LDA to handle non-linear relationships and different covariance structures among classes, making it suitable for more complex data distributions. The choice of method depends on the specific characteristics of the data.

**Independent Component Analysis (ICA)** in image processing separates mixed images into their original independent components. It's like unmixing a picture made of overlapping textures, objects, or lighting conditions. ICA helps isolate textures, recognize objects, remove lighting effects, and decompose images for analysis or fusion. It uncovers hidden factors assuming they are statistically independent, aiding tasks like image enhancement and pattern recognition."




In [15]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA, QuadraticDiscriminantAnalysis as QDA
import os
import cv2
from skimage.feature import local_binary_pattern
from skimage.transform import resize
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model

# Define functions to compute HOG and LBP features
def compute_hog_features(image):
    win_size = (64, 64)
    block_size = (16, 16)
    block_stride = (8, 8)
    cell_size = (8, 8)
    nbins = 9

    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    h = hog.compute(image)
    return h

def compute_lbp_features(image):
    radius = 3
    n_points = 8 * radius  # Number of sampling points
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

# Load pre-trained VGG16 model for CNN feature extraction
base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

# Define data folders and process images
data_folder = 'C:/Users/soban/Desktop/CV/data'
folders = ['cats', 'dogs', 'horses']

X = []
y = []

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        
        if image is not None:
            resized_image = resize(image, (64, 64)).astype(np.uint8)
            
            hog_features = compute_hog_features(resized_image)
            lbp_features = compute_lbp_features(resized_image)
            
            combined_features = np.hstack((hog_features.flatten(), lbp_features.flatten()))
            
            X.append(combined_features)
            y.append(folder)

X = np.array(X)
y = np.array(y)

# Principal Component Analysis (PCA)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
print(f"PCA components shape: {X_pca.shape}")

# Linear Discriminant Analysis (LDA)
lda = LDA(n_components=2)
X_lda = lda.fit_transform(X, y)
print(f"LDA components shape: {X_lda.shape}")

# Quadratic Discriminant Analysis (QDA)
qda = QDA()
qda.fit(X, y)
predictions = qda.predict(X)
print(f"QDA predictions: {predictions[:5]}")


PCA components shape: (600, 2)
LDA components shape: (600, 2)
QDA predictions: ['dogs' 'dogs' 'dogs' 'dogs' 'dogs']




# 4) Classification using SVM with scikit-learn

Working of SVM
Linear SVM:

For linearly separable data, SVM finds the hyperplane that maximizes the margin between classes.
The decision function is determined by the dot product of feature vectors and a weight vector plus a bias term.
The goal is to minimize the hinge loss, which penalizes misclassifications.

Non-linear SVM:
SVM can be extended to handle non-linearly separable data by using kernel functions.
Kernel functions transform the input features into a higher-dimensional space, where the classes might become separable by a hyperplane.
Common kernel functions include polynomial, radial basis function (RBF), and sigmoid kernels.

Suitability for Image Classification
SVMs are suitable for image classification tasks for several reasons:

Effective with Reduced Feature Spaces: SVMs can handle high-dimensional feature spaces efficiently, making them suitable for image data.
Robustness to Overfitting: SVMs are less prone to overfitting, especially when using regularization parameters.
Kernel Trick for Non-linearity: SVMs can handle non-linear decision boundaries using kernel functions, which is useful for image data with complex class distributions.

Training an SVM Classifier with Reduced Feature Spaces

In [None]:
import os
import cv2
import numpy as np
from skimage.feature import local_binary_pattern
from skimage.transform import resize
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def compute_hog_features(image):
    win_size = (64, 64)
    block_size = (16, 16)
    block_stride = (8, 8)
    cell_size = (8, 8)
    nbins = 9

    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    h = hog.compute(image)
    return h.flatten()

def compute_lbp_features(image):
    radius = 3
    n_points = 8 * radius  # Number of sampling points
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist.flatten()

# Function to preprocess an image
def preprocess_image(img_path):
    # Load image using OpenCV
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Image not found at path: {img_path}")

    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    normalized_image = cv2.normalize(gray_image, None, 0, 255, cv2.NORM_MINMAX)
    equalized_image = cv2.equalizeHist(normalized_image)
    
    # converting the images to CV_8U or CV_8UC3
    converted_image = cv2.convertScaleAbs(equalized_image)
    return converted_image

data_folder = 'C:/Users/soban/Desktop/CV/data'
folders = ['cats', 'dogs', 'horses']
X = []
y = []

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    for file_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, file_name)
        try:
            img = preprocess_image(img_path)
            hog_features = compute_hog_features(img)
            lbp_features = compute_lbp_features(img)
            combined_features = np.concatenate((hog_features, lbp_features))
            X.append(combined_features)
            y.append(folder)
        except Exception as e:
            print(f"Error processing image: {img_path}")
            print(e)

X = np.array(X)
y = np.array(y)

# Perform PCA for dimensionality reduction
pca = PCA(n_components=50)
X_pca = pca.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

svm_classifier = SVC(kernel='linear')

svm_classifier.fit(X_train, y_train)

y_pred = svm_classifier.predict(X_test)

# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of linear SVM: {accuracy}")


Experimenting with Different Kernel Functions

In [1]:
svm_linear = svm.SVC(kernel='linear')
svm_poly = svm.SVC(kernel='poly', degree=3)  # Polynomial kernel with degree 3
svm_rbf = svm.SVC(kernel='rbf')  # Radial basis function kernel

svm_linear.fit(X_train, y_train)
svm_poly.fit(X_train, y_train)
svm_rbf.fit(X_train, y_train)

y_pred_linear = svm_linear.predict(X_test)
y_pred_poly = svm_poly.predict(X_test)
y_pred_rbf = svm_rbf.predict(X_test)

accuracy_linear = accuracy_score(y_test, y_pred_linear)
accuracy_poly = accuracy_score(y_test, y_pred_poly)
accuracy_rbf = accuracy_score(y_test, y_pred_rbf)

print(f"Accuracy of linear SVM: {accuracy_linear}")
print(f"Accuracy of polynomial SVM: {accuracy_poly}")
print(f"Accuracy of RBF SVM: {accuracy_rbf}")


NameError: name 'svm' is not defined

Impact of Kernel Functions on Classification Performance

Linear Kernel: Works well for linearly separable data but may underperform on complex datasets with non-linear boundaries.
Polynomial Kernel: Suitable for data with polynomial decision boundaries. The degree parameter controls the degree of the polynomial.
RBF Kernel: Provides flexibility to capture complex decision boundaries. The gamma parameter controls the smoothness of the decision boundary, with smaller values leading to smoother boundaries.

# 5) Integration and Evaluation

In [41]:
import os
import cv2
import numpy as np
from random import choice
from skimage.feature import local_binary_pattern
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import preprocess_input

data_folder = 'C:/Users/soban/Desktop/CV/data'
folders = ['cats', 'dogs', 'horses']
target_count = 200

def augment_images(image_folder, target_count):
    images = os.listdir(image_folder)
    num_images = len(images)
    if num_images >= target_count:
        return  
    num_augmentations = target_count - num_images
    for i in range(num_augmentations):
        image_name = choice(images)
        image_path = os.path.join(image_folder, image_name)
        image = cv2.imread(image_path)
        if image is not None:
            angle = np.random.randint(-15, 15) # performing rotation with random angle within [-15, 15] degrees
            (h, w) = image.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1.0)
            rotated_image = cv2.warpAffine(image, M, (w, h))
            cv2.imwrite(os.path.join(image_folder, f'augmented_{i}.jpg'), rotated_image)
            images.append(f'augmented_{i}.jpg')

def compute_hog_features(image):
    win_size = (64, 64)
    block_size = (16, 16)
    block_stride = (8, 8)
    cell_size = (8, 8)
    nbins = 9

    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    h = hog.compute(image)
    return h

def compute_lbp_features(image):
    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

#using VGG16 as pre trained model
def extract_cnn_features(img_path, model):
    img = keras_image.load_img(img_path, target_size=(224, 224))
    img_data = keras_image.img_to_array(img)
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    features = model.predict(img_data)
    return features.flatten()

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    augment_images(folder_path, target_count)
    print(f'Image augmentation complete for "{folder}". Total images: {len(os.listdir(folder_path))}')

# Feature extraction and dimensionality reduction

features_list, labels_list = [], []
label_map = {folder: idx for idx, folder in enumerate(folders)}

base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    label = label_map[folder]
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        image = cv2.imread(img_path)
        
        if image is not None:
            resized_image = cv2.resize(image, (64, 64))  # Resize for HOG and LBP
            gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
            hog_features = compute_hog_features(gray_image)
            lbp_features = compute_lbp_features(gray_image)
            cnn_features = extract_cnn_features(img_path, model)
            
            combined_features = np.hstack((hog_features.flatten(), lbp_features.flatten(), cnn_features))
            features_list.append(combined_features)
            labels_list.append(label)

X = np.array(features_list)
y = np.array(labels_list)

# applying PCA for dimensionality reduction
pca = PCA(n_components=50)
X_pca = pca.fit_transform(X)

# splitting data and training SVM
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

svm_classifier = svm.SVC(kernel='linear')
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=folders)
print(f"Accuracy: {accuracy}")
print(report)


Image augmentation complete for "cats". Total images: 200
Image augmentation complete for "dogs". Total images: 200
Image augmentation complete for "horses". Total images: 200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 571ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 203ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 216ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 216ms/step
[1m1/1[0m [32m━━━━

**Overall Accuracy**: The classifier achieved an accuracy of 0.875, meaning it correctly predicted the class for approximately 87.5% of the images.

**Class-wise Performance**:

- Cats: Precision, recall, and F1-score for the "cats" class are 0.81, 0.88, and 0.84 respectively. This indicates that out of all images predicted as "cats," 81% were actually cats. Moreover, it correctly identified 88% of the actual cat images, and the F1-score, which is the harmonic mean of precision and recall, is 0.84.
- Dogs: Precision is 0.87, recall is 0.75, and F1-score is 0.81 for the "dogs" class. This means that 87% of the predicted dog images were actually dogs, and 75% of the actual dog images were correctly identified. The F1-score reflects the balance between precision and recall.
- Horses: High precision, recall, and F1-score are observed for the "horses" class, with values of 0.95, 0.98, and 0.96 respectively. This indicates exceptional performance in identihorse imagesi
mages.

1) Macro Average (0.87): This value represents the average of precision, recall, and F1-score across all classes without considering class imbalances. It gives an overall indication of the model's performance across all classes, treating each class equally.

2) Weighted Average (0.88): The weighted average considers the number of instances in each class, providing a more accurate representation of the model's overall performance across the dataset. It is similar to the macro average but gives more weight to classes with more instances.s o.erall.ns.

In [4]:
import os
import cv2
import numpy as np
from random import choice
from skimage.feature import local_binary_pattern
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import preprocess_input
import pickle

data_folder = 'C:/Users/irsha/Downloads/CV/data/'
folders = ['cats', 'dogs', 'horses']
target_count = 200

def augment_images(image_folder, target_count):
    images = os.listdir(image_folder)
    num_images = len(images)
    if num_images >= target_count:
        return
    num_augmentations = target_count - num_images
    for i in range(num_augmentations):
        image_name = choice(images)
        image_path = os.path.join(image_folder, image_name)
        image = cv2.imread(image_path)
        if image is not None:
            angle = np.random.randint(-15, 15)  # performing rotation with random angle within [-15, 15] degrees
            (h, w) = image.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1.0)
            rotated_image = cv2.warpAffine(image, M, (w, h))
            cv2.imwrite(os.path.join(image_folder, f'augmented_{i}.jpg'), rotated_image)
            images.append(f'augmented_{i}.jpg')

def compute_hog_features(image):
    win_size = (64, 64)
    block_size = (16, 16)
    block_stride = (8, 8)
    cell_size = (8, 8)
    nbins = 9

    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    h = hog.compute(image)
    return h

def compute_lbp_features(image):
    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

# using VGG16 as pre-trained model
def extract_cnn_features(img_path, model):
    img = keras_image.load_img(img_path, target_size=(224, 224))
    img_data = keras_image.img_to_array(img)
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    features = model.predict(img_data)
    return features.flatten()

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    augment_images(folder_path, target_count)
    print(f'Image augmentation complete for "{folder}". Total images: {len(os.listdir(folder_path))}')

# Feature extraction and dimensionality reduction
features_list=[] 
labels_list = []
label_map = {folder: idx for idx, folder in enumerate(folders)}

base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

for folder in folders:
    folder_path = os.path.join(data_folder, folder)
    label = label_map[folder]
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        image = cv2.imread(img_path)
        
        if image is not None:
            resized_image = cv2.resize(image, (64, 64))  # Resize for HOG and LBP
            gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
            hog_features = compute_hog_features(gray_image)
            lbp_features = compute_lbp_features(gray_image)
            cnn_features = extract_cnn_features(img_path, model)
            
            combined_features = np.hstack((hog_features.flatten(), lbp_features.flatten(), cnn_features))
            features_list.append(combined_features)
            labels_list.append(label)

X = np.array(features_list)
y = np.array(labels_list)

# Applying PCA for dimensionality reduction
pca = PCA(n_components=50)
X_pca = pca.fit_transform(X)

# Splitting data and training SVM
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

svm_classifier = svm.SVC(kernel='linear')
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=folders)
print(f"Accuracy: {accuracy}")
print(report)

with open('pca_transformer.pkl', 'wb') as pca_file:
    pickle.dump(pca, pca_file)

with open('svm_model.pkl', 'wb') as svm_file:
    pickle.dump(svm_classifier, svm_file)

def classify_image(img_path, model, pca, svm_classifier):
    image = cv2.imread(img_path)
    
    if image is not None:
        resized_image = cv2.resize(image, (64, 64))  # Resize for HOG and LBP
        gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
        hog_features = compute_hog_features(gray_image)
        lbp_features = compute_lbp_features(gray_image)
        cnn_features = extract_cnn_features(img_path, model)
        
        combined_features = np.hstack((hog_features.flatten(), lbp_features.flatten(), cnn_features))
        combined_features = combined_features.reshape(1, -1)
        
        # Apply PCA transformation
        features_pca = pca.transform(combined_features)
        
        # Classify with SVM
        prediction = svm_classifier.predict(features_pca)
        
        label_map_reverse = {v: k for k, v in label_map.items()}
        predicted_label = label_map_reverse[prediction[0]]
        return predicted_label
    else:
        return "Image not found or could not be read"

with open('pca_transformer.pkl', 'rb') as pca_file:
    pca = pickle.load(pca_file)

with open('svm_model.pkl', 'rb') as svm_file:
    svm_classifier = pickle.load(svm_file)
    
test_image_path = 'C:\\Users\\irsha\\Downloads\\CV\\data\\cats\\cat.95.jpg'
predicted_class = classify_image(test_image_path, model, pca, svm_classifier)
print(f'The test image is classified as: {predicted_class}')



Image augmentation complete for "cats". Total images: 200
Image augmentation complete for "dogs". Total images: 200
Image augmentation complete for "horses". Total images: 200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 628ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 326ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 335ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 335ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 315ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━