In [14]:
import os
from skimage.io import imread
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pickle

# Define the input directories for cats and dogs
cat_dir = r"C:/Users/phgtk/Datasets/Cat"
dog_dir = r"C:/Users/phgtk/Datasets/Dog"

# Initialize data and labels lists
data = []
labels = []

# Set the image size
img_size = (64, 64)  # Resize images to 64x64 pixels

# Function to load images from a directory and assign a label
def load_images_from_directory(directory, label):
    for file in os.listdir(directory):
        img_path = os.path.join(directory, file)
        if img_path.endswith('.jpg') or img_path.endswith('.jpeg') or img_path.endswith('.png'):  # Only process image files
            img = imread(img_path)
            img = resize(img, img_size)  # Resize images
            data.append(img.flatten())
            labels.append(label)

# Load cat images (label = 0)
load_images_from_directory(cat_dir, 0)

#Load dog images (label = 1)
load_images_from_directory(dog_dir, 1)

# Convert lists to numpy arrays
data = np.asarray(data)
labels = np.asarray(labels)

# Print the shapes of the data and labels arrays to confirm successful loading
print(f"Data shape: {data.shape}")
print(f"Labels shape: {labels.shape}")




Data shape: (202, 12288)
Labels shape: (202,)


In [15]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels)

print(len(y_test))

41


In [16]:
# Initialize the SVM classifier
svc = SVC(probability=True)
param_grid = {
    'C': [0.1, 1, 10, 100],       # Regularization parameter
    'gamma': [1, 0.1, 0.01, 0.001], # Kernel coefficient for 'rbf'
    'kernel': ['linear', 'rbf']   # Kernel type
}


# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_estimator = grid_search.best_estimator_

# Print the best parameters found by GridSearchCV
print(f"Best parameters found: {grid_search.best_params_}")

y_pred = grid_search.predict(X_test)


# Calculate the accuracy of the predictions
score = accuracy_score(y_pred, y_test)
print(score)

# Print the accuracy
print('{}% of samples were correctly classified'.format(str(score * 100)))

# Save the best estimator using pickle
pickle.dump(svc, open('cat_dog_classifier_model.p', 'wb'))


Fitting 5 folds for each of 32 candidates, totalling 160 fits
Best parameters found: {'C': 0.1, 'gamma': 1, 'kernel': 'linear'}
0.5365853658536586
53.65853658536586% of samples were correctly classified


In [18]:
def classify_image(image_path, model_path='cat_dog_classifier_model.p'):
    # Load the trained model
    grid_search.fit(X_train, y_train)
    
    # Read and preprocess the image
    img = imread(image_path)
    img = resize(img, img_size)  # Resize to the same size as training images
    img_flattened = img.flatten().reshape(1, -1)  # Flatten and reshape for the model
    
    # Make a prediction
    prediction = grid_search.predict(img_flattened)
    label = 'Cat' if prediction == 0 else 'Dog'
    
    return label

# Example usage
image_path = r"C:/Users/phgtk/Datasets/Cat/3.jpg"
label = classify_image(image_path)
print(f'The image is classified as: {label}')

Fitting 5 folds for each of 32 candidates, totalling 160 fits
The image is classified as: Cat
