In [1]:
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
from tqdm import tqdm
import joblib
from sklearn.model_selection import GridSearchCV
import cv2
import seaborn as sns
import time
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [4]:
# Use the %pip magic (not !pip) so the package is installed into the environment
# of the running kernel; pinned to the release this notebook was executed with.
%pip install -q opendatasets==0.1.22

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Collecting kaggle (from opendatasets)
  Downloading kaggle-1.6.17.tar.gz (82 kB)
     ---------------------------------------- 0.0/82.7 kB ? eta -:--:--
     ---------------------------------------- 82.7/82.7 kB 4.5 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting python-slugify (from kaggle->opendatasets)
  Downloading python_slugify-8.0.4-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting text-unidecode>=1.3 (from python-slugify->kaggle->opendatasets)
  Downloading text_unidecode-1.3-py2.py3-none-any.whl.metadata (2.4 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Downloading python_slugify-8.0.4-py2.py3-none-any.whl (10 kB)
Downloading text_unidecode-1.3-py2.py3-none-any.whl (78 kB)
   ---------------------------------------- 0.0/78.2 kB ? eta -:--:--
   ---------------------------------------- 78.2

In [5]:
import opendatasets as od 
import pandas 

# Fetch the Kaggle "dogs-vs-cats" dataset; opendatasets asks for Kaggle credentials interactively.
od.download(
    "https://www.kaggle.com/datasets/salader/dogs-vs-cats"
)


Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:

  namratamewada


Your Kaggle Key:

  ········


Dataset URL: https://www.kaggle.com/datasets/salader/dogs-vs-cats
Downloading dogs-vs-cats.zip to .\dogs-vs-cats


100%|█████████████████████████████████████████████████████████████████████████████| 1.06G/1.06G [04:59<00:00, 3.81MB/s]





In [38]:
# Function to load images and labels
def load_images_from_folder(folder, label, image_size=(50, 50)):
    """Load every readable image in ``folder`` as a flattened, scaled feature vector.

    Each directory entry is read with ``cv2.imread``, resized to ``image_size``,
    divided by 255.0 and flattened to one dimension. Entries that OpenCV cannot
    decode (``imread`` returns ``None``) are skipped silently.

    Entries are visited in sorted filename order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the order of the returned samples (and
    anything downstream that depends on it) could differ between runs or machines.

    Parameters
    ----------
    folder : str
        Directory containing the image files.
    label : Any
        Label attached to every image loaded from ``folder``.
    image_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize``. Defaults to ``(50, 50)``.

    Returns
    -------
    features : list of numpy.ndarray
        One flattened array per successfully loaded image.
    labels : list
        ``label`` repeated once per loaded image.
    """
    features = []
    labels = []
    target_size = tuple(image_size)
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # Not decodable as an image -- skip it rather than fail the whole load.
            continue
        resized = cv2.resize(img, target_size)
        features.append((resized / 255.0).flatten())
        labels.append(label)
    return features, labels

In [39]:
# Read the training split from disk: cat images get label 0, dog images get label 1
train_cats, train_labels_cats = load_images_from_folder(
    folder='dogs-vs-cats/train/cats', label=0
)
train_dogs, train_labels_dogs = load_images_from_folder(
    folder='dogs-vs-cats/train/dogs', label=1
)

In [40]:
# Merge both classes into single arrays (all cat samples first, then all dog samples)
X_train = np.array([*train_cats, *train_dogs])
y_train = np.array([*train_labels_cats, *train_labels_dogs])

In [41]:
# Read the test split from disk with the same labelling: cats -> 0, dogs -> 1
test_cats, test_labels_cats = load_images_from_folder(
    folder='dogs-vs-cats/test/cats', label=0
)
test_dogs, test_labels_dogs = load_images_from_folder(
    folder='dogs-vs-cats/test/dogs', label=1
)

In [42]:
# Merge both classes into single arrays (all cat samples first, then all dog samples)
X_test = np.array([*test_cats, *test_dogs])
y_test = np.array([*test_labels_cats, *test_labels_dogs])

In [43]:
# PCA, SVM, & Pipeline
# Initial PCA setting; the grid search overrides it through 'pca__n_components'.
n_components = 0.8
# Build the PCA step exactly once, seeded so any randomized solver path is repeatable.
pca = PCA(n_components=n_components, random_state=42)
svm = SVC()
# Step names ('pca', 'svm') are the prefixes used by the keys of the search grid.
pipeline = Pipeline([
    ('pca', pca),
    ('svm', svm)
])

In [44]:
# Search space for the grid search; each key is "<pipeline step>__<parameter>".
param_grid = dict(
    pca__n_components=[2, 1, 0.9, 0.8],
    svm__kernel=['linear', 'rbf', 'poly', 'sigmoid'],
)

In [14]:
start_time = time.time()

# Exhaustive search over param_grid with 3-fold cross-validation; verbose=4 logs every fit.
grid_search = GridSearchCV(pipeline, param_grid, cv=3, verbose=4)
grid_search.fit(X_train, y_train)

end_time = time.time()

# Report how long the search took so the cost of re-running this cell is visible.
elapsed_time = end_time - start_time
print(f"Grid search finished in {elapsed_time:.1f} s")

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV 1/3] END pca__n_components=2, svm__kernel=linear;, score=0.525 total time= 1.2min
[CV 2/3] END pca__n_components=2, svm__kernel=linear;, score=0.531 total time=  60.0s
[CV 3/3] END pca__n_components=2, svm__kernel=linear;, score=0.527 total time=  55.0s
[CV 1/3] END pca__n_components=2, svm__kernel=rbf;, score=0.564 total time=  50.4s
[CV 2/3] END pca__n_components=2, svm__kernel=rbf;, score=0.565 total time=  49.9s
[CV 3/3] END pca__n_components=2, svm__kernel=rbf;, score=0.568 total time=  49.3s
[CV 1/3] END pca__n_components=2, svm__kernel=poly;, score=0.481 total time=  38.6s
[CV 2/3] END pca__n_components=2, svm__kernel=poly;, score=0.494 total time=  40.0s
[CV 3/3] END pca__n_components=2, svm__kernel=poly;, score=0.489 total time=  37.5s
[CV 1/3] END pca__n_components=2, svm__kernel=sigmoid;, score=0.499 total time=  28.9s
[CV 2/3] END pca__n_components=2, svm__kernel=sigmoid;, score=0.490 total time=  27.1s
[CV 3/

In [45]:
# Pull the winning pipeline, its hyper-parameters and its mean CV score out of the search
best_pipeline, best_params, best_score = (
    grid_search.best_estimator_,
    grid_search.best_params_,
    grid_search.best_score_,
)

print("Best Parameters: ", best_params)
print("Best Score: ", best_score)

Best Parameters:  {'pca__n_components': 0.9, 'svm__kernel': 'rbf'}
Best Score:  0.671649863351417


In [46]:
# Score the selected pipeline on the held-out test arrays
accuracy = best_pipeline.score(X=X_test, y=y_test)
print("Accuracy:", accuracy)

Accuracy: 0.693


In [47]:
# Predicted labels for the test arrays
y_pred = best_pipeline.predict(X_test)

# Per-class precision / recall / F1; names follow the label encoding (0 = cat, 1 = dog)
target_names = ['Cat', 'Dog']
classification_rep = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    target_names=target_names,
)
print("Classification Report:\n", classification_rep)

Classification Report:
               precision    recall  f1-score   support

         Cat       0.69      0.71      0.70      2500
         Dog       0.70      0.68      0.69      2500

    accuracy                           0.69      5000
   macro avg       0.69      0.69      0.69      5000
weighted avg       0.69      0.69      0.69      5000



In [63]:
# Confusion matrix of true labels (y_test) against predictions (y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[1770  730]
 [ 805 1695]]


In [62]:
# `import matplotlib.pyplot as plt` binds only the name `plt`, so the top-level
# package has to be imported explicitly before its version can be read.
import matplotlib

print(matplotlib.__version__)

3.8.0
