In [1]:
pip install opencv-python opencv-python-headless joblib scikit-learn numpy



In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [24]:
import zipfile


zip_path = '/content/drive/MyDrive/Prodigy/training_set.zip'


with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall('./training_set')


In [23]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [22]:
def load_images_with_downsampling(folder_path, img_size, num_samples_per_class):

    data, labels = [], []
    cat_count, dog_count = 0, 0

    for img_name in os.listdir(folder_path):
        if 'cat' in img_name and cat_count < num_samples_per_class:
            label = 0
            cat_count += 1
        elif 'dog' in img_name and dog_count < num_samples_per_class:
            label = 1
            dog_count += 1
        else:
            continue

        img_path = os.path.join(folder_path, img_name)
        try:
            img = cv2.imread(img_path)
            img = cv2.resize(img, (img_size, img_size))
            img = img.flatten()

            data.append(img)
            labels.append(label)
        except Exception as e:
            print(f"Error reading {img_name}: {e}")

    return np.array(data), np.array(labels)


In [34]:
FOLDER_PATH = '/content/training_set/training_set/training_set'
IMG_SIZE = 64
NUM_SAMPLES_PER_CLASS = 2500

In [36]:
def load_images_from_subfolders(folder_path, img_size, num_samples_per_class):
    data, labels = [], []
    class_map = {'cats': 0, 'dogs': 1}

    for class_folder in os.listdir(folder_path):
        class_path = os.path.join(folder_path, class_folder)
        if not os.path.isdir(class_path):
            continue

        count = 0
        for img_name in os.listdir(class_path):
            if count >= num_samples_per_class:
                break

            img_path = os.path.join(class_path, img_name)
            try:
                img = cv2.imread(img_path)
                img = cv2.resize(img, (img_size, img_size))
                img = img.flatten()
                data.append(img)
                labels.append(class_map[class_folder])
                count += 1
            except Exception as e:
                print(f"Error reading {img_name}: {e}")

    return np.array(data), np.array(labels)

X, y = load_images_from_subfolders(FOLDER_PATH, IMG_SIZE, NUM_SAMPLES_PER_CLASS)

if X.size == 0 or y.size == 0:
    print("Error: Dataset is empty. Please check the folder path or file contents.")
else:
    X = X / 255.0
    print(f"Dataset Size: {X.shape[0]} images")
    print(f"Labels Shape: {y.shape}")

Error reading _DS_Store: OpenCV(4.11.0) /io/opencv/modules/imgproc/src/resize.cpp:4208: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

Error reading _DS_Store: OpenCV(4.11.0) /io/opencv/modules/imgproc/src/resize.cpp:4208: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

Dataset Size: 5000 images
Labels Shape: (5000,)


In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


print(f"Training Set Size: {X_train.shape[0]}")
print(f"Testing Set Size: {X_test.shape[0]}")


Training Set Size: 4000
Testing Set Size: 1000


In [38]:
print("\nTraining the SVM model...")
svm = SVC(kernel='rbf', C=1, gamma='scale')
svm.fit(X_train, y_train)
print("Training Complete!")


Training the SVM model...
Training Complete!


In [39]:
y_pred = svm.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")


Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.62      0.63       500
           1       0.64      0.67      0.65       500

    accuracy                           0.64      1000
   macro avg       0.64      0.64      0.64      1000
weighted avg       0.64      0.64      0.64      1000

Accuracy: 64.30%


In [40]:
joblib.dump(svm, '/content/drive/MyDrive/svm_model.pkl')

['/content/drive/MyDrive/svm_model.pkl']

In [41]:
try:
    svm = joblib.load('/content/drive/MyDrive/svm_model.pkl')
except FileNotFoundError:
    print("Model file not found. Please check the file path.")