In [1]:
print("Hello Data")

Hello Data


### Importing Required Libraries

In [2]:
import os
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm

### Loading Images from our Dataset

In [3]:
IMAGE_SIZE = (128, 128)  # Resize images to this size

In [4]:
def load_images_from_folder(folder, label, image_size=(128, 128), max_images=1000):
    images = []
    labels = []
    
    # Filter only valid image files
    valid_extensions = ('.png', '.jpg', '.jpeg')
    files = [f for f in os.listdir(folder) if f.lower().endswith(valid_extensions)]
    
    # Limit the number of images
    files = files[:max_images]

    print(f"Loading up to {max_images} images from {folder}...")

    for filename in tqdm(files, desc=f"Processing {folder}"):
        path = os.path.join(folder, filename)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            img = cv2.resize(img, image_size)
            img = img.astype(np.float32) / 255.0
            images.append(img)
            labels.append(label)
        else:
            print(f"Warning: Could not read image {path}")
    
    return np.array(images), np.array(labels)

In [28]:
cat_images, cat_labels = load_images_from_folder('train/cats', 0)
dog_images, dog_labels = load_images_from_folder('train/dogs', 1)

Loading up to 1000 images from train/cats...


Processing train/cats: 100%|██████████████████████████████████████████████████████| 1000/1000 [00:01<00:00, 729.23it/s]


Loading up to 1000 images from train/dogs...


Processing train/dogs: 100%|██████████████████████████████████████████████████████| 1000/1000 [00:03<00:00, 319.08it/s]


### Feature Engineering 

In [29]:
X = y = np.concatenate((cat_images, dog_images))
y = np.concatenate((cat_labels, dog_labels))

In [30]:
unique, counts = np.unique(y, return_counts=True)
print(dict(zip(unique, counts)))

{0: 1000, 1: 1000}


In [31]:
y

array([0, 0, 0, ..., 1, 1, 1])

In [33]:
# Flatten images
X_flattened = X.reshape(len(X), -1)

### Splitting Data into training and test set

In [34]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_flattened, y, test_size=0.2, random_state=42)

### Training our svm Model 

In [35]:
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)

SVC(kernel='linear')

### Making Pridiction on Test set

In [36]:
y_pred = svm.predict(X_test)
print(classification_report(y_test, y_pred, target_names=["Cat", "Dog"]))

              precision    recall  f1-score   support

         Cat       0.48      0.55      0.52       199
         Dog       0.49      0.42      0.45       201

    accuracy                           0.48       400
   macro avg       0.49      0.49      0.48       400
weighted avg       0.49      0.48      0.48       400



### Saving our Model

In [37]:
import joblib

# After training your svm model
joblib.dump(svm, 'svm_cat_dog_model.joblib')
print("Model saved successfully!")

Model saved successfully!
