In [None]:
from google.colab import drive

# Mount point inside the Colab VM; the dataset archive is read from below it.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [None]:
import zipfile
import os

# Source archive (on the mounted Drive) and the local directory it is unpacked into.
zip_path = "/content/drive/MyDrive/Cat and Dog Dataset/Dataset.zip"
extract_path = "/content/dataset"

# Make sure the destination exists; a pre-existing directory is not an error.
os.makedirs(extract_path, exist_ok=True)

# Unpack every member of the archive into the destination directory.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)


In [None]:
import os

# Sanity check: show the top-level entries the archive was unpacked to.
dataset_entries = os.listdir("/content/dataset")
print(dataset_entries)


['Dataset']


### Model


In [None]:

import cv2
import numpy as np
from skimage.feature import hog

# Side length (pixels) images are resized to before feature extraction.
IMG_SIZE = 128

def extract_hog_features(img):
    """Return the HOG descriptor of a colour image.

    The image is converted to grayscale with ``cv2.COLOR_BGR2GRAY`` (so a
    BGR channel order is assumed), smoothed with a 5x5 Gaussian kernel, and
    passed to ``skimage.feature.hog``.

    Parameters
    ----------
    img : array
        Colour image; assumed to be BGR as produced by ``cv2.imread``.

    Returns
    -------
    The value returned by ``hog`` for the settings below.
    """
    # Grayscale first, then a light blur before computing gradients.
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (5, 5), 0
    )

    hog_settings = dict(
        orientations=9,
        pixels_per_cell=(16, 16),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        transform_sqrt=True,
    )
    return hog(smoothed, **hog_settings)

In [None]:
import os, joblib
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def load_data(path, classes=("cats", "dogs")):
    """Load every readable image under ``path/<class>`` as a HOG feature row.

    Parameters
    ----------
    path : str
        Directory containing one sub-directory per class.
    classes : sequence of str, optional
        Sub-directory names to read. The position of a name in the sequence
        is used as its integer label (default: ``"cats"`` -> 0,
        ``"dogs"`` -> 1).

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X`` holds one ``extract_hog_features`` result per loaded image and
        ``y`` the matching integer labels, in the same order.
    """
    X, y = [], []
    for label, cls in enumerate(classes):
        cls_dir = os.path.join(path, cls)
        # os.listdir gives no ordering guarantee; sorting keeps the row order
        # (and therefore seeded, order-sensitive learners) reproducible.
        for img_name in sorted(os.listdir(cls_dir)):
            img = cv2.imread(os.path.join(cls_dir, img_name))
            if img is None:
                # Entries cv2.imread could not load are skipped.
                continue
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            X.append(extract_hog_features(img))
            y.append(label)
    return np.array(X), np.array(y)

# HOG feature matrix and labels for the whole training split.
X, y = load_data("/content/dataset/Dataset/training_set")

# Standardise the features, then fit a logistic-regression classifier.
lr_steps = [
    ("scaler", StandardScaler()),
    ("lr", LogisticRegression(max_iter=1000)),
]
model = Pipeline(steps=lr_steps)
model.fit(X, y)

# Persist the fitted pipeline so the testing cell can reload it.
joblib.dump(model, "logistic_model.pkl")

['logistic_model.pkl']

In [None]:
from sklearn.svm import SVC

# Standardise the HOG features, then fit an RBF-kernel SVM on them.
svm_steps = [
    ("scaler", StandardScaler()),
    ("svm", SVC(kernel="rbf", C=5, gamma="scale")),
]
model = Pipeline(steps=svm_steps)
model.fit(X, y)

# Persist the fitted pipeline so the testing cell can reload it.
joblib.dump(model, "svm_model.pkl")

['svm_model.pkl']

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Forest hyper-parameters; random_state is fixed so the fit is seeded.
rf_params = {
    "n_estimators": 200,
    "max_depth": 20,
    "random_state": 42,
}
model = RandomForestClassifier(**rf_params)
model.fit(X, y)

# Persist the fitted forest so the testing cell can reload it.
joblib.dump(model, "rf_model.pkl")

['rf_model.pkl']

In [None]:
from sklearn.cluster import KMeans
from scipy.stats import mode

# Cluster the HOG features into two groups without using the labels.
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(X)

# Give each cluster the most frequent true label among its training members,
# so cluster ids can later be translated into class predictions.
cluster_map = {
    cluster_id: mode(y[kmeans.labels_ == cluster_id], keepdims=True)[0][0]
    for cluster_id in range(2)
}

# Store the clusterer together with its cluster-id -> label mapping.
joblib.dump((kmeans, cluster_map), "kmeans_model.pkl")

['kmeans_model.pkl']

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Pixel values are scaled to [0, 1] as batches are read from disk.
datagen = ImageDataGenerator(rescale=1./255)

# One sub-directory per class; class_mode="binary" yields a single 0/1 label per image.
train_gen = datagen.flow_from_directory(
    "/content/dataset/Dataset/training_set",
    target_size=(128,128),
    batch_size=32,
    class_mode="binary"
)

# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` keyword on Conv2D, which makes Keras emit a warning when
# used inside a Sequential model. The architecture itself is unchanged.
model = Sequential([
    tf.keras.Input(shape=(128,128,3)),
    Conv2D(32, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation="relu"),
    # Single sigmoid unit: output is thresholded at 0.5 in the testing cell.
    Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(train_gen, epochs=5)
model.save("cnn_model.h5")

Found 8005 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 56ms/step - accuracy: 0.5598 - loss: 0.7798
Epoch 2/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 49ms/step - accuracy: 0.7023 - loss: 0.5658
Epoch 3/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - accuracy: 0.7811 - loss: 0.4446
Epoch 4/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 49ms/step - accuracy: 0.8516 - loss: 0.3356
Epoch 5/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - accuracy: 0.9249 - loss: 0.1987




### Testing

In [None]:
import joblib, cv2, os, numpy as np
from sklearn.metrics import accuracy_score
import tensorflow as tf

def load_test_data(path):
    """Build HOG features and labels for the images under ``path``.

    Reads ``path/cats`` (label 0) and ``path/dogs`` (label 1), resizes each
    loadable image to ``IMG_SIZE`` x ``IMG_SIZE`` and runs
    ``extract_hog_features`` on it. Entries ``cv2.imread`` could not load
    are skipped. Performs the same steps as ``load_data``.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        Feature rows and their integer labels, in matching order.
    """
    features, labels = [], []
    for label, cls in enumerate(["cats", "dogs"]):
        class_dir = os.path.join(path, cls)
        for img_name in os.listdir(class_dir):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is not None:
                resized = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                features.append(extract_hog_features(resized))
                labels.append(label)
    return np.array(features), np.array(labels)

# HOG features and labels for the held-out split.
X_test, y_test = load_test_data("/content/dataset/Dataset/test_set")

# Display name -> file written by the corresponding training cell.
model_files = {
    "Logistic": "logistic_model.pkl",
    "SVM": "svm_model.pkl",
    "RandomForest": "rf_model.pkl",
}
models = {
    display_name: joblib.load(file_name)
    for display_name, file_name in model_files.items()
}

# Score each supervised model on the same test features.
for name, model in models.items():
    preds = model.predict(X_test)
    print(name, "Accuracy:", accuracy_score(y_test, preds))

# KMeans
# Cluster ids are translated to class labels through the stored mapping.
kmeans, cmap = joblib.load("kmeans_model.pkl")
clusters = kmeans.predict(X_test)
preds = np.array([cmap[cluster_id] for cluster_id in clusters])
print("KMeans Accuracy:", accuracy_score(y_test, preds))

# CNN
cnn = tf.keras.models.load_model("cnn_model.h5")
cnn_images = []
y_true = []

for label, cls in enumerate(["cats", "dogs"]):
    cls_dir = f"/content/dataset/Dataset/test_set/{cls}"
    for img_name in os.listdir(cls_dir):
        img = cv2.imread(os.path.join(cls_dir, img_name))
        if img is None: continue
        # cv2.imread yields BGR, while the network was trained on images fed
        # by flow_from_directory, which loads RGB by default. Convert so the
        # channels match what the model saw during training.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        cnn_images.append(cv2.resize(img, (128,128)))
        y_true.append(label)

# Same 1/255 rescaling as the training generator.
cnn_inputs = np.asarray(cnn_images, dtype=np.float32) / 255.0

# One batched, silent predict call instead of one call (and one progress
# bar) per image.
cnn_probs = cnn.predict(cnn_inputs, batch_size=32, verbose=0)

# Sigmoid output above 0.5 -> class 1 ("dogs"), otherwise class 0 ("cats").
y_cnn = (cnn_probs[:, 0] > 0.5).astype(int)

print("CNN Accuracy:", accuracy_score(y_true, y_cnn))

Logistic Accuracy: 0.6777063766683143
SVM Accuracy: 0.7750865051903114
RandomForest Accuracy: 0.7276322293623332
KMeans Accuracy: 0.5180425111220959




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 519ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2