In [2]:
import cv2
import numpy as np
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Set your base directory
# Root folder of the dataset: the loaders below read images from its
# "train" and "test" subfolders. This is an absolute, machine-specific
# Windows path, so it has to be edited before running anywhere else.
base_path = r"C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2"

# Load training data
def load_train_data():
    """Load the first five cat and first five dog training images.

    Files ``cat.0.jpg`` .. ``cat.4.jpg`` and ``dog.0.jpg`` .. ``dog.4.jpg``
    are read from ``<base_path>/train/Cat`` and ``<base_path>/train/Dog``.
    Every readable image is resized to 64x64, flattened and divided by 255.0.
    One line is printed per file saying whether it loaded; files that cannot
    be read are skipped.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays, cats first. Label 0
        marks a cat and label 1 marks a dog.
    """
    print("Loading training data...")

    features = []
    targets = []

    # (class folder, file-name prefix, numeric label); cats come before dogs.
    class_specs = (("Cat", "cat", 0), ("Dog", "dog", 1))
    for folder, prefix, target in class_specs:
        for index in range(5):
            image_path = os.path.join(base_path, "train", folder, f"{prefix}.{index}.jpg")
            image = cv2.imread(image_path)
            if image is None:
                print(f"Failed to load: {image_path}")
                continue

            pixels = cv2.resize(image, (64, 64)).flatten() / 255.0
            features.append(pixels)
            targets.append(target)
            print(f"Loaded: {image_path}")

    return np.array(features), np.array(targets)

# Load test data
def load_test_data():
    """Load the ten fixed test images (five cats, then five dogs).

    Files ``Cat (1).jpg`` .. ``Cat (5).jpg`` and ``Dog (1).jpg`` ..
    ``Dog (5).jpg`` are read from ``<base_path>/test/Cat`` and
    ``<base_path>/test/Dog``. Every readable image is resized to 64x64,
    flattened and divided by 255.0. One line is printed per file saying
    whether it loaded; files that cannot be read are skipped.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays. Label 0 marks a cat
        and label 1 marks a dog.
    """
    print("Loading test data...")

    # Queue every (path, label) pair up front: all cats, then all dogs.
    labelled_paths = [
        (os.path.join(base_path, "test", "Cat", f"Cat ({n}).jpg"), 0)
        for n in range(1, 6)
    ]
    labelled_paths += [
        (os.path.join(base_path, "test", "Dog", f"Dog ({n}).jpg"), 1)
        for n in range(1, 6)
    ]

    samples, classes = [], []
    for file_path, class_id in labelled_paths:
        picture = cv2.imread(file_path)
        if picture is not None:
            resized = cv2.resize(picture, (64, 64))
            samples.append(resized.flatten() / 255.0)
            classes.append(class_id)
            print(f"Loaded: {file_path}")
        else:
            print(f"Failed to load: {file_path}")

    return np.array(samples), np.array(classes)

# Load and preprocess data
X_train, y_train = load_train_data()
X_test, y_test = load_test_data()

# Fit the scaler on the training features only, then apply that same fitted
# transform to the test features. The fitted scaler is saved further down so
# new images can be transformed the same way.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# KNN Classifier
print("\nKNN CLASSIFIER")
print("-" * 30)
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train_scaled, y_train)
knn_pred = knn.predict(X_test_scaled)
knn_acc = accuracy_score(y_test, knn_pred)
print(f"KNN Accuracy: {knn_acc:.4f}")
# labels=[0, 1] pins the report to both classes. Without it the classes are
# inferred from y_test/knn_pred, and the call raises ValueError when only one
# class is present (e.g. after test images failed to load) because
# target_names would then have the wrong length.
print(classification_report(y_test, knn_pred, labels=[0, 1], target_names=['Cat', 'Dog']))

# Logistic Regression
print("\nLOGISTIC REGRESSION")
print("-" * 30)
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train_scaled, y_train)
lr_pred = lr.predict(X_test_scaled)
lr_acc = accuracy_score(y_test, lr_pred)
print(f"Logistic Regression Accuracy: {lr_acc:.4f}")
# Same reason for labels=[0, 1] as in the KNN report above.
print(classification_report(y_test, lr_pred, labels=[0, 1], target_names=['Cat', 'Dog']))

# Model Comparison
print("\nMODEL COMPARISON")
print("-" * 30)
print(f"KNN Accuracy: {knn_acc:.4f}")
print(f"Logistic Regression Accuracy: {lr_acc:.4f}")
# Ties go to logistic regression (>=).
# NOTE(review): the winner is chosen by its accuracy on the test set, so that
# accuracy is no longer an unbiased estimate for the selected model.
model_name = "logistic_regression" if lr_acc >= knn_acc else "knn"
best_model = lr if model_name == "logistic_regression" else knn
print(f"Best Model: {model_name.replace('_', ' ').title()}")

# Save model
# Both files are written relative to the current working directory;
# test_new_image() rebuilds the same file names from model_name.
joblib.dump(best_model, f"{model_name}_model.pkl")
joblib.dump(scaler, f"{model_name}_scaler.pkl")
print(f"Model and scaler saved as {model_name}_model.pkl and {model_name}_scaler.pkl")

# Test on new image
def test_new_image(image_path):
    """Classify a single image file with the saved model and print the result.

    The model and scaler are read from ``<model_name>_model.pkl`` and
    ``<model_name>_scaler.pkl``. The image gets the same preprocessing as the
    training data: resize to 64x64, flatten, divide by 255.0, then scale.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The predicted class (0 = cat, anything else is reported as dog), or
        ``None`` when the image cannot be read.
    """
    classifier = joblib.load(f'{model_name}_model.pkl')
    fitted_scaler = joblib.load(f'{model_name}_scaler.pkl')

    raw = cv2.imread(image_path)
    if raw is None:
        print(f"Error: Could not load image {image_path}")
        return None

    flat = cv2.resize(raw, (64, 64)).flatten() / 255.0
    # The scaler expects a 2-D array, so the single sample becomes one row.
    sample = fitted_scaler.transform(flat.reshape(1, -1))

    prediction = classifier.predict(sample)[0]
    label = "Dog" if prediction != 0 else "Cat"

    if not hasattr(classifier, "predict_proba"):
        print(f"Prediction: {label}")
        return prediction

    # Confidence is the largest class probability, shown as a percentage.
    class_probabilities = classifier.predict_proba(sample)[0]
    confidence = max(class_probabilities) * 100
    print(f"Prediction: {label} ({confidence:.1f}% confidence)")
    return prediction

# Example usage
# These lines only print the call as text for the user to copy into a new
# cell; test_new_image() itself is not invoked here.
print("\nExample test command:")
print(r"test_new_image(r'C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\test\Cat\Cat (1).jpg')")


Loading training data...
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Cat\cat.0.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Cat\cat.1.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Cat\cat.2.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Cat\cat.3.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Cat\cat.4.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Dog\dog.0.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Dog\dog.1.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Dog\dog.2.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2\train\Dog\dog.3.jpg
Loaded: C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2

In [4]:
import cv2
import numpy as np
import os
import glob
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Root folder of the dataset: the loaders below read images from its
# "train" and "test" subfolders. This is an absolute, machine-specific
# Windows path, so it has to be edited before running anywhere else.
base_path = r"C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2"

def load_train_data():
    """Load every cat and dog training image found under ``base_path``.

    Files matching ``cat.*.jpg`` in ``<base_path>/train/Cat`` and
    ``dog.*.jpg`` in ``<base_path>/train/Dog`` are read, resized to 64x64,
    flattened and divided by 255.0. Files that cannot be read are reported
    and skipped.

    The summary line reports how many images were actually loaded per class,
    which can be lower than the number of files that matched the pattern.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays, cats first. Label 0
        marks a cat and label 1 marks a dog.
    """
    data = []
    labels = []
    loaded_counts = {}

    # (class folder, glob pattern, numeric label); cats come before dogs.
    for folder, pattern, label in (("Cat", "cat.*.jpg", 0), ("Dog", "dog.*.jpg", 1)):
        # glob returns matches in a filesystem-dependent order; sorting keeps
        # the sample order (and anything seeded downstream) reproducible.
        paths = sorted(glob.glob(os.path.join(base_path, "train", folder, pattern)))
        loaded = 0
        for path in paths:
            img = cv2.imread(path)
            if img is None:
                print(f"Failed to load: {path}")
                continue
            img = cv2.resize(img, (64, 64))
            data.append(img.flatten() / 255.0)
            labels.append(label)
            loaded += 1
        loaded_counts[folder] = loaded

    print(f"Training: {loaded_counts['Cat']} cats, {loaded_counts['Dog']} dogs")
    return np.array(data), np.array(labels)

def load_test_data():
    """Load the ten fixed test images (five cats, then five dogs).

    Files ``Cat (1).jpg`` .. ``Cat (5).jpg`` and ``Dog (1).jpg`` ..
    ``Dog (5).jpg`` are read from ``<base_path>/test/Cat`` and
    ``<base_path>/test/Dog``. Every readable image is resized to 64x64,
    flattened and divided by 255.0; unreadable files are skipped without any
    message.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays. Label 0 marks a cat
        and label 1 marks a dog.
    """
    vectors = []
    answers = []

    # enumerate() yields label 0 for "Cat" and label 1 for "Dog".
    for answer, animal in enumerate(("Cat", "Dog")):
        for number in range(1, 6):
            file_name = f"{animal} ({number}).jpg"
            picture = cv2.imread(os.path.join(base_path, "test", animal, file_name))
            if picture is None:
                continue
            vectors.append(cv2.resize(picture, (64, 64)).flatten() / 255.0)
            answers.append(answer)

    return np.array(vectors), np.array(answers)

# Read both splits from disk
X_train, y_train = load_train_data()
X_test, y_test = load_test_data()

# Standardize: statistics come from the training split and are reused as-is
# for the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3-nearest-neighbours classifier
print("\nKNN CLASSIFIER")
print("-" * 30)
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)
knn_pred = knn.predict(X_test_scaled)
knn_acc = accuracy_score(y_test, knn_pred)
print(f"Accuracy: {knn_acc:.4f}")

# Logistic regression classifier
print("\nLOGISTIC REGRESSION")
print("-" * 30)
lr = LogisticRegression(max_iter=1000, random_state=42).fit(X_train_scaled, y_train)
lr_pred = lr.predict(X_test_scaled)
lr_acc = accuracy_score(y_test, lr_pred)
print(f"Accuracy: {lr_acc:.4f}")

# Side-by-side test accuracies
print(f"\nKNN: {knn_acc:.4f}")
print(f"Logistic Regression: {lr_acc:.4f}")

# Pick the winner by test accuracy; a tie goes to logistic regression
model_name = "lr" if lr_acc >= knn_acc else "knn"
best_model = lr if model_name == "lr" else knn
print("Best: Logistic Regression" if model_name == "lr" else "Best: KNN")

# Persist the winner and the fitted scaler under names derived from
# model_name (written to the current working directory)
joblib.dump(best_model, f"{model_name}_model.pkl")
joblib.dump(scaler, f"{model_name}_scaler.pkl")

# Test function
def test_new_image(image_path):
    """Predict cat or dog for one image file and print the outcome.

    The model and scaler are loaded from ``<model_name>_model.pkl`` and
    ``<model_name>_scaler.pkl``. The image is resized to 64x64, flattened,
    divided by 255.0 and scaled before prediction.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The predicted class (0 = cat, anything else is reported as dog), or
        ``None`` when the image cannot be read.
    """
    model = joblib.load(f'{model_name}_model.pkl')
    scaler = joblib.load(f'{model_name}_scaler.pkl')

    picture = cv2.imread(image_path)
    if picture is None:
        print(f"Could not load: {image_path}")
        return None

    picture = cv2.resize(picture, (64, 64))
    row = (picture.flatten() / 255.0).reshape(1, -1)  # one sample, one row
    row_scaled = scaler.transform(row)

    prediction = model.predict(row_scaled)[0]
    if prediction == 0:
        label = "Cat"
    else:
        label = "Dog"

    # Append a confidence figure only when the model can provide probabilities.
    if hasattr(model, "predict_proba"):
        probabilities = model.predict_proba(row_scaled)[0]
        confidence = max(probabilities) * 100
        message = f"Prediction: {label} ({confidence:.1f}%)"
    else:
        message = f"Prediction: {label}"
    print(message)

    return prediction

# Usage hint only: prints how to call test_new_image(); nothing is classified here.
print("\nTest with: test_new_image('your_image.jpg')")

Training: 1000 cats, 1000 dogs

KNN CLASSIFIER
------------------------------
Accuracy: 0.4000

LOGISTIC REGRESSION
------------------------------
Accuracy: 0.6000

KNN: 0.4000
Logistic Regression: 0.6000
Best: Logistic Regression

Test with: test_new_image('your_image.jpg')


In [None]:
# Import necessary libraries
import cv2
import numpy as np
import os
import glob
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
import joblib

# Path to the dataset directory (absolute and machine-specific; edit before
# running on another machine)
base_path = r"C:\Users\rtape\Downloads\Seneca\CVI620NSB_Summer2025\codes\Assignment2\Q2"

# -------------------
# Step 1: Load and preprocess data
# -------------------

data = []
labels = []

# Loop through all files three levels below base_path
# (<split>/<class folder>/<file>). glob returns matches in a
# filesystem-dependent order, so the list is sorted to keep the seeded
# train/test split below reproducible.
for address in sorted(glob.glob(os.path.join(base_path, '*', '*', '*'))):
    img = cv2.imread(address)  # Read image
    if img is None:
        continue  # Skip anything that is not a readable image

    img = cv2.resize(img, (32, 32))  # Resize to 32x32 pixels
    img = img.flatten() / 255.0      # Flatten to 1D array and normalize pixel values
    data.append(img)

    # Assign label from the class folder that holds the file: 0 = Cat, 1 = Dog.
    # Only the folder name is inspected; matching against the whole path would
    # label every image as a cat if "Cat" appeared anywhere else in the path
    # (for example in base_path or in a file name).
    class_folder = os.path.basename(os.path.dirname(address))
    labels.append(0 if 'Cat' in class_folder else 1)

# Convert to NumPy arrays
X = np.array(data)
y = np.array(labels)

# -------------------
# Step 2: Split and scale data
# -------------------

# Split dataset into training and testing sets (70% train, 30% test).
# The images from the dataset's own train/ and test/ folders are pooled and
# re-split here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features to have 0 mean and unit variance; the scaler is fitted
# on the training split only and reused for the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -------------------
# Step 3: Train models using GridSearchCV
# -------------------

# Maps a display name to (best estimator from the grid search, test accuracy)
models = {}

# K-Nearest Neighbors
knn_params = {'n_neighbors': [1, 3, 5, 7], 'weights': ['uniform', 'distance']}
knn_grid = GridSearchCV(KNeighborsClassifier(), knn_params, cv=3)
knn_grid.fit(X_train_scaled, y_train)
models['KNN'] = (knn_grid.best_estimator_, accuracy_score(y_test, knn_grid.predict(X_test_scaled)))

# Logistic Regression
lr_params = {'C': [0.1, 1, 10], 'solver': ['lbfgs', 'liblinear']}
lr_grid = GridSearchCV(LogisticRegression(max_iter=1000), lr_params, cv=3)
lr_grid.fit(X_train_scaled, y_train)
models['Logistic Regression'] = (lr_grid.best_estimator_, accuracy_score(y_test, lr_grid.predict(X_test_scaled)))

# SGD Classifier (Stochastic Gradient Descent)
# random_state is fixed because SGD shuffles the training data; without it the
# reported accuracy changes from run to run even though the split is seeded.
sgd_params = {'alpha': [0.0001, 0.001, 0.01], 'loss': ['hinge', 'log_loss']}
sgd_grid = GridSearchCV(SGDClassifier(max_iter=1000, random_state=42), sgd_params, cv=3)
sgd_grid.fit(X_train_scaled, y_train)
models['SGD'] = (sgd_grid.best_estimator_, accuracy_score(y_test, sgd_grid.predict(X_test_scaled)))

# -------------------
# Step 4: Compare model performances
# -------------------

print("RESULTS:")
for name, (model, acc) in models.items():
    print(f"{name}: {acc:.4f}")  # Print accuracy of each model

# -------------------
# Step 5: Save the best model
# -------------------

# Identify the best performing model (highest test accuracy; on a tie the
# first one inserted into `models` wins).
# NOTE(review): selecting on test accuracy makes that accuracy an optimistic
# estimate for the chosen model.
best_name = max(models.keys(), key=lambda x: models[x][1])
best_model, best_acc = models[best_name]

# Save best model and scaler to disk (current working directory)
joblib.dump(best_model, 'best_cat_dog_model.pkl')
joblib.dump(scaler, 'best_cat_dog_scaler.pkl')

print(f"\nBest Model: {best_name} ({best_acc:.4f})")
print("Model saved as best_cat_dog_model.pkl")
# -------------------
# Step 6: Test new image function
# -------------------

# Predict label for new image
def test_image(image_path):
    """Classify one image file with the saved best model.

    The model and scaler are loaded from ``best_cat_dog_model.pkl`` and
    ``best_cat_dog_scaler.pkl``. The image is resized to 32x32, flattened,
    divided by 255.0 and scaled before prediction.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A string: ``"Cat"`` or ``"Dog"``, followed by a confidence percentage
        in parentheses when the model exposes ``predict_proba``; or
        ``"Could not load image"`` when the file cannot be read.
    """
    # Restore the persisted estimator and its matching scaler
    classifier = joblib.load('best_cat_dog_model.pkl')
    fitted_scaler = joblib.load('best_cat_dog_scaler.pkl')

    # Read the file; bail out with a message string if that fails
    pixels = cv2.imread(image_path)
    if pixels is None:
        return "Could not load image"

    # Same preprocessing as the training data, shaped as a single-row batch
    flat = cv2.resize(pixels, (32, 32)).flatten() / 255.0
    batch = fitted_scaler.transform(flat.reshape(1, -1))

    # Class 0 is reported as Cat, anything else as Dog
    predicted_class = classifier.predict(batch)[0]
    label = "Dog" if predicted_class != 0 else "Cat"

    # Models without probability estimates return the bare label
    if not hasattr(classifier, "predict_proba"):
        return label

    confidence = max(classifier.predict_proba(batch)[0]) * 100
    return f"{label} ({confidence:.1f}%)"

# -------------------
# Step 7: Test on sample dataset images
# -------------------

# List of test images to try
test_paths = [
    os.path.join(base_path, "test", "Cat", "Cat (1).jpg"),
    os.path.join(base_path, "test", "Dog", "Dog (1).jpg")
]

print("\nTest Results:")
for path in test_paths:
    if not os.path.exists(path):
        # Say so instead of silently producing no line for this image.
        print(f"{os.path.basename(path)}: file not found, skipped")
        continue

    result = test_image(path)
    # The expected label comes from the class folder holding the file. Checking
    # the whole path would report "Cat" for every image if "Cat" appeared
    # anywhere else in it (for example in base_path).
    class_folder = os.path.basename(os.path.dirname(path))
    actual = "Cat" if "Cat" in class_folder else "Dog"
    print(f"{os.path.basename(path)}: {result} (Actual: {actual})")

# -------------------
# Final Note
# -------------------

print("\nTo test internet images: test_image('path_to_image.jpg')")


RESULTS:
KNN: 0.5556
Logistic Regression: 0.5920
SGD: 0.5788

Best Model: Logistic Regression (0.5920)
Model saved as best_cat_dog_model.pkl

Test Results:
Cat (1).jpg: Cat (83.2%) (Actual: Cat)
Dog (1).jpg: Dog (77.8%) (Actual: Dog)

To test internet images: test_image('path_to_image.jpg')
