In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the Kaggle dataset through kagglehub and keep whatever dataset_download
# returns under the generated variable name (presumably the local directory of
# the downloaded files -- see the kagglehub documentation).
_kaggle_dataset_handle = 'ashishsaxena2209/animal-image-datasetdog-cat-and-panda'
ashishsaxena2209_animal_image_datasetdog_cat_and_panda_path = kagglehub.dataset_download(
    _kaggle_dataset_handle
)

print('Data source import complete.')


In [None]:
import glob
import os

import numpy as np
from skimage.color import rgb2gray, rgb2hsv
from skimage.feature import hog
from skimage.io import imread
from skimage.transform import resize, rotate
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, GroupKFold, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

# Locate the dataset. The hard-coded Colab cache path is the default, but the
# cache directory and version number can differ between environments, so prefer
# the directory reported by kagglehub in the download cell when it was run and
# contains the expected 'animals' folder.
dataset_path = '/root/.cache/kagglehub/datasets/ashishsaxena2209/animal-image-datasetdog-cat-and-panda/versions/1/animals'
_downloaded_root = globals().get('ashishsaxena2209_animal_image_datasetdog_cat_and_panda_path')
if _downloaded_root and os.path.isdir(os.path.join(_downloaded_root, 'animals')):
    dataset_path = os.path.join(_downloaded_root, 'animals')

# Define categories (sub-folder names, also used as class labels)
categories = ['cats', 'dogs', 'panda']

# Initialize lists for images, labels and the id of the source photo each
# sample was derived from (original + augmented copies share one id).
animal_images = []
animal_labels = []
image_groups = []

# Load images with data augmentation
source_index = 0
for animal in categories:
    folder_path = os.path.join(dataset_path, animal)
    # glob order depends on the filesystem; sort so the seeded split below is
    # reproducible from run to run.
    for image_path in sorted(glob.glob(f'{folder_path}/*.jpg')):
        try:
            image = imread(image_path)
            image = resize(image, (128, 64))  # Resize images to uniform size

            # Check if the image is RGB (3 channels), if not, convert to 3 channels
            if image.ndim == 2:  # If the image is grayscale (only one channel)
                image = np.stack([image] * 3, axis=-1)  # Repeat the gray plane three times
            elif image.shape[2] != 3:  # e.g. RGBA: reported and skipped by the handler below
                raise ValueError(f"Unexpected number of channels: {image.shape[2]} in {image_path}")

            # Data augmentation: Rotate and flip
            image_rotated = rotate(image, angle=15)
            image_flipped = np.fliplr(image)

            # Convert images to grayscale and add to dataset
            image_gray = rgb2gray(image)
            image_rotated_gray = rgb2gray(image_rotated)
            image_flipped_gray = rgb2gray(image_flipped)

            animal_images.extend([image_gray, image_rotated_gray, image_flipped_gray])
            animal_labels.extend([animal, animal, animal])
            image_groups.extend([source_index] * 3)
            source_index += 1
        except Exception as e:
            # Best-effort loading: report the unreadable file and carry on.
            print(f"Error loading {image_path}: {e}")

# Check if images were loaded successfully
print(f"Loaded {len(animal_images)} images and {len(animal_labels)} labels.")

# If no images are loaded, stop execution
if len(animal_images) == 0:
    raise ValueError("No images found. Please check your dataset path and structure.")

# Extract HOG features from grayscale images
hog_features = np.array([
    hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=False)
    for image in animal_images
])

# Encoding labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(animal_labels)
image_groups = np.array(image_groups)

# Split dataset BY SOURCE PHOTO. Splitting the augmented samples directly would
# put a photo in the training set and its rotated/flipped copy in the test set,
# which leaks information and inflates the reported scores. Every variant of a
# held-out photo therefore goes to the test set together.
source_ids, first_sample = np.unique(image_groups, return_index=True)
train_sources, test_sources = train_test_split(
    source_ids,
    test_size=0.2,
    random_state=42,
    stratify=encoded_labels[first_sample],  # keep the class balance in both parts
)
train_mask = np.isin(image_groups, train_sources)
X_train, X_test = hog_features[train_mask], hog_features[~train_mask]
y_train, y_test = encoded_labels[train_mask], encoded_labels[~train_mask]
groups_train = image_groups[train_mask]

# Grid search for optimal KNN parameters (using n_neighbors).
# GroupKFold keeps all variants of one photo in the same fold, for the same
# leakage reason as above.
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}
grid_search = GridSearchCV(
    KNeighborsClassifier(),
    param_grid,
    cv=GroupKFold(n_splits=5),
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train, groups=groups_train)

# Best parameters found by grid search
print(f"Best parameters: {grid_search.best_params_}")

# Train with the best parameters (GridSearchCV refits the best model on X_train)
knn_clf = grid_search.best_estimator_
y_pred = knn_clf.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


In [None]:
from google.colab import drive
drive.mount('/content/drive')

from PIL import Image
import numpy as np
from skimage.color import rgb2gray
from skimage.feature import hog
from skimage.transform import resize
from sklearn.neighbors import KNeighborsClassifier  # Assuming the classifier is trained

# Classify one image from Google Drive with the model trained in the previous
# cell. Requires `knn_clf` and `label_encoder` from that cell.

# Set the path to the image in your Google Drive
image_path = '/content/drive/MyDrive/test2.jpg'  # Update this path

# Open the image using Pillow; the context manager closes the file handle once
# the pixel data has been converted.
with Image.open(image_path) as source_image:
    image = source_image.convert("RGB")  # Ensure the image is in RGB format

# Convert the image to a numpy array
image_np = np.array(image)

# Resize the image to the same dimensions used during training (128x64)
image_resized = resize(image_np, (128, 64))

# The classifier was trained on HOG descriptors of GRAYSCALE images, so the
# query image must go through the same rgb2gray step. Computing HOG on the RGB
# image with channel_axis=-1 yields a different descriptor and makes the
# nearest-neighbour distances meaningless.
image_gray = rgb2gray(image_resized)

# Extract HOG features (same parameters as in training)
hog_features = hog(image_gray, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=False)

# Assuming knn_clf is the trained KNN model
prediction = knn_clf.predict([hog_features])

# Map the encoded integer back to its class name before reporting it
predicted_label = label_encoder.inverse_transform(prediction)[0]

# Print the predicted class
print(f"Predicted class: {predicted_label}")


In [None]:
# Part 2: interactive annotation tool. Run it locally (e.g. from VS Code); it opens OpenCV/Tk windows, so it will not work on Colab.
import cv2
import json
import os
import tkinter as tk
from tkinter import simpledialog

# Module-level state shared between the mouse callback and the main loop.
drawing = False               # True while the left mouse button is held down
ix = iy = -1                  # Corner where the current drag started
annotations = list()          # Accepted boxes, appended to by the callback
num_bboxes = bboxes_drawn = 0  # Requested number of boxes / boxes accepted so far

# Labels the user is allowed to enter -- update with your own labels
classes = ['cat', 'dog', 'panda']

def _normalized_bbox(x1, y1, x2, y2):
    """Return [left, top, right, bottom] whatever direction the box was dragged in."""
    return [min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)]


# Mouse callback function
def draw_rectangle(event, x, y, flags, param):
    """OpenCV mouse callback that lets the user drag a box and label it.

    Args:
        event: OpenCV mouse event code (compared against cv2.EVENT_*).
        x, y: Mouse position passed in by OpenCV for this event.
        flags, param: Part of the callback signature; not used here.

    Side effects:
        Updates the module-level drag state (``ix``, ``iy``, ``drawing``).
        When a box is released and the entered label is one of ``classes``,
        the box is drawn onto ``img``, a ``{"label", "bbox"}`` record is
        appended to ``annotations`` and ``bboxes_drawn`` is incremented.
        The stored bbox is always ``[left, top, right, bottom]``.
    """
    global ix, iy, drawing, bboxes_drawn

    if event == cv2.EVENT_LBUTTONDOWN:  # Mouse button pressed
        drawing = True
        ix, iy = x, y

    elif event == cv2.EVENT_MOUSEMOVE:  # Mouse is moving
        if drawing:
            # Rubber-band preview on a copy so the real image stays clean
            img_copy = img.copy()
            cv2.rectangle(img_copy, (ix, iy), (x, y), (0, 255, 0), 2)
            cv2.imshow("Image", img_copy)

    elif event == cv2.EVENT_LBUTTONUP:  # Mouse button released
        if not drawing:
            # Release without a matching press: ix/iy are stale, ignore it.
            return
        drawing = False

        bbox = _normalized_bbox(ix, iy, x, y)
        left, top, right, bottom = bbox
        if left == right or top == bottom:
            # A plain click (or a perfectly straight drag) encloses nothing.
            cv2.imshow("Image", img)
            print("Empty bounding box ignored.")
            return

        # Show the candidate box on a copy while the label dialog is open; it
        # is only committed to the image once the label has been accepted.
        preview = img.copy()
        cv2.rectangle(preview, (left, top), (right, bottom), (0, 255, 0), 2)
        cv2.imshow("Image", preview)
        print("Bounding box drawn!")

        # Ask for the label using tkinter
        root = tk.Tk()
        root.withdraw()  # Hide the main window
        label = simpledialog.askstring("Input", f"Enter label for this object (Available labels: {', '.join(classes)}):")
        root.destroy()

        if label is not None:  # None means the dialog was cancelled
            label = label.strip()

        if label in classes:
            cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
            cv2.imshow("Image", img)
            annotations.append({"label": label, "bbox": bbox})
            print(f"Bounding box and label saved: {label}")
            bboxes_drawn += 1
        else:
            cv2.imshow("Image", img)  # Discard the preview of the rejected box
            print("Invalid label! Bounding box not saved.")

# Load image
image_name = 'test2.jpg'  # Update with the correct image name
image_path = os.path.join(os.getcwd(), image_name)
img = cv2.imread(image_path)  # None when the file is missing or unreadable

if img is None:
    print("Image not found! Check the path.")
else:
    # Ask for the number of bounding boxes to draw
    root = tk.Tk()
    root.withdraw()  # Hide the main window
    num_bboxes = simpledialog.askinteger("Input", "Enter the number of bounding boxes to draw:")
    root.destroy()

    if num_bboxes is None or num_bboxes <= 0:  # None means the dialog was cancelled
        print("Invalid number of bounding boxes. Exiting.")
    else:
        # Create a window and set the mouse callback
        cv2.namedWindow("Image")
        cv2.setMouseCallback("Image", draw_rectangle)

        print("Press 'q' to quit and save annotations.")
        try:
            # Show the base image once. The mouse callback refreshes the window
            # itself; re-showing `img` on every iteration would immediately
            # overwrite the live preview it draws while the user is dragging.
            cv2.imshow("Image", img)
            while True:
                key = cv2.waitKey(10) & 0xFF  # Also lets OpenCV process GUI events
                if key == ord('q'):  # Press 'q' to quit
                    break
                if cv2.getWindowProperty("Image", cv2.WND_PROP_VISIBLE) < 1:  # Handle window close event
                    break
                if bboxes_drawn >= num_bboxes:  # Stop when the specified number of bounding boxes are drawn
                    break
        finally:
            # Close the window even if the loop is interrupted by an error.
            cv2.destroyAllWindows()

        # Save annotations as JSON
        annotations_path = os.path.join(os.getcwd(), 'annotations.json')
        with open(annotations_path, 'w') as f:
            json.dump(annotations, f)

        print(f"Annotations saved to {annotations_path}")

In [None]:
# Part 2 (continued): reload the saved annotations and draw them on the image.
import cv2
import json
import os

# Load annotations from JSON file
annotations_path = os.path.join(os.getcwd(), 'annotations.json')
with open(annotations_path, 'r') as f:
    annotations = json.load(f)

# Load image
image_name = 'test2.jpg'  # Update with the correct image name
image_path = os.path.join(os.getcwd(), image_name)
img = cv2.imread(image_path)  # None when the file is missing or unreadable

if img is None:
    print("Image not found! Check the path.")
else:
    # Draw bounding boxes on the image
    for annotation in annotations:
        label = annotation['label']
        bbox = annotation['bbox']
        x1, y1, x2, y2 = bbox
        # Boxes may have been dragged in any direction; work with the true
        # top-left / bottom-right corners so the label sits at the top-left.
        left, right = min(x1, x2), max(x1, x2)
        top, bottom = min(y1, y2), max(y1, y2)
        cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)

        # putText anchors the text at its bottom-left corner. Put the label
        # above the box when it fits; otherwise (box close to the top edge)
        # place it just inside the box so it is not drawn outside the image.
        (_, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.9, 2)
        text_y = top - 10
        if text_y < text_height:
            text_y = top + text_height + 10
        cv2.putText(img, label, (left, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    try:
        # Display the image with bounding boxes
        cv2.imshow("Detected Objects", img)
        print("Press 'q' to quit.")
        while True:
            key = cv2.waitKey(10) & 0xFF  # Also lets OpenCV process GUI events
            if key == ord('q'):  # Press 'q' to quit
                break
            if cv2.getWindowProperty("Detected Objects", cv2.WND_PROP_VISIBLE) < 1:  # Handle window close event
                break
    finally:
        # Close the window even if the loop is interrupted by an error.
        cv2.destroyAllWindows()