In [5]:
import kagglehub
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# Fetch the latest version of the dataset and report where it was stored
path = kagglehub.dataset_download("muniryadi/cat-vs-rabbit")
print("Path to dataset files:", path)

# Show what the download directory actually contains
dataset_files = os.listdir(path)
print("Files in the dataset directory:", dataset_files)

# Expected locations of the label CSV and of the image folder.
# NOTE(review): the recorded output of this cell lists only 'val-cat-rabbit',
# 'test-images' and 'train-cat-rabbit' in the dataset directory, so neither
# of the two paths below exists there -- adjust them to the real layout.
csv_path = os.path.join(path, 'cats_vs_rabbit.csv')
image_dir = os.path.join(path, 'images')

# Report whether the CSV file is present (diagnostic only, nothing is raised)
if os.path.exists(csv_path):
    print(f"CSV file found at {csv_path}")
else:
    print(f"CSV file not found at {csv_path}")

# Report whether the images directory is present (diagnostic only)
if os.path.exists(image_dir):
    print(f"Images directory found at {image_dir}")
else:
    print(f"Images directory not found at {image_dir}")

# Parameters
image_size = (224, 224)  # Input size for VGG16

# Load data from CSV
def load_data_from_csv(csv_path, image_dir):
    """Load the images listed in a CSV file together with binary labels.

    The CSV must provide a 'Filename' column (image path relative to
    ``image_dir``) and a 'Label' column. Every image that exists and can be
    read is resized to the module-level ``image_size``, converted to an array
    and passed through ``preprocess_input``. Rows whose image is missing or
    unreadable are reported on stdout and skipped.

    Args:
        csv_path: Path to the CSV file describing the dataset.
        image_dir: Directory that contains the image files.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. A label is 0 when the
        'Label' value is 'cat' (ignoring case and surrounding whitespace) and
        1 otherwise. Both arrays are empty when the CSV cannot be read, lacks
        a required column, or no image could be loaded.
    """
    try:
        data = pd.read_csv(csv_path)
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back an empty
        # result of the same type as the success path (NumPy arrays).
        print(f"Error loading CSV: {e}")
        return np.array([]), np.array([])
    print("CSV loaded successfully.")

    # Without these columns the loop below would fail with a KeyError.
    missing_columns = [
        column for column in ('Filename', 'Label') if column not in data.columns
    ]
    if missing_columns:
        print(f"CSV is missing required column(s): {missing_columns}")
        return np.array([]), np.array([])

    features = []
    labels = []
    print("Loading images from CSV...")

    for filename, label in zip(data['Filename'], data['Label']):
        # str() keeps a non-string cell (e.g. NaN) from crashing os.path.join;
        # such a row simply ends up reported as "Image not found".
        img_path = os.path.join(image_dir, str(filename))

        if not os.path.exists(img_path):
            print(f"Image not found: {img_path}")
            continue

        try:
            # Load and preprocess the image
            img = load_img(img_path, target_size=image_size)
            img_array = preprocess_input(img_to_array(img))
        except Exception as e:
            # Skip unreadable images instead of aborting the whole load.
            print(f"Error loading {img_path}: {e}")
            continue

        features.append(img_array)
        # 0 for cats, 1 for rabbits; normalise so 'Cat' / ' cat' are not
        # silently counted as rabbits.
        labels.append(0 if str(label).strip().lower() == 'cat' else 1)

    print(f"Loaded {len(features)} images successfully.")
    return np.array(features), np.array(labels)

# Build the image array and the label vector from the CSV listing
features, labels = load_data_from_csv(csv_path, image_dir)

if len(features) > 0 and len(labels) > 0:
    # Collapse every image into a single row so the SVM receives one flat
    # feature vector per sample
    features = features.reshape(features.shape[0], -1)

    # Hold out 20% of the samples for testing; the fixed seed keeps the
    # split reproducible between runs
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=0.2,
        random_state=42,
    )

    # Fit the classifier on the training portion
    print("Training the SVM model...")
    svm = SVC(kernel='linear')  # 'rbf' or 'poly' select other kernels
    svm.fit(X_train, y_train)
    print("Model training complete.")

    # Score the classifier on the held-out portion
    print("Evaluating the model...")
    y_pred = svm.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
else:
    # Nothing to train on (e.g. the CSV or the images could not be loaded)
    print("No images or labels loaded. Exiting the process.")


Path to dataset files: /root/.cache/kagglehub/datasets/muniryadi/cat-vs-rabbit/versions/4
Files in the dataset directory: ['val-cat-rabbit', 'test-images', 'train-cat-rabbit']
CSV file not found at /root/.cache/kagglehub/datasets/muniryadi/cat-vs-rabbit/versions/4/cats_vs_rabbit.csv
Images directory not found at /root/.cache/kagglehub/datasets/muniryadi/cat-vs-rabbit/versions/4/images
Error loading CSV: [Errno 2] No such file or directory: '/root/.cache/kagglehub/datasets/muniryadi/cat-vs-rabbit/versions/4/cats_vs_rabbit.csv'
No images or labels loaded. Exiting the process.
