In [None]:
import os
import pickle

from skimage.io import imread
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from PIL import Image
from sklearn.preprocessing import StandardScaler

In [None]:
# Dataset location and class names.
# The dataset root can be overridden with the PET_IMAGES_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset or empty, the original local path below is used.
input_dir = os.environ.get("PET_IMAGES_DIR") or (
    r"C:\Users\vales\OneDrive\Escritorio\kagglecatsanddogs_5340 (1)\PetImages"
)
# One sub-directory of input_dir per class; a sample's label is its index here.
categories = ['Cat', 'Dog']

In [None]:
# Build the feature matrix: every readable image is converted to RGB,
# downscaled to IMAGE_SIZE and flattened into a single 1-D vector.
# The label of a sample is the index of its category in `categories`.
IMAGE_SIZE = (15, 15)  # size tuple handed to PIL's Image.resize

features = []
targets = []
for category_idx, category in enumerate(categories):
    category_path = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order -- and therefore any seeded train/test split -- reproducible.
    for file in sorted(os.listdir(category_path)):
        img_path = os.path.join(category_path, file)
        try:
            with Image.open(img_path) as img:
                # Force three channels so every flattened vector has the same length.
                rgb = img if img.mode == 'RGB' else img.convert('RGB')
                thumbnail = rgb.resize(IMAGE_SIZE)
                pixels = np.array(thumbnail).flatten()
            # Append only after the image decoded successfully, so the two
            # lists always stay the same length.
            features.append(pixels)
            targets.append(category_idx)
        except (IOError, EOFError) as e:
            # Best effort: report the unreadable file and move on to the next one.
            print(f"Failed to read {img_path}. Error: {e}")
            continue

# Fail here with a clear message rather than later inside the train/test split.
if not features:
    raise RuntimeError(f"No readable images found under {input_dir!r}")

# Convert lists to numpy arrays (names consumed by the following cells)
data = np.asarray(features)
labels = np.asarray(targets)

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

# Standardize features by removing the mean and scaling to unit variance.
# The scaler is fitted on the training portion only and then applied to the
# test portion, so no test-set statistics are used during fitting.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Baseline model: a linear-kernel SVM trained on the standardized features.
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)

# Evaluate the baseline on the held-out test set.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Hyper-parameter search over the non-linear kernels:
# 3 values of C x 3 values of gamma x 3 kernels = 27 candidate settings.
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [1, 0.1, 0.01],
    'kernel': ['rbf', 'poly', 'sigmoid']
}
# n_jobs=-1 runs the candidate fits in parallel on all available cores instead
# of one after another; the selected parameters are the same either way.
# NOTE(review): with parallel workers, the per-fit verbose lines may be
# written to the kernel/server console rather than the cell output -- confirm.
# NOTE(review): X_train was standardized before cross-validation, so each CV
# fold sees scaling statistics computed from its own validation part; consider
# tuning a scaler+SVC pipeline if that matters -- TODO confirm.
grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)
print("Best parameters found: ", grid_search.best_params_)
# refit=True means best_estimator_ has already been retrained on all of X_train.
best_svc = grid_search.best_estimator_

# Re-evaluate on the same held-out test set using the best parameters
y_pred_best = best_svc.predict(X_test)
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f"Optimized Accuracy: {accuracy_best * 100:.2f}%")