In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV

# Function to load and preprocess images
def load_images(folder, skip_unreadable=False):
    """Load every image under ``folder/<label>/`` and resize it to 64x64.

    Each sub-directory of ``folder`` is treated as one class; its name is
    used as the label for every image it contains. Entries are visited in
    ``os.listdir`` order (labels first, then the files inside each label).

    Parameters
    ----------
    folder : str
        Directory whose sub-directories hold the images of one class each.
    skip_unreadable : bool, default False
        If False, a file that OpenCV cannot decode raises ``ValueError``
        naming the file. If True, such files are silently left out, so the
        result may contain fewer entries than there are files on disk.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The stacked resized images and the matching label strings.
        Both arrays are empty when no image was found.

    Raises
    ------
    ValueError
        If a file cannot be read as an image and ``skip_unreadable`` is False.
    """
    images = []
    labels = []
    for label in os.listdir(folder):
        label_path = os.path.join(folder, label)
        if not os.path.isdir(label_path):
            # Stray files next to the class folders are not classes; listing
            # them with os.listdir would raise NotADirectoryError.
            continue
        for filename in os.listdir(label_path):
            img_path = os.path.join(label_path, filename)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports failure by returning None instead of
                # raising; passing that on to cv2.resize fails with an
                # opaque OpenCV assertion that does not name the file.
                if skip_unreadable:
                    continue
                raise ValueError(f'Could not read image file: {img_path}')
            # Resize images to a common size so they stack into one array
            images.append(cv2.resize(img, (64, 64)))
            labels.append(label)
    return np.array(images), np.array(labels)

# Load training data
train_folder = 'train'  # Update this with the actual path

X_train, y_train = load_images(train_folder)

# Fail early with a clear message: reshaping an empty array with -1 below
# would otherwise raise an unrelated-looking "cannot reshape" error.
if X_train.shape[0] == 0:
    raise ValueError(f'No training images found under {train_folder!r}')

# Flatten the images for SVM (one row of raw pixel values per image)
X_train_flat = X_train.reshape(X_train.shape[0], -1)

# Encode the string labels as integers
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)

# Split the data into training and validation sets
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train_flat, y_train_encoded, test_size=0.2, random_state=42
)

# Hyperparameter tuning using GridSearchCV.
# 'scale' is included next to the fixed gamma values because the features are
# raw 0-255 pixels: squared distances between images are very large, so fixed
# gammas in the 0.01-1 range can drive the RBF kernel to ~0 between distinct
# samples. 'scale' derives gamma from the feature count and variance instead.
param_grid = {'C': [1, 10, 100], 'gamma': ['scale', 0.01, 0.1, 1], 'kernel': ['rbf']}
grid_search = GridSearchCV(SVC(), param_grid, cv=3)
grid_search.fit(X_train_split, y_train_split)

# Get the best hyperparameters
best_params = grid_search.best_params_

# GridSearchCV (refit=True by default) has already retrained an SVC with the
# best hyperparameters on all of X_train_split, so reuse that model instead of
# paying for a second, identical fit.
svm_model = grid_search.best_estimator_

# Make predictions on the validation set
y_val_pred = svm_model.predict(X_val_split)

# Calculate accuracy on the validation set
accuracy = accuracy_score(y_val_split, y_val_pred)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')


In [14]:
# Load test data
test_folder = 'test'  # Update this with the actual path
X_test, y_test = load_images(test_folder)

# Flatten the test images for SVM
X_test_flat = X_test.reshape(X_test.shape[0], -1)

# Make predictions on the test set
y_test_pred = svm_model.predict(X_test_flat)

# Decode labels back to the original class-folder names
y_test_pred_decoded = le.inverse_transform(y_test_pred)

# Collect the file names by walking the class folders in the same
# os.listdir order that load_images uses, so row i of the CSV pairs file i
# with prediction i. Hard-coding 'cats' + 'dogs' only lines up when the
# directory listing happens to return exactly those two folders in that order.
# NOTE: this relies on os.listdir returning a stable order for an unchanged
# directory between the two traversals.
test_filenames = [
    filename
    for label in os.listdir(test_folder)
    if os.path.isdir(os.path.join(test_folder, label))
    for filename in os.listdir(os.path.join(test_folder, label))
]

# Guard against silently mis-paired rows if the listing and the loaded
# images ever get out of step.
if len(test_filenames) != len(y_test_pred_decoded):
    raise ValueError(
        f'Found {len(test_filenames)} test files but '
        f'{len(y_test_pred_decoded)} predictions; cannot pair them up.'
    )

# Save the predictions to a CSV file
results = pd.DataFrame({'Filename': test_filenames,
                        'Prediction': y_test_pred_decoded})
results.to_csv('svm_predictions.csv', index=False)

print('Predictions saved to svm_predictions.csv')


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (4000,) + inhomogeneous part.