In [None]:
pip install pillow

In [None]:
import os
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from PIL import Image
from tqdm import tqdm
import random

# Directories (relative paths are resolved against the current working directory)
train_dir = 'train'  # Replace with actual path to your training data
test_dir = 'test1'  # Replace with actual path to your test data

# Target size handed to `Image.resize` for every image that is loaded
image_size = (64, 64)

def load_images_from_folder(folder, image_size, batch_size=None):
    """Load cat/dog images from ``folder`` as resized RGB arrays plus labels.

    The directory listing is shuffled, then every entry whose name contains
    ``'cat'`` (label 0) or ``'dog'`` (label 1) is opened, converted to RGB,
    resized to ``image_size`` and stored as a ``uint8`` array. Entries whose
    name contains neither substring, and entries that are not regular files,
    are skipped.

    Args:
        folder: Directory to read image files from.
        image_size: Size tuple passed to ``Image.resize``.
        batch_size: Optional soft limit. Loading stops once at least this
            many images have been loaded *and* both classes have been seen,
            so the result can be larger than ``batch_size`` when one class
            shows up late. ``None`` (or 0) loads everything.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: one RGB ``uint8`` array
        per loaded image, and the matching 0/1 labels.

    Raises:
        ValueError: If no 'cat' file or no 'dog' file was loaded.
        OSError: Propagated from ``os.listdir`` or ``Image.open`` (missing
            folder, unreadable or non-image file).
    """
    images = []
    labels = []
    filenames = os.listdir(folder)
    random.shuffle(filenames)  # Shuffle filenames to ensure random sampling

    cat_count, dog_count = 0, 0  # Keep track of the number of cats and dogs
    for filename in tqdm(filenames, desc=f'Loading images from {folder}'):
        if 'cat' in filename:
            label = 0  # 'cat'
        elif 'dog' in filename:
            label = 1  # 'dog'
        else:
            continue

        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            # A sub-directory such as "cats/" matches the name test but
            # cannot be opened as an image.
            continue

        # The context manager releases the file handle deterministically,
        # also for multi-frame formats and when convert/resize raises.
        with Image.open(img_path) as raw_img:
            img = np.array(raw_img.convert('RGB').resize(image_size), dtype=np.uint8)

        if label == 0:
            cat_count += 1
        else:
            dog_count += 1
        images.append(img)
        labels.append(label)

        # Only stop once both classes are represented; otherwise a classifier
        # could not be trained on the batch.
        if batch_size and len(images) >= batch_size and cat_count > 0 and dog_count > 0:
            print(f"Batch limit reached with {cat_count} cats and {dog_count} dogs.")
            break

    if cat_count == 0 or dog_count == 0:
        raise ValueError("Training data does not contain samples from both classes (cats and dogs).")

    print(f'Loaded {len(images)} images and {len(labels)} labels from {folder}')
    return np.array(images), np.array(labels)

def extract_features(images):
    """Turn every image into a one-dimensional feature vector.

    Each element of ``images`` is flattened with its own ``flatten()`` method
    while a progress bar is shown; the flattened rows are returned stacked in
    a single NumPy array.
    """
    flattened_rows = [
        picture.flatten()
        for picture in tqdm(images, desc='Extracting features')
    ]
    return np.array(flattened_rows)

# ---- Training data ----
# Cap on the number of images read from disk; lower it if memory is tight.
batch_size = 5000
train_images, train_labels = load_images_from_folder(
    train_dir,
    image_size,
    batch_size=batch_size,
)

# A two-class model cannot be fitted unless both labels actually occur.
unique_labels = np.unique(train_labels)
if not len(unique_labels) >= 2:
    raise ValueError("Training data does not contain samples from both classes (cats and dogs).")

# One flat pixel vector per image.
train_features = extract_features(train_images)

# Sanity output: feature matrix and label vector must have matching lengths.
print(f'train_features shape: {train_features.shape}')
print(f'train_labels shape: {train_labels.shape}')

# ---- Train / validation split ----
# 80% for fitting, 20% held out; the split itself uses a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    train_features,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Sanity output for the two partitions.
print(f'X_train shape: {X_train.shape}, y_train shape: {y_train.shape}')
print(f'X_val shape: {X_val.shape}, y_val shape: {y_val.shape}')

# ---- Model fitting ----
print('Training the SVM model...')
clf = svm.SVC(kernel='linear')  # linear-kernel support vector classifier
clf.fit(X_train, y_train)

# ---- Validation ----
print('Validating the model...')
y_val_pred = clf.predict(X_val)
validation_accuracy = 100 * accuracy_score(y_val, y_val_pred)  # as a percentage
print(f'Validation Accuracy: {validation_accuracy:.2f}%')

# Load a manageable batch of test data.
# NOTE(review): load_images_from_folder only keeps files whose name contains
# 'cat' or 'dog' and raises ValueError when either class is missing, so a
# test folder with unlabeled filenames will fail on this call — confirm how
# the files in `test_dir` are named.
test_images, _ = load_images_from_folder(test_dir, image_size, batch_size=batch_size)

# Debug print to check if test_images is empty or not loaded
print(f'Loaded {len(test_images)} images from test directory')

# Extract features for test data
test_features = extract_features(test_images)

# Debug print to check if test_features is empty or not extracted
print(f'Extracted {len(test_features)} features from test images')

# Predict using the trained model. The samples are sent to `clf.predict` in
# fixed-size chunks instead of one call per image: this removes the per-call
# overhead while keeping each predict() input small.
prediction_chunk = 256
test_predictions = []
chunk_starts = range(0, len(test_features), prediction_chunk)
for start in tqdm(chunk_starts, desc='Predicting test images'):
    chunk_classes = clf.predict(test_features[start:start + prediction_chunk])
    # Class 0 was assigned to cats by the loader; anything else is a dog.
    test_predictions.extend('cat' if cls == 0 else 'dog' for cls in chunk_classes)

# Print in the desired "<index>,<label>" format.
# NOTE: the index is the 1-based position in the shuffled load order, not an
# identifier taken from the file name.
for i, label in enumerate(test_predictions):
    print(f'{i + 1},{label}')
