In [9]:
#Training the model with Support Vector Classifier

import os
import numpy as np
from PIL import Image
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # Example: using Support Vector Classifier

# Directory that holds the image dataset
data_dir = "images/"

# Filenames that fail to load are collected here for later manual inspection
problematic_filenames = []

# Keep only entries whose name ends with the (case-sensitive) ".jpg" suffix
directory_entries = os.listdir(data_dir)
filenames = [entry for entry in directory_entries if entry.endswith(".jpg")]

# Randomise the processing order in place
random.shuffle(filenames)

# Define a function for loading and preprocessing images
def load_and_preprocess_images(data_filenames, target_size=None):
    """Load images from ``data_dir`` and derive binary labels from their names.

    Each file is opened with PIL, optionally resized, and converted to a NumPy
    array. A file is labelled 1 when its lower-cased name contains
    ``"_motion"`` and 0 otherwise. Files that cannot be processed are reported
    on stdout, appended to the module-level ``problematic_filenames`` list and
    skipped, so a single bad file does not abort the whole run.

    Args:
        data_filenames: Iterable of file names relative to ``data_dir``.
        target_size: Optional ``(width, height)`` handed to ``Image.resize``.
            ``None`` (the default) keeps every image at its native size.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays holding the image data and the
        matching 0/1 labels, in the order the files were successfully read.
    """
    # NOTE(review): np.array can only build a regular N-d array when every
    # image has the same shape; pass target_size if the dataset may contain
    # mixed resolutions -- TODO confirm against the actual data.
    # NOTE(review): the substring test also matches names such as
    # "no_motion.jpg" -- verify this agrees with the dataset naming scheme.
    images = []
    labels = []

    for filename in data_filenames:
        try:
            # The context manager closes the underlying file handle as soon as
            # the pixel data has been copied into the array.
            with Image.open(os.path.join(data_dir, filename)) as opened:
                frame = opened if target_size is None else opened.resize(target_size)
                pixels = np.array(frame)

            # Case-insensitive check for the "_motion" marker in the name
            label = 1 if "_motion" in filename.lower() else 0
        except Exception as e:
            # Best-effort loading: record the failure and move on
            print(f"Error processing {filename}: {e}")
            problematic_filenames.append(filename)
            continue

        # Append image and label together so X and y can never get out of step
        images.append(pixels)
        labels.append(label)

    # Convert lists to NumPy arrays
    X = np.array(images)
    y = np.array(labels)

    return X, y

# Build the image array and label vector from every discovered file
X, y = load_and_preprocess_images(filenames)

# Sanity check: both counts are expected to match
print("Number of samples in X:", len(X))
print("Number of samples in y:", len(y))

# Hold out 20% of the samples for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Collapse every image into a single feature row before handing it to the model
train_features = X_train.reshape(X_train.shape[0], -1)
test_features = X_test.reshape(X_test.shape[0], -1)

# Fit a Support Vector Classifier with its default settings
model = SVC()
model.fit(train_features, y_train)

# Score the classifier on the held-out split
accuracy = model.score(test_features, y_test)
print(f"Accuracy: {accuracy}")

# Report files that could not be processed so they can be inspected manually
print("Problematic Filenames:", problematic_filenames)

import joblib

# Persist the fitted model to disk
joblib.dump(model, "trained_img_model.pkl")


Number of samples in X: 100
Number of samples in y: 100
Accuracy: 0.9
Problematic Filenames: []


['trained_img_model.pkl']