In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import random
from skimage.feature import local_binary_pattern, hog
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from joblib import load
import logging


In [2]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [4]:
# Paths to data directories and model
new_test_frames_path = "/Users/eshitasuri/Desktop/Frames/test"
model_filename = 'incremental_model.pkl'

In [5]:
def calculate_color_histogram(image):
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def calculate_brightness(image):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    brightness = np.mean(hsv[:, :, 2])
    return brightness

def calculate_contrast(image):
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    contrast = l.std()
    return contrast

def calculate_edge_density(image):
    edges = cv2.Canny(image, 100, 200)
    edge_density = np.mean(edges)
    return edge_density

def calculate_color_moments(image):
    moments = cv2.moments(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    hu_moments = cv2.HuMoments(moments).flatten()
    return hu_moments

def extract_lbp(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = np.zeros_like(gray)
    for i in range(1, gray.shape[0] - 1):
        for j in range(1, gray.shape[1] - 1):
            center = gray[i, j]
            binary_string = ''
            binary_string += '1' if gray[i - 1, j - 1] > center else '0'
            binary_string += '1' if gray[i - 1, j] > center else '0'
            binary_string += '1' if gray[i - 1, j + 1] > center else '0'
            binary_string += '1' if gray[i, j + 1] > center else '0'
            binary_string += '1' if gray[i + 1, j + 1] > center else '0'
            binary_string += '1' if gray[i + 1, j] > center else '0'
            binary_string += '1' if gray[i + 1, j - 1] > center else '0'
            binary_string += '1' if gray[i, j - 1] > center else '0'
            lbp[i, j] = int(binary_string, 2)
    return lbp.flatten()

def extract_hog(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    h, _ = hog(gray, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualize=True)
    return h.flatten()

def calculate_optical_flow(prev_image, next_image):
    prev_gray = cv2.cvtColor(prev_image, cv2.COLOR_BGR2GRAY)
    next_gray = cv2.cvtColor(next_image, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, next_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    return mag.flatten()

In [6]:
def extract_features(images):
    histograms = []
    brightness = []
    contrast = []
    edge_density = []
    histogram_moments = []
    lbp = []
    hog_features = []
    optical_flow = []

    for img in images:
        histograms.append(calculate_color_histogram(img))
        brightness.append(calculate_brightness(img))
        contrast.append(calculate_contrast(img))
        edge_density.append(calculate_edge_density(img))
        histogram_moments.append(calculate_color_moments(img))
        lbp.append(extract_lbp(img))
        hog_features.append(extract_hog(img))
        optical_flow.append(calculate_optical_flow(img, img))  

    features = np.hstack([
        np.array(histograms),
        np.array(brightness)[:, np.newaxis],
        np.array(contrast)[:, np.newaxis],
        np.array(edge_density)[:, np.newaxis],
        np.array(histogram_moments),
        np.array(lbp),
        np.array(hog_features),
        np.array(optical_flow)
    ])
    return features

In [7]:
def load_images_from_folder(folder, batch_size):
    images = []
    filenames = os.listdir(folder)
    for i, filename in enumerate(filenames):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            images.append(img)
        if len(images) == batch_size or i == len(filenames) - 1:
            yield images, filenames[i-batch_size+1:i+1]  # Yield the batch of images and their filenames
            images = []


In [8]:
# Process and extract features for the test data
def process_test_data(folder_path, batch_size):
    all_features = []
    selected_filenames = []
    batch_number = 1
    for batch, filenames in load_images_from_folder(folder_path, batch_size):
        logging.info(f"Processing batch {batch_number}")
        features = extract_features(batch)
        all_features.append(features)
        selected_filenames.extend(filenames)
        batch_number += 1
    all_features_df = pd.DataFrame(np.vstack(all_features))
    return all_features_df, selected_filenames

In [9]:
# Load the saved model
logging.info("Loading the model")
model = load(model_filename)

# Number of frames to sample for testing
sample_size = 2000
batch_size = 10


2024-08-01 15:53:05,960 - INFO - Loading the model
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [10]:
logging.info("Extracting features from test data")
test_features_df, selected_filenames = process_test_data(new_test_frames_path, batch_size)

# Impute and scale the features
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
test_features_imputed = imputer.fit_transform(test_features_df)
test_features_scaled = scaler.fit_transform(test_features_imputed)

# Predict on the test data
logging.info("Predicting on test data")
predictions = model.predict(test_features_scaled)

2024-08-01 15:53:09,945 - INFO - Extracting features from test data
2024-08-01 15:53:09,951 - INFO - Processing batch 1


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 2 dimension(s)

In [None]:
# Impute and scale the features
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
test_features_imputed = imputer.fit_transform(test_features_df)
test_features_scaled = scaler.fit_transform(test_features_imputed)

# Predict on the test data
logging.info("Predicting on test data")
predictions = model.predict(test_features_scaled)

# Evaluate the model
accuracy = accuracy_score(true_labels, predictions)
report = classification_report(true_labels, predictions)
conf_matrix = confusion_matrix(true_labels, predictions)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
print("Confusion Matrix:")
print(conf_matrix)