In [2]:
import cv2
import os
import numpy as np
import time
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [3]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv_image], [0, 1, 2], None, bins, [0, 180, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def rotate_image(image, angle):
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated_image = cv2.warpAffine(image, rotation_matrix, (w, h))
    return rotated_image

def remove_background(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray_image, 30, 255, cv2.THRESH_BINARY)
    foreground = cv2.bitwise_and(image, image, mask=mask)
    return foreground

def preprocess_image(image):
    foreground = remove_background(image)
    gray_image = cv2.cvtColor(foreground, cv2.COLOR_BGR2GRAY)
    blurred_image = cv2.GaussianBlur(gray_image, (5, 5), 0)
    resized_image = cv2.resize(blurred_image, (128, 128))
    return resized_image

def extract_hog_features(image):
    hog = cv2.HOGDescriptor()
    hog_features = hog.compute(image).flatten()
    return hog_features

def extract_lbp_features(image, P=8, R=1):
    lbp = np.zeros_like(image)
    for i in range(R, image.shape[0] - R):
        for j in range(R, image.shape[1] - R):
            center = image[i, j]
            binary_string = ''.join(['1' if image[i + dx, j + dy] >= center else '0'
                                     for dx, dy in [(-1, -1), (-1, 0), (-1, 1), (0, 1),
                                                    (1, 1), (1, 0), (1, -1), (0, -1)]])
            lbp[i, j] = int(binary_string, 2)
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 2 ** P), range=(0, 2 ** P))
    lbp_hist = lbp_hist / np.sum(lbp_hist)
    return lbp_hist

def load_images_from_folder(folder):
    X = []
    y = []
    classes = os.listdir(folder)
    for class_name in classes:
        class_folder = os.path.join(folder, class_name)
        if not os.path.isdir(class_folder):
            continue
        for filename in os.listdir(class_folder):
            file_path = os.path.join(class_folder, filename)
            image = cv2.imread(file_path)
            if image is not None:
                preprocessed_image = preprocess_image(image)
                angles = [0,30,45,60,90,120,180]
                for angle in angles:
                    rotated_image = rotate_image(preprocessed_image, angle)
                    hog_features = extract_hog_features(rotated_image)
                    lbp_features = extract_lbp_features(rotated_image)
                    color_histogram = extract_color_histogram(image)
                    combined_features = np.concatenate((hog_features, lbp_features,color_histogram))
                    X.append(combined_features)
                    y.append(class_name)
    return np.array(X), np.array(y)


In [4]:
def rotate_image(image, angle):
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, rotation_matrix, (w, h))
    return rotated

def scale_image(image, scale):
    (h, w) = image.shape[:2]
    new_size = (int(w * scale), int(h * scale))
    resized = cv2.resize(image, new_size)
    return resized

def translate_image(image, x_shift, y_shift):
    translation_matrix = np.float32([[1, 0, x_shift], [0, 1, y_shift]])
    shifted = cv2.warpAffine(image, translation_matrix, (image.shape[1], image.shape[0]))
    return shifted

def adjust_brightness(image, factor):
    adjusted = cv2.convertScaleAbs(image, alpha=factor, beta=0)
    return adjusted

def add_gaussian_noise(image, mean=0, sigma=25):
    gauss = np.random.normal(mean, sigma, image.shape).astype('uint8')
    noisy = cv2.add(image, gauss)
    return noisy

In [5]:
def augment_image(image):
    augmented_images = []
    angles = [-15, 15, -30, 30]
    for angle in angles:
        augmented_images.append(rotate_image(image, angle))

    scales = [0.9, 1.1]
    for scale in scales:
        augmented_images.append(scale_image(image, scale))
    shifts = [(10, 10), (-10, -10)]
    for (x_shift, y_shift) in shifts:
        augmented_images.append(translate_image(image, x_shift, y_shift))

    brightness_factors = [0.7, 1.3]
    for factor in brightness_factors:
        augmented_images.append(adjust_brightness(image, factor))

    augmented_images.append(add_gaussian_noise(image))
    
    return augmented_images

In [9]:
def extract_features_from_image(image):
    preprocessed_image = preprocess_image(image)
    hog_features = extract_hog_features(preprocessed_image)
    lbp_features = extract_lbp_features(preprocessed_image)
    color_histogram = extract_color_histogram(image)
    combined_features = np.concatenate((hog_features, lbp_features, color_histogram))
    return combined_features

def load_images_with_features(folder):
    X = []
    y = []
    class_labels = os.listdir(folder)
    label_map = {label: idx for idx, label in enumerate(class_labels)}
    for label in class_labels:
        class_folder = os.path.join(folder, label)
        for filename in os.listdir(class_folder):
            file_path = os.path.join(class_folder, filename)
            image = cv2.imread(file_path)
            features = extract_features_from_image(image)
            X.append(features)
            y.append(label_map[label])
            augmented_images = augment_image(image)
            for aug_image in augmented_images:
                X.append(extract_features_from_image(aug_image))
                y.append(label_map[label])
    return np.array(X), np.array(y), label_map

X, y, label_map = load_images_with_features('folder')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
mlp = MLPClassifier(hidden_layer_sizes=(256,128,64), max_iter=500, alpha=0.001,solver='adam', random_state=42, verbose=True)
mlp.fit(X_train, y_train)

X_train_mlp = mlp.predict_proba(X_train)
X_test_mlp = mlp.predict_proba(X_test)

Iteration 1, loss = 1.82380029
Iteration 2, loss = 2.88595537
Iteration 3, loss = 2.04470340
Iteration 4, loss = 1.56322986
Iteration 5, loss = 1.35263438
Iteration 6, loss = 1.59158518
Iteration 7, loss = 1.22126009
Iteration 8, loss = 1.04892752
Iteration 9, loss = 0.96373044
Iteration 10, loss = 0.99363354
Iteration 11, loss = 0.74983254
Iteration 12, loss = 0.75863707
Iteration 13, loss = 0.56544571
Iteration 14, loss = 0.57398732
Iteration 15, loss = 0.49360526
Iteration 16, loss = 0.62010791
Iteration 17, loss = 0.41860953
Iteration 18, loss = 0.48735377
Iteration 19, loss = 0.29190794
Iteration 20, loss = 0.27705594
Iteration 21, loss = 0.28455055
Iteration 22, loss = 0.22262526
Iteration 23, loss = 0.27722383
Iteration 24, loss = 0.19057339
Iteration 25, loss = 0.21243932
Iteration 26, loss = 0.18690710
Iteration 27, loss = 0.15132028
Iteration 28, loss = 0.21679592
Iteration 29, loss = 0.15031708
Iteration 30, loss = 0.16012621
Iteration 31, loss = 0.12714367
Iteration 32, los



Combined Model Accuracy: 94.64%


In [16]:
estimator = DecisionTreeClassifier(max_depth=5)
ada_boost = AdaBoostClassifier(estimator=estimator, n_estimators=2, learning_rate=0.1, random_state=42)
ada_boost.fit(X_train_mlp, y_train)

y_pred = ada_boost.predict(X_test_mlp)
accuracy = accuracy_score(y_test, y_pred)
print(f"Combined Model Accuracy: {accuracy * 100:.2f}%")

Combined Model Accuracy: 94.64%




In [20]:

def predict_top_two_combined(frame, mlp_model, ada_model, label_map):
    features = extract_features_from_image(frame)
    mlp_probabilities = mlp_model.predict_proba([features])[0]
    ada_probabilities = ada_model.predict_proba([mlp_probabilities])[0]
    top_two_indices = np.argsort(ada_probabilities)[-2:][::-1]
    top_two_classes = [(list(label_map.keys())[i], ada_probabilities[i]) for i in top_two_indices]
    return top_two_classes

def live_product_detection_combined(mlp_model, ada_model, label_map):
    cap = cv2.VideoCapture(0)
    last_time = 0
    
    while True:
        ret, frame = cap.read()
        height, width = frame.shape[:2]
        box_size = 230
        top_left_x = (width - box_size) // 2
        top_left_y = (height - box_size) // 2
        bottom_right_x = top_left_x + box_size
        bottom_right_y = top_left_y + box_size
        cv2.rectangle(frame, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), (0, 0, 255), 2)
        cropped_frame = frame[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
        
        current_time = time.time()
        if current_time - last_time >= 2:
            top_two_predictions = predict_top_two_combined(cropped_frame, mlp_model, ada_model, label_map)
            last_time = current_time

        for i, (class_name, probability) in enumerate(top_two_predictions):
            if probability > 0.2:
                text = f"{class_name}: {probability * 100:.2f}%"
                cv2.putText(frame, text, (10, 30 + i * 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
        
        cv2.imshow("Live Product Detection", frame)
        if cv2.waitKey(1) & 0xFF == 27:
            break

    cap.release()
    cv2.destroyAllWindows()

live_product_detection_combined(mlp, ada_boost, label_map)


In [21]:
def predict_top_two(frame, model, label_map):
    features = extract_features_from_image(frame)
    probabilities = model.predict_proba([features])[0]
    top_two_indices = np.argsort(probabilities)[-2:][::-1]
    top_two_classes = [(list(label_map.keys())[i], probabilities[i]) for i in top_two_indices]
    return top_two_classes

def live_product_detection(model, label_map):
    cap = cv2.VideoCapture(0)
    last_time = 0
    
    while True:
        ret, frame = cap.read()
        height, width = frame.shape[:2]
        box_size = 230
        top_left_x = (width - box_size) // 2
        top_left_y = (height - box_size) // 2
        bottom_right_x = top_left_x + box_size
        bottom_right_y = top_left_y + box_size
        cv2.rectangle(frame, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), (0, 0, 255), 2)
        cropped_frame = frame[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
        

        current_time = time.time()
        if current_time - last_time >= 2:
            top_two_predictions = predict_top_two(cropped_frame, model, label_map)
            last_time = current_time

        for i, (class_name, probability) in enumerate(top_two_predictions):
            if probability > 0.2:
                text = f"{class_name}: {probability * 100:.2f}%"
                cv2.putText(frame, text, (10, 30 + i * 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
        

        cv2.imshow("Live Product Detection", frame)
        if cv2.waitKey(1) & 0xFF == 27:
            break

    cap.release()
    cv2.destroyAllWindows()

live_product_detection(mlp, label_map)