In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn import svm
from skimage import feature

def load_images_and_labels_from_folder(folder_path):
    images = []
    labels = []
    filenames = os.listdir(folder_path)
    for filename in tqdm(filenames, desc="Loading images"):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path)
            if image is not None:
                feature_vector = extract_features(image)
                labels.append(extract_label(filename))
                images.append(feature_vector)
            else:
                print(f"Failed to read image: {image_path}")
    return images, labels

def extract_label(filename):
    if "frban" in filename:
        return "fresh banana"
    elif "frgrapp" in filename:
        return "fresh green apple"
    elif "rotapp" in filename:
        return "rotten apple"
    elif "rotban" in filename:
        return "rotten banana"
    elif "frredapp" in filename:
        return "fresh red apple"
    else:
        print(f"Unknown label for image: {filename}")
        return None

def extract_color_histogram(image, bins=(8, 8, 8)):
    hist = cv2.calcHist([image], [0, 1, 2], None, bins, [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def extract_local_binary_patterns(image, num_points=24, radius=8):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = feature.local_binary_pattern(gray, num_points, radius, method="uniform")
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, num_points + 3), range=(0, num_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)
    return hist

def extract_edges(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    return edges.flatten()

def extract_features(image):
    color_hist = extract_color_histogram(image)
    lbp_hist = extract_local_binary_patterns(image)
    edges = extract_edges(image)
    feature_vector = np.concatenate([color_hist, lbp_hist, edges])
    return feature_vector

In [2]:
folder_path = "resized ds 128"
images, labels = load_images_and_labels_from_folder(folder_path)

Loading images:   0%|          | 0/2000 [00:00<?, ?it/s]

Loading images: 100%|██████████| 2000/2000 [01:08<00:00, 29.19it/s]


In [None]:
images

In [None]:
labels

In [8]:
# Initialize a dictionary to store the counts
label_counts = {}

# Count the occurrences of each label
for label in labels:
    if label in label_counts:
        label_counts[label] += 1
    else:
        label_counts[label] = 1

# Print the counts for each label
for label, count in label_counts.items():
    print(f"{label}: {count} samples")


fresh banana: 50 samples
fresh green apple: 49 samples
fresh red apple: 50 samples
rotten apple: 50 samples
rotten banana: 50 samples


In [None]:
labels

In [3]:
from sklearn.svm import SVC
from tqdm import tqdm

# Convert labels to numerical values
label_to_number = {"fresh banana": 0, "fresh green apple": 1, "rotten apple": 2, "rotten banana": 3, "fresh red apple": 4}
numeric_labels = [label_to_number[label] for label in labels]

# Train SVM model
svm_model = SVC(kernel='linear')  # You can adjust the kernel as needed

# Training SVM model with tqdm
with tqdm(total=len(images), desc="Training SVM") as pbar:
    svm_model.fit(images, numeric_labels)
    pbar.update(len(images))


Training SVM:   0%|          | 0/2000 [00:00<?, ?it/s]

Training SVM: 100%|██████████| 2000/2000 [04:31<00:00,  7.36it/s]


In [4]:
# Test data (replace with your actual test data)
test_folder_path = "eval 128"
test_images, test_labels = load_images_and_labels_from_folder(test_folder_path)

# Convert test labels to numerical values
numeric_test_labels = [label_to_number[label] for label in test_labels]

# Predict labels for test images using the trained SVM model
predicted_labels = svm_model.predict(test_images)

# Convert predicted labels back to original labels
predicted_labels = [list(label_to_number.keys())[label] for label in predicted_labels]

# Calculate accuracy
correct_predictions = sum(1 for true_label, predicted_label in zip(test_labels, predicted_labels) if true_label == predicted_label)
total_test_samples = len(test_labels)
accuracy = correct_predictions / total_test_samples

print(f"Accuracy on test data: {accuracy:.2f}")

Loading images: 100%|██████████| 15/15 [00:00<00:00, 26.94it/s]


Accuracy on test data: 0.13
