In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from skimage import feature
import pickle
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

In [2]:
def extract_label(filename):
    if "frban" in filename:
        return "fresh banana"
    elif "frgrapp" in filename:
        return "fresh green apple"
    elif "rotapp" in filename:
        return "rotten apple"
    elif "rotban" in filename:
        return "rotten banana"
    elif "frredapp" in filename:
        return "fresh red apple"
    elif "frbg" in filename:
        return "fresh bitter gourd"
    elif "frcapsicum" in filename:
        return "fresh capsicum"
    elif "rotorange" in filename:
        return "rotten orange"
    elif "rotcapsicum" in filename:
        return "rotten capsicum"
    elif "rotbg" in filename:
        return "rotten bitter gourd"
    elif "frtomato" in filename:
        return "fresh tomato"
    elif "frorange" in filename:
        return "fresh orange"
    else:
        print(f"Unknown label for image: {filename}")
        return None

In [3]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    hist = cv2.calcHist([image], [0, 1, 2], None, bins, [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def extract_local_binary_patterns(image, num_points=24, radius=8):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = feature.local_binary_pattern(gray, num_points, radius, method="uniform")
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, num_points + 3), range=(0, num_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)
    return hist

def extract_edges(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    return edges.flatten()

def extract_features(image):
    color_hist = extract_color_histogram(image)
    lbp_hist = extract_local_binary_patterns(image)
    edges = extract_edges(image)
    feature_vector = np.concatenate([color_hist, lbp_hist, edges])
    return feature_vector

In [4]:
def load_X_train_and_y_train_from_folder(folder_path):
    X_train = []
    y_train = []
    filenames = os.listdir(folder_path)
    for filename in tqdm(filenames, desc="Loading data"):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path)
            if image is not None:
                feature_vector = extract_features(image)
                y_train.append(extract_label(filename))
                X_train.append(feature_vector)
            else:
                print(f"Failed to read image: {image_path}")
    return X_train, y_train

In [6]:
def resize_X_train_in_folder(input_folder, output_folder, target_size=(224, 224)):
    filenames = os.listdir(input_folder)
    for filename in tqdm(filenames, desc="Resizing X_train"):
        if filename.endswith(".jpg") or filename.endswith(".png"):  
            input_image_path = os.path.join(input_folder, filename)
            output_image_path = os.path.join(output_folder, filename)
            image = cv2.imread(input_image_path)
            if image is not None:
                resized_image = cv2.resize(image, target_size)  
                cv2.imwrite(output_image_path, resized_image)
            else:
                pass

In [7]:
folder_path = "resized"
X, y = load_X_train_and_y_train_from_folder(folder_path)

Loading data: 100%|██████████| 5912/5912 [04:03<00:00, 24.26it/s]


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
label_to_number = {"fresh banana": 0, "fresh green apple": 1, "rotten apple": 2, "rotten banana": 3, "fresh red apple": 4, "fresh bitter gourd": 5, "fresh capsicum": 6, "fresh orange": 7, "fresh tomato": 8, "rotten bitter gourd": 9, "rotten capsicum": 10, "rotten orange": 11}
numeric_y_train = [label_to_number[label] for label in y_train]

In [10]:
rf_model = RandomForestClassifier(n_estimators=70, criterion='entropy')  
with tqdm(total=len(X_train), desc="Training Random Forest") as pbar:
    rf_model.fit(X_train, y_train)
    pbar.update(len(X_train))


Training Random Forest:   0%|          | 0/4729 [00:00<?, ?it/s]

Training Random Forest: 100%|██████████| 4729/4729 [00:29<00:00, 162.45it/s]


In [12]:
label_to_number = {"fresh banana": 0, "fresh green apple": 1, "rotten apple": 2, "rotten banana": 3, "fresh red apple": 4, "fresh bitter gourd": 5, "fresh capsicum": 6, "fresh orange": 7, "fresh tomato": 8, "rotten bitter gourd": 9, "rotten capsicum": 10, "rotten orange": 11}
numeric_y_test = [label_to_number[label] for label in y_test]

In [13]:
y_pred = rf_model.predict(X_test)

In [16]:
with open('Random Forest Entropy.pkl', 'wb') as file:
    pickle.dump(rf_model, file)

In [17]:
numeric_y_test_arr = np.array(numeric_y_test)
numeric_y_test_arr

array([11,  8,  4, ..., 11,  5,  5])

In [18]:
y_pred

array(['rotten orange', 'fresh tomato', 'fresh red apple', ...,
       'rotten orange', 'fresh bitter gourd', 'fresh bitter gourd'],
      dtype='<U19')

In [20]:
label_to_number = {"fresh banana": 0, "fresh green apple": 1, "rotten apple": 2, "rotten banana": 3, "fresh red apple": 4, "fresh bitter gourd": 5, "fresh capsicum": 6, "fresh orange": 7, "fresh tomato": 8, "rotten bitter gourd": 9, "rotten capsicum": 10, "rotten orange": 11}
numeric_y_pred = [label_to_number[label] for label in y_pred]

In [21]:
accuracy_score(numeric_y_test_arr, numeric_y_pred)

0.9737954353338969

In [23]:
f1_score(numeric_y_test_arr, numeric_y_pred, average=None)

array([0.95604396, 0.97530864, 0.92857143, 0.93959732, 0.99421965,
       1.        , 1.        , 0.93203883, 1.        , 1.        ,
       1.        , 0.91836735])

In [24]:
f1_score(numeric_y_test_arr, numeric_y_pred, average='micro')

0.9737954353338969

In [25]:
precision_score(numeric_y_test_arr, numeric_y_pred, average=None)

array([0.93548387, 0.97530864, 0.92857143, 0.97222222, 0.98850575,
       1.        , 1.        , 0.91428571, 1.        , 1.        ,
       1.        , 0.9375    ])

In [26]:
recall_score(numeric_y_test_arr, numeric_y_pred, average=None)

array([0.97752809, 0.97530864, 0.92857143, 0.90909091, 1.        ,
       1.        , 1.        , 0.95049505, 1.        , 1.        ,
       1.        , 0.9       ])

In [27]:
confusion_matrix(numeric_y_test_arr, numeric_y_pred)

array([[ 87,   1,   0,   1,   0,   0,   0,   0,   0,   0,   0,   0],
       [  2,  79,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  1,   0,  65,   1,   1,   0,   0,   2,   0,   0,   0,   0],
       [  2,   0,   2,  70,   0,   0,   0,   0,   0,   0,   0,   3],
       [  0,   0,   0,   0,  86,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,  75,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 110,   0,   0,   0,   0,   0],
       [  1,   1,   0,   0,   0,   0,   0,  96,   0,   0,   0,   3],
       [  0,   0,   0,   0,   0,   0,   0,   0, 217,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,  74,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 103,   0],
       [  0,   0,   3,   0,   0,   0,   0,   7,   0,   0,   0,  90]],
      dtype=int64)