In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from skimage import feature
import pickle
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

In [2]:
def extract_label(filename):
    """Derive the class label of an image from tags embedded in its filename.

    Args:
        filename: Name of the image file; any string is accepted.

    Returns:
        The label paired with the first tag found anywhere in ``filename``,
        or None when no tag occurs (a message is printed in that case).
    """
    # Checked top to bottom; the first tag contained in the filename wins.
    tag_to_label = (
        ("frban", "fresh banana"),
        ("frgrapp", "fresh green apple"),
        ("rotapp", "rotten apple"),
        ("rotban", "rotten banana"),
        ("frredapp", "fresh red apple"),
        ("frbg", "fresh bitter gourd"),
        ("frcapsicum", "fresh capsicum"),
        ("rotorange", "rotten orange"),
        ("rotcapsicum", "rotten capsicum"),
        ("rotbg", "rotten bitter gourd"),
        ("frtomato", "fresh tomato"),
        ("frorange", "fresh orange"),
    )
    for tag, label in tag_to_label:
        if tag in filename:
            return label

    print(f"Unknown label for image: {filename}")
    return None

In [3]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    """Compute a flattened, normalized joint histogram over three channels.

    Args:
        image: Three-channel image array accepted by ``cv2.calcHist``.
        bins: Number of histogram bins for each of the three channels.

    Returns:
        1-D array holding the histogram after ``cv2.normalize`` (called with
        its default settings) and flattening.
    """
    channel_indices = [0, 1, 2]
    value_ranges = [0, 256, 0, 256, 0, 256]  # [0, 256) for every channel
    histogram = cv2.calcHist([image], channel_indices, None, bins, value_ranges)
    normalized = cv2.normalize(histogram, histogram)
    return normalized.flatten()

def extract_local_binary_patterns(image, num_points=24, radius=8):
    """Describe image texture as a histogram of uniform local binary patterns.

    Args:
        image: BGR image array (it is converted with ``cv2.COLOR_BGR2GRAY``).
        num_points: Number of neighbour points passed to the LBP operator.
        radius: Radius passed to the LBP operator.

    Returns:
        Float array with ``num_points + 2`` entries: the LBP code counts
        divided by their total.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp_codes = feature.local_binary_pattern(grayscale, num_points, radius, method="uniform")

    bin_edges = np.arange(0, num_points + 3)
    counts, _ = np.histogram(lbp_codes.ravel(), bins=bin_edges, range=(0, num_points + 2))
    counts = counts.astype("float")

    # The small epsilon keeps the division defined when all counts are zero.
    return counts / (counts.sum() + 1e-7)

def extract_edges(image):
    """Return the Canny edge map of a BGR image as a flat 1-D array.

    The edge map is flattened as-is, so the length of the result follows the
    size of the input image.
    """
    low_threshold, high_threshold = 100, 200
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grayscale, low_threshold, high_threshold)
    return edge_map.flatten()

def extract_features(image):
    """Build the full feature vector for one image.

    Concatenates, in this order: the colour histogram, the local binary
    pattern histogram and the flattened edge map.
    """
    feature_parts = [
        extract_color_histogram(image),
        extract_local_binary_patterns(image),
        extract_edges(image),
    ]
    return np.concatenate(feature_parts)

In [4]:
def load_X_train_and_y_train_from_folder(folder_path):
    """Build feature vectors and labels for the labelled images in a folder.

    Only files ending in ".jpg" or ".png" (case-insensitive) are considered.
    Files that cannot be read, or whose name carries no known label tag, are
    reported on stdout and skipped, so the two returned lists always have the
    same length and never contain a None label.

    Args:
        folder_path: Directory containing the image files.

    Returns:
        Tuple ``(X_train, y_train)``: a list of feature vectors from
        ``extract_features`` and the list of matching label strings from
        ``extract_label``, in sorted filename order.
    """
    X_train = []
    y_train = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    filenames = sorted(os.listdir(folder_path))
    for filename in tqdm(filenames, desc="Loading data"):
        # Lower-case the name so ".JPG" / ".PNG" files are not silently ignored.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to read image: {image_path}")
            continue

        # Resolve the label before the (expensive) feature extraction.
        # extract_label prints its own message for unknown files; a None label
        # would only fail later when labels are mapped to integers.
        label = extract_label(filename)
        if label is None:
            continue

        X_train.append(extract_features(image))
        y_train.append(label)
    return X_train, y_train

In [5]:
def evaluate_model(true_y_train, numeric_predicted_y_train):
    """Compute classification metrics for a set of predictions.

    Args:
        true_y_train: Ground-truth labels.
        numeric_predicted_y_train: Predicted labels, aligned with the truth.

    Returns:
        Tuple ``(accuracy, f1, precision, recall, conf_matrix)`` where
        ``f1`` is micro-averaged, ``precision`` is an array with one score per
        class, ``recall`` is weighted-averaged and ``conf_matrix`` is the
        confusion matrix.
    """
    accuracy = accuracy_score(true_y_train, numeric_predicted_y_train)
    f1 = f1_score(true_y_train, numeric_predicted_y_train, average='micro')
    # average must be the None object to get per-class scores; the string
    # 'None' is not a valid averaging mode and makes scikit-learn raise.
    precision = precision_score(true_y_train, numeric_predicted_y_train, average=None)
    recall = recall_score(true_y_train, numeric_predicted_y_train, average='weighted')
    conf_matrix = confusion_matrix(true_y_train, numeric_predicted_y_train)

    return accuracy, f1, precision, recall, conf_matrix

In [6]:
def resize_X_train_in_folder(input_folder, output_folder, target_size=(224, 224)):
    """Resize every image in a folder and save the copies under the same names.

    Only files ending in ".jpg" or ".png" (case-insensitive) are processed.
    Images that cannot be read or written are reported on stdout and skipped.

    Args:
        input_folder: Directory containing the source images.
        output_folder: Directory receiving the resized images; it is created
            if it does not exist yet.
        target_size: Size passed to ``cv2.resize``.
    """
    # cv2.imwrite does not create missing directories, so create the target up
    # front. An empty string means the current directory and needs no creation.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    filenames = os.listdir(input_folder)
    for filename in tqdm(filenames, desc="Resizing X_train"):
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        input_image_path = os.path.join(input_folder, filename)
        output_image_path = os.path.join(output_folder, filename)
        image = cv2.imread(input_image_path)
        if image is None:
            # Report instead of silently dropping the file.
            print(f"Failed to read image: {input_image_path}")
            continue

        resized_image = cv2.resize(image, target_size)
        # imwrite signals failure through its return value.
        if not cv2.imwrite(output_image_path, resized_image):
            print(f"Failed to write image: {output_image_path}")

In [7]:
# Load the evaluation data: X_test receives the feature vectors and y_test the
# string labels derived from the filenames. The loader's name mentions "train",
# but it simply processes whichever folder it is given.
folder_path = "test_resized"
X_test, y_test = load_X_train_and_y_train_from_folder(folder_path)

Loading data:   1%|          | 8/821 [00:00<00:31, 25.62it/s]

Loading data: 100%|██████████| 821/821 [00:30<00:00, 27.07it/s]


In [8]:
# Restore the pickled model object; it is used further down via rf.predict.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only load 'rf.pkl' when it comes from a trusted source.
with open('rf.pkl', 'rb') as file:
    rf = pickle.load(file)

In [18]:
# Integer id assigned to each class label.
label_to_number = {
    "fresh banana": 0,
    "fresh green apple": 1,
    "rotten apple": 2,
    "rotten banana": 3,
    "fresh red apple": 4,
    "fresh bitter gourd": 5,
    "fresh capsicum": 6,
    "fresh orange": 7,
    "fresh tomato": 8,
    "rotten bitter gourd": 9,
    "rotten capsicum": 10,
    "rotten orange": 11,
}
# Encode the ground-truth labels; a label missing from the table raises KeyError.
numeric_y_test = [label_to_number[class_name] for class_name in y_test]
numeric_y_test

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [16]:
# Predict a label for every test feature vector with the loaded model; the
# bare name on the last line displays the predictions.
y_pred = rf.predict(X_test)
y_pred

array(['fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'rotten banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'rotten apple',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
       'fresh banana', 'fresh banana', 'fresh banana', 'fresh banana',
     

In [19]:
# Encode the predictions with the same label_to_number table that is used for
# the ground truth (defined in the cell that builds numeric_y_test). Keeping a
# second copy of the dict here would let the two encodings drift apart.
numeric_y_pred = [label_to_number[label] for label in y_pred]
numeric_y_pred

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 11,
 0,
 0,
 0,
 0,
 9,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,

In [17]:
# Position of each label in label_to_number's key order.
label_names = list(label_to_number)
label_to_index = {label: index for index, label in enumerate(label_names)}

# Round-trip every prediction label -> index -> label. The results are still
# label strings (not integers): the lookup checks each prediction against the
# known classes (KeyError otherwise) and yields the plain-str dictionary keys
# instead of numpy string scalars. label_names is built once rather than once
# per prediction.
predicted_y_test = [label_names[label_to_index[label]] for label in y_pred]
predicted_y_test


['fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'rotten banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'rotten apple',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banana',
 'fresh banan

In [12]:
# Hand-computed accuracy: the share of test samples whose predicted label
# equals the true label.
total_test_samples = len(y_test)
correct_predictions = sum(
    true_label == predicted_label
    for true_label, predicted_label in zip(y_test, predicted_y_test)
)
accuracy = correct_predictions / total_test_samples
print("Accuracy:", accuracy)


Accuracy: 0.8380024360535931


In [13]:
# Convert the encoded ground-truth labels from a Python list to a NumPy array
# for the metric calls that follow.
numeric_y_test_arr = np.array(numeric_y_test)
numeric_y_test_arr

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  5,
        5,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1

In [20]:
# Overall accuracy on the encoded labels.
accuracy_score(y_true=numeric_y_test_arr, y_pred=numeric_y_pred)

0.8380024360535931

In [21]:
# Micro-averaged F1; for single-label multiclass data this equals accuracy.
f1_score(y_true=numeric_y_test_arr, y_pred=numeric_y_pred, average='micro')

0.8380024360535931

In [22]:
# Per-class precision (average=None returns one score per class id).
precision_score(y_true=numeric_y_test_arr, y_pred=numeric_y_pred, average=None)

array([0.90566038, 0.63461538, 0.83098592, 0.95789474, 0.76666667,
       0.9       , 1.        , 0.95049505, 1.        , 0.        ,
       1.        , 0.89908257])

In [23]:
# Per-class recall (average=None returns one score per class id).
recall_score(y_true=numeric_y_test_arr, y_pred=numeric_y_pred, average=None)

array([0.96      , 0.98019802, 0.57843137, 0.9009901 , 0.90196078,
       1.        , 0.25      , 0.95049505, 0.92857143, 0.        ,
       0.94444444, 0.98      ])

In [24]:
# Confusion matrix: rows are true class ids, columns are predicted class ids.
confusion_matrix(y_true=numeric_y_test_arr, y_pred=numeric_y_pred)

array([[96,  0,  1,  1,  0,  0,  0,  0,  0,  1,  0,  1],
       [ 2, 99,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 5,  2, 59,  3, 25,  0,  0,  3,  0,  0,  0,  5],
       [ 1,  0,  1, 91,  2,  0,  0,  0,  0,  4,  0,  2],
       [ 0,  0,  9,  0, 92,  0,  0,  1,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  0],
       [ 0, 54,  0,  0,  0,  0, 18,  0,  0,  0,  0,  0],
       [ 1,  1,  0,  0,  0,  0,  0, 96,  0,  0,  0,  3],
       [ 0,  0,  0,  0,  1,  0,  0,  0, 13,  0,  0,  0],
       [ 1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  0,  0, 17,  0],
       [ 0,  0,  1,  0,  0,  0,  0,  1,  0,  0,  0, 98]], dtype=int64)