In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from skimage import feature
import pickle
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

In [2]:
def extract_label(filename):
    """Derive the class label of an image from a tag embedded in its filename.

    Args:
        filename: Name of the image file; only substring membership is tested.

    Returns:
        The human-readable class label for the first matching tag, or None
        (after printing a warning) when no known tag occurs in the name.
    """
    # Tags are tested in this exact order and the first hit wins.
    tag_to_label = (
        ("frban", "fresh banana"),
        ("frgrapp", "fresh green apple"),
        ("rotapp", "rotten apple"),
        ("rotban", "rotten banana"),
        ("frredapp", "fresh red apple"),
        ("frbg", "fresh bitter gourd"),
        ("frcapsicum", "fresh capsicum"),
        ("rotorange", "rotten orange"),
        ("rotcapsicum", "rotten capsicum"),
        ("rotbg", "rotten bitter gourd"),
        ("frtomato", "fresh tomato"),
        ("frorange", "fresh orange"),
    )
    for tag, label in tag_to_label:
        if tag in filename:
            return label

    print(f"Unknown label for image: {filename}")
    return None

In [3]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    """Compute a flattened, normalized joint histogram over the three image channels.

    Args:
        image: Three-channel image array as accepted by cv2.calcHist.
        bins: Number of histogram bins per channel.

    Returns:
        1-D array with one entry per joint bin (8 * 8 * 8 = 512 for the
        default), normalized with cv2.normalize's default settings.
    """
    channel_indices = [0, 1, 2]
    # Each channel is binned over the half-open value range [0, 256).
    value_ranges = [0, 256, 0, 256, 0, 256]
    joint_hist = cv2.calcHist([image], channel_indices, None, bins, value_ranges)
    normalized = cv2.normalize(joint_hist, joint_hist)
    return normalized.flatten()

def extract_local_binary_patterns(image, num_points=24, radius=8):
    """Describe image texture as a normalized histogram of uniform LBP codes.

    Args:
        image: BGR image; it is converted to grayscale before the LBP pass.
        num_points: Number of sampling points passed to local_binary_pattern.
        radius: Sampling radius passed to local_binary_pattern.

    Returns:
        Float array of num_points + 2 bin frequencies that sum to ~1.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    codes = feature.local_binary_pattern(grayscale, num_points, radius, method="uniform")

    # num_points + 3 integer edges -> num_points + 2 unit-width bins.
    bin_edges = np.arange(0, num_points + 3)
    counts, _ = np.histogram(codes.ravel(), bins=bin_edges, range=(0, num_points + 2))

    frequencies = counts.astype("float")
    # The small epsilon guards against division by zero.
    return frequencies / (frequencies.sum() + 1e-7)

def extract_edges(image):
    """Return the Canny edge map of an image as a flat 1-D array.

    Args:
        image: BGR image; it is converted to grayscale before edge detection.

    Returns:
        The flattened output of cv2.Canny run with thresholds 100 and 200.
        NOTE(review): the length presumably equals the pixel count, so all
        images must share one size for the feature vectors to line up
        (e.g. the 224x224 default of resize_X_train_in_folder) - confirm.
    """
    low_threshold, high_threshold = 100, 200
    edge_map = cv2.Canny(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
        low_threshold,
        high_threshold,
    )
    return edge_map.flatten()

def extract_features(image):
    """Build the full feature vector for one image.

    The vector is the concatenation, in this order, of the color histogram,
    the local-binary-pattern histogram and the flattened edge map.

    Args:
        image: BGR image array handed unchanged to each extractor.

    Returns:
        1-D NumPy array holding the three feature groups back to back.
    """
    feature_groups = (
        extract_color_histogram(image),
        extract_local_binary_patterns(image),
        extract_edges(image),
    )
    return np.concatenate(feature_groups)

In [4]:
def load_X_train_and_y_train_from_folder(folder_path):
    """Extract feature vectors and labels for every .jpg/.png image in a folder.

    Files are visited in sorted name order so the result (and any seeded
    train/test split built on it) is the same on every run and machine.
    Images that cannot be read, or whose filename carries no known class tag,
    are reported and skipped, so the returned lists stay aligned and never
    contain None labels.

    Args:
        folder_path: Directory containing the image files.

    Returns:
        Tuple (X_train, y_train): a list of feature vectors and the list of
        matching string labels, in the same order.
    """
    X_train = []
    y_train = []
    # os.listdir yields entries in arbitrary order; sort for reproducibility.
    filenames = sorted(os.listdir(folder_path))
    for filename in tqdm(filenames, desc="Loading data"):
        if not filename.endswith((".jpg", ".png")):
            continue

        # Resolve the label first: an unlabeled sample is useless downstream
        # (label lookups would fail on None), so skip the costly feature
        # extraction. extract_label has already printed the warning.
        label = extract_label(filename)
        if label is None:
            continue

        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to read image: {image_path}")
            continue

        X_train.append(extract_features(image))
        y_train.append(label)
    return X_train, y_train

In [5]:
def evaluate_model(true_y_train, predicted_y_train):
    """Score predictions with accuracy, weighted F1/precision/recall and a confusion matrix.

    Predicted labels are translated through the notebook-level
    `label_to_number` mapping before scoring, so that mapping must already be
    defined when this function is called (it is created in a later cell).

    Args:
        true_y_train: Ground-truth labels, passed to the metrics unchanged.
            NOTE(review): presumably already integer-encoded - confirm.
        predicted_y_train: Predicted labels; each must be a key of
            `label_to_number`.

    Returns:
        Tuple (accuracy, f1, precision, recall, conf_matrix).
    """
    encoded_predictions = [label_to_number[name] for name in predicted_y_train]
    weighted = {"average": "weighted"}

    return (
        accuracy_score(true_y_train, encoded_predictions),
        f1_score(true_y_train, encoded_predictions, **weighted),
        precision_score(true_y_train, encoded_predictions, **weighted),
        recall_score(true_y_train, encoded_predictions, **weighted),
        confusion_matrix(true_y_train, encoded_predictions),
    )

In [6]:
def resize_X_train_in_folder(input_folder, output_folder, target_size=(224, 224)):
    """Resize every .jpg/.png image of a folder and save it under the same name.

    The output folder is created when missing. Images that cannot be read or
    written are reported instead of being skipped silently.

    Args:
        input_folder: Directory holding the source images.
        output_folder: Directory that receives the resized copies.
        target_size: Size handed to cv2.resize as its `dsize` argument.

    Returns:
        None. The resized images are written to `output_folder`.
    """
    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs(output_folder, exist_ok=True)

    filenames = os.listdir(input_folder)
    for filename in tqdm(filenames, desc="Resizing X_train"):
        if not filename.endswith((".jpg", ".png")):
            continue

        input_image_path = os.path.join(input_folder, filename)
        image = cv2.imread(input_image_path)
        if image is None:
            print(f"Failed to read image: {input_image_path}")
            continue

        resized_image = cv2.resize(image, target_size)
        output_image_path = os.path.join(output_folder, filename)
        # imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(output_image_path, resized_image):
            print(f"Failed to write image: {output_image_path}")

In [7]:
# Extract feature vectors (X) and string labels (y) for every image in the
# "resized" folder. The recorded run of this cell took several minutes.
folder_path = "resized"
X, y = load_X_train_and_y_train_from_folder(folder_path)

Loading data: 100%|██████████| 5912/5912 [03:43<00:00, 26.46it/s]


In [8]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# repeatable as long as X and y arrive in the same order.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Integer id assigned to each class name.
label_to_number = {
    "fresh banana": 0,
    "fresh green apple": 1,
    "rotten apple": 2,
    "rotten banana": 3,
    "fresh red apple": 4,
    "fresh bitter gourd": 5,
    "fresh capsicum": 6,
    "fresh orange": 7,
    "fresh tomato": 8,
    "rotten bitter gourd": 9,
    "rotten capsicum": 10,
    "rotten orange": 11,
}
# Training labels encoded as integers, in the same order as y_train.
numeric_y_train = [label_to_number[name] for name in y_train]

In [12]:
# Train on the integer-encoded labels so the model's predictions are directly
# comparable with the numeric test labels used by the metric cells; the
# original fitted on the string labels and left numeric_y_train unused.
# A fixed random_state makes the fitted forest repeatable between runs.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
# fit() reports no incremental progress, so this bar only jumps to 100% once
# training has finished; it mainly records the elapsed time.
with tqdm(total=len(X_train), desc="Training Random Forest") as pbar:
    rf_model.fit(X_train, numeric_y_train)
    pbar.update(len(X_train))


Training Random Forest: 100%|██████████| 4729/4729 [00:40<00:00, 117.01it/s]


In [10]:
# Load a previously pickled model. This rebinds rf_model, replacing any model
# fitted earlier in the session.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
# NOTE(review): the model is later saved as 'Random Forest.pkl', not
# 'model.pkl' - confirm which file is the intended artifact.
with open('model.pkl', 'rb') as file:
    rf_model = pickle.load(file)

In [11]:
# Integer id assigned to each class name (same mapping as used for the
# training labels).
label_to_number = {
    "fresh banana": 0,
    "fresh green apple": 1,
    "rotten apple": 2,
    "rotten banana": 3,
    "fresh red apple": 4,
    "fresh bitter gourd": 5,
    "fresh capsicum": 6,
    "fresh orange": 7,
    "fresh tomato": 8,
    "rotten bitter gourd": 9,
    "rotten capsicum": 10,
    "rotten orange": 11,
}
# Test labels encoded as integers, in the same order as y_test.
numeric_y_test = [label_to_number[name] for name in y_test]

In [12]:
# Predict a class for every held-out test sample with the current rf_model.
y_pred = rf_model.predict(X_test)

In [13]:
# Display the predictions (integer class ids in the recorded output).
y_pred

array([11,  8,  4, ..., 11,  5,  5], dtype=int64)

In [23]:
# Persist whatever model rf_model currently refers to.
# NOTE(review): the model was loaded from 'model.pkl' but is written to
# 'Random Forest.pkl' - confirm the two filenames are meant to differ.
with open('Random Forest.pkl', 'wb') as file:
    pickle.dump(rf_model, file)

In [18]:
# Convert the encoded test labels to a NumPy array and display it so it can
# be compared with y_pred.
numeric_y_test_arr = np.array(numeric_y_test)
numeric_y_test_arr

array([11,  8,  4, ..., 11,  5,  5])

In [19]:
# Display the predictions again, next to the true labels shown in the
# previous cell.
y_pred

array([11,  8,  4, ..., 11,  5,  5], dtype=int64)

In [21]:
# Overall accuracy of the predictions on the held-out test split.
accuracy_score(numeric_y_test_arr, y_pred)

0.989010989010989

In [24]:
# Per-class F1 scores (average=None yields one score per class).
f1_score(numeric_y_test_arr, y_pred, average=None)

array([0.97752809, 0.98780488, 0.96453901, 0.97368421, 0.98823529,
       1.        , 1.        , 0.98507463, 0.99770115, 1.        ,
       0.99516908, 0.98      ])

In [25]:
# Micro-averaged F1; in the recorded output it equals the accuracy value.
f1_score(numeric_y_test_arr, y_pred, average='micro')

0.989010989010989

In [26]:
# Per-class precision (average=None yields one score per class).
precision_score(numeric_y_test_arr, y_pred, average=None)

array([0.97752809, 0.97590361, 0.95774648, 0.98666667, 1.        ,
       1.        , 1.        , 0.99      , 0.99541284, 1.        ,
       0.99038462, 0.98      ])

In [None]:
# Per-class recall (average=None yields one score per class).
# NOTE(review): the stored output shows 1.0 for every class, which contradicts
# the recorded accuracy (~0.989) and the per-class F1 values below 1.0. The
# cell has no execution count, so its output is likely stale - re-run it.
recall_score(numeric_y_test_arr, y_pred, average=None)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [None]:
# Confusion matrix: rows are true classes, columns are predicted classes.
# NOTE(review): the stored output is perfectly diagonal (no errors), which
# contradicts the recorded accuracy (~0.989). The cell has no execution
# count, so its output is likely stale - re-run it.
confusion_matrix(numeric_y_test_arr, y_pred)

array([[ 89,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,  81,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,  70,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,  77,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,  86,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,  75,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 110,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0, 101,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0, 217,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,  74,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 103,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 100]],
      dtype=int64)