In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from skimage import feature
import pickle
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

In [2]:
def extract_label(filename):
    if "frban" in filename:
        return "fresh banana"
    elif "frgrapp" in filename:
        return "fresh green apple"
    elif "rotapp" in filename:
        return "rotten apple"
    elif "rotban" in filename:
        return "rotten banana"
    elif "frredapp" in filename:
        return "fresh red apple"
    elif "frbg" in filename:
        return "fresh bitter gourd"
    elif "frcapsicum" in filename:
        return "fresh capsicum"
    elif "rotorange" in filename:
        return "rotten orange"
    elif "rotcapsicum" in filename:
        return "rotten capsicum"
    elif "rotbg" in filename:
        return "rotten bitter gourd"
    elif "frtomato" in filename:
        return "fresh tomato"
    elif "frorange" in filename:
        return "fresh orange"
    else:
        print(f"Unknown label for image: {filename}")
        return None

In [3]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    hist = cv2.calcHist([image], [0, 1, 2], None, bins, [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def extract_edges(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    return edges.flatten()

def compute_texture_gradients(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    
    # Compute texture gradients using Sobel operator
    sobel_x = cv2.Sobel(gray_image, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(gray_image, cv2.CV_64F, 0, 1, ksize=3)
    
    # Compute magnitude of gradients
    gradient_magnitude = np.sqrt(sobel_x**2 + sobel_y**2)
    
    # Compute mean and standard deviation of gradient magnitude
    mean_gradient = np.mean(gradient_magnitude)
    std_gradient = np.std(gradient_magnitude)
    
    # Compute texture gradients feature vector
    texture_gradients = np.array([mean_gradient, std_gradient])
    
    return texture_gradients

def extract_features(image):
    color_hist = extract_color_histogram(image)
    ta = compute_texture_gradients(image)
    edges = extract_edges(image)
    feature_vector = np.concatenate([color_hist, ta, edges])
    return feature_vector

In [4]:
def load_X_train_and_y_train_from_folder(folder_path):
    X_train = []
    y_train = []
    filenames = os.listdir(folder_path)
    for filename in tqdm(filenames, desc="Loading data"):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path)
            if image is not None:
                feature_vector = extract_features(image)
                y_train.append(extract_label(filename))
                X_train.append(feature_vector)
            else:
                print(f"Failed to read image: {image_path}")
    return X_train, y_train

In [5]:
def evaluate_model(true_y_train, predicted_y_train):
    numeric_predicted_y_train = [label_to_number[label] for label in predicted_y_train]
    
    accuracy = accuracy_score(true_y_train, numeric_predicted_y_train)
    f1 = f1_score(true_y_train, numeric_predicted_y_train, average='weighted')
    precision = precision_score(true_y_train, numeric_predicted_y_train, average='weighted')
    recall = recall_score(true_y_train, numeric_predicted_y_train, average='weighted')
    conf_matrix = confusion_matrix(true_y_train, numeric_predicted_y_train)
    
    return accuracy, f1, precision, recall, conf_matrix

In [6]:
def resize_X_train_in_folder(input_folder, output_folder, target_size=(224, 224)):
    filenames = os.listdir(input_folder)
    for filename in tqdm(filenames, desc="Resizing X_train"):
        if filename.endswith(".jpg") or filename.endswith(".png"):  
            input_image_path = os.path.join(input_folder, filename)
            output_image_path = os.path.join(output_folder, filename)
            image = cv2.imread(input_image_path)
            if image is not None:
                resized_image = cv2.resize(image, target_size)  
                cv2.imwrite(output_image_path, resized_image)
            else:
                pass

In [7]:
folder_path = "resized"
X, y = load_X_train_and_y_train_from_folder(folder_path)

Loading data:   0%|          | 0/5912 [00:00<?, ?it/s]

Loading data: 100%|██████████| 5912/5912 [00:19<00:00, 306.54it/s]


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
X_train[0:5]

[array([0.19285654, 0.00214785, 0.        , ..., 0.        , 0.        ,
        0.        ]),
 array([0.12904608, 0.        , 0.00029809, ..., 0.        , 0.        ,
        0.        ]),
 array([0.30730838, 0.0247114 , 0.01153198, ..., 0.        , 0.        ,
        0.        ]),
 array([3.22339356e-01, 1.80548552e-04, 3.55680659e-02, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 array([0.43959033, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ])]

In [10]:
y_train[0:5]

['rotten banana',
 'fresh tomato',
 'rotten bitter gourd',
 'fresh tomato',
 'fresh green apple']

In [11]:
X_train[0].shape

(50690,)

In [12]:
label_counts = {}

for label in y_train:
    if label in label_counts:
        label_counts[label] += 1
    else:
        label_counts[label] = 1

for label, count in label_counts.items():
    print(f"{label}: {count} samples")

rotten banana: 321 samples
fresh tomato: 999 samples
rotten bitter gourd: 283 samples
fresh green apple: 319 samples
fresh capsicum: 390 samples
rotten apple: 330 samples
fresh red apple: 313 samples
rotten orange: 400 samples
fresh bitter gourd: 252 samples
fresh banana: 310 samples
rotten capsicum: 413 samples
fresh orange: 399 samples


In [13]:
label_to_number = {"fresh banana": 0, "fresh green apple": 1, "rotten apple": 2, "rotten banana": 3, "fresh red apple": 4, "fresh bitter gourd": 5, "fresh capsicum": 6, "fresh orange": 7, "fresh tomato": 8, "rotten bitter gourd": 9, "rotten capsicum": 10, "rotten orange": 11}
numeric_y_train = [label_to_number[label] for label in y_train]

In [14]:
numeric_y_train[:5]

[3, 8, 9, 8, 1]

In [15]:
import xgboost as xgb

xgb_classifier = xgb.XGBClassifier()

xgb_classifier.fit(X_train, numeric_y_train)

In [16]:
with open('xgb_traintestsplit.pkl', 'wb') as file:
    pickle.dump(xgb_classifier, file)

In [18]:
y_test[:5]

['rotten orange',
 'fresh tomato',
 'fresh red apple',
 'fresh tomato',
 'fresh tomato']

In [19]:
label_to_number = {"fresh banana": 0, "fresh green apple": 1, "rotten apple": 2, "rotten banana": 3, "fresh red apple": 4, "fresh bitter gourd": 5, "fresh capsicum": 6, "fresh orange": 7, "fresh tomato": 8, "rotten bitter gourd": 9, "rotten capsicum": 10, "rotten orange": 11}
numeric_y_test = [label_to_number[label] for label in y_test]

In [20]:
numeric_y_test[:5]

[11, 8, 4, 8, 8]

In [21]:
type(numeric_y_test)

list

In [22]:
X_test[0].shape

(50690,)

In [23]:
y_pred = xgb_classifier.predict(X_test)

predicted_y_test = [list(label_to_number.keys())[label] for label in y_pred]

In [24]:
y_pred[:5]

array([11,  8,  4,  8,  8], dtype=int64)

In [25]:
type(y_pred)

numpy.ndarray

In [26]:
predicted_y_test[0:5]

['rotten orange',
 'fresh tomato',
 'fresh red apple',
 'fresh tomato',
 'fresh tomato']

In [27]:
correct_predictions = sum(1 for true_label, predicted_label in zip(y_test, predicted_y_test) if true_label == predicted_label)
total_test_samples = len(y_test)
accuracy = correct_predictions / total_test_samples
print("Accuracy:", accuracy)


Accuracy: 0.9915469146238377


In [28]:
numeric_y_test_arr = np.array(numeric_y_test)
numeric_y_test_arr

array([11,  8,  4, ..., 11,  5,  5])

In [29]:
accuracy_score(numeric_y_test_arr, y_pred)

0.9915469146238377

In [30]:
f1_score(numeric_y_test_arr, y_pred, average=None)

array([0.98876404, 0.99386503, 0.95104895, 0.98039216, 0.98823529,
       1.        , 1.        , 0.98507463, 1.        , 1.        ,
       1.        , 0.99      ])

In [31]:
f1_score(numeric_y_test_arr, y_pred, average='micro')

0.9915469146238377

In [32]:
precision_score(numeric_y_test_arr, y_pred, average=None)

array([0.98876404, 0.98780488, 0.93150685, 0.98684211, 1.        ,
       1.        , 1.        , 0.99      , 1.        , 1.        ,
       1.        , 0.99      ])

In [33]:
recall_score(numeric_y_test_arr, y_pred, average=None)

array([0.98876404, 1.        , 0.97142857, 0.97402597, 0.97674419,
       1.        , 1.        , 0.98019802, 1.        , 1.        ,
       1.        , 0.99      ])

In [34]:
confusion_matrix(numeric_y_test_arr, y_pred)

array([[ 88,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,  81,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,  68,   1,   0,   0,   0,   1,   0,   0,   0,   0],
       [  1,   0,   1,  75,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   2,   0,  84,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,  75,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 110,   0,   0,   0,   0,   0],
       [  0,   0,   1,   0,   0,   0,   0,  99,   0,   0,   0,   1],
       [  0,   0,   0,   0,   0,   0,   0,   0, 217,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,  74,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 103,   0],
       [  0,   0,   1,   0,   0,   0,   0,   0,   0,   0,   0,  99]],
      dtype=int64)