In [1]:
# Mount Google Drive at /content/drive; the dataset paths used by the later
# cells all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Self-Collected Dataset without Preprocessing**

In [None]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Label/train"
data_dir_test = "/content/drive/MyDrive/Label/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2}

# Target size handed to cv2.resize for every image.
IMG_SIZE = (64, 64)  # Maintain resizing only for consistent input dimensions

# Load images without preprocessing
def load_data(data_dir):
    """Load every readable image under ``data_dir`` as a flattened raw-pixel vector.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned; each decodable image is resized to ``IMG_SIZE`` and
    flattened to 1-D.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: one flattened image per entry of
        ``X`` and the matching integer label ids in ``y``. Both are empty
        when nothing could be loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features (raw images)
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize to consistent dimensions, then convert to a 1D vector
            X.append(cv2.resize(img, IMG_SIZE).flatten())
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Read both splits from disk as flattened raw-pixel matrices
X_train, y_train = load_data(data_dir_train)
X_test, y_test = load_data(data_dir_test)

# Fit a k-nearest-neighbours classifier directly on the raw pixels
k = 3  # Number of neighbors
knn_model = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Classify the held-out images
y_pred = knn_model.predict(X_test)

# Report overall accuracy followed by the per-class breakdown
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Accuracy: 0.38461538461538464
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.08      0.13        13
           1       0.55      0.50      0.52        22
           2       0.27      0.47      0.34        17

    accuracy                           0.38        52
   macro avg       0.44      0.35      0.33        52
weighted avg       0.44      0.38      0.37        52



# **Self-Collected Dataset with Preprocessing**

In [None]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Label/train"
data_dir_test = "/content/drive/MyDrive/Label/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2}

# Feature extraction parameters
# Histogram bins per colour channel; also reused as the number of grayscale
# pixels kept by extract_texture_features.
COLOR_BINS = 32
IMG_SIZE = (64, 64)  # Image resizing dimensions

# Extract color histogram features
def extract_color_histogram(img):
    """Return a flattened, normalised joint colour histogram of ``img``.

    The histogram is computed jointly over channels 0, 1 and 2 with
    ``COLOR_BINS`` bins per channel and the range 0-256 for each, giving
    ``COLOR_BINS ** 3`` values. ``cv2.normalize`` is applied with its default
    settings before flattening.
    """
    channel_ids = [0, 1, 2]
    bins_per_channel = [COLOR_BINS] * 3
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([img], channel_ids, None, bins_per_channel, value_ranges)
    normalised = cv2.normalize(histogram, histogram)
    return normalised.flatten()

# Placeholder for texture feature extraction (can be extended)
def extract_texture_features(img):
    """Return the first ``COLOR_BINS`` grayscale pixel values of ``img``.

    The BGR image is converted to grayscale, flattened, and truncated to its
    leading ``COLOR_BINS`` entries. This is a stand-in feature, not a real
    texture descriptor.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_pixels = grayscale.flatten()
    return flat_pixels[:COLOR_BINS]  # Simplified feature example

# Load images and extract features
def load_data_and_extract_features(data_dir):
    """Load the images under ``data_dir`` and build one feature vector per image.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned. Every decodable image is resized to ``IMG_SIZE`` and
    described by the output of ``extract_color_histogram`` concatenated with
    the output of ``extract_texture_features``.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the feature vectors and the
        matching integer label ids. Both are empty when nothing was loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize image
            img = cv2.resize(img, IMG_SIZE)

            # Extract features
            color_features = extract_color_histogram(img)
            texture_features = extract_texture_features(img)
            X.append(np.concatenate((color_features, texture_features)))
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Load and preprocess the data
X_train, y_train = load_data_and_extract_features(data_dir_train)
X_test, y_test = load_data_and_extract_features(data_dir_test)

# Normalize features: the scaler is fitted on the training split only and then
# reused on the test split
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Dimensionality reduction using PCA
# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 50 is capped for small training sets. The fixed random_state keeps
# the projection repeatable when scikit-learn picks its randomized solver.
n_components = min(50, *X_train_normalized.shape)
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_normalized)
X_test_pca = pca.transform(X_test_normalized)

# Create and train KNN model
k = 3  # Number of neighbors
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train_pca, y_train)

# Predictions
y_pred = knn_model.predict(X_test_pca)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.7307692307692307
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.08      0.14        13
           1       0.62      0.91      0.74        22
           2       0.89      1.00      0.94        17

    accuracy                           0.73        52
   macro avg       0.84      0.66      0.61        52
weighted avg       0.81      0.73      0.66        52



# **Self-Collected Dataset with Preprocessing (img_sharpen)**

In [2]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Label/train"
data_dir_test = "/content/drive/MyDrive/Label/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2}

# Feature extraction parameters
# Histogram bins per colour channel; also reused as the number of grayscale
# pixels kept by extract_texture_features.
COLOR_BINS = 32
IMG_SIZE = (64, 64)  # Image resizing dimensions

def sharpen_image(img):
    """Sharpen ``img`` by filtering it with a fixed 3x3 kernel.

    The kernel weights the centre pixel by 5 and its four direct neighbours
    by -1 (corners are 0). ``ddepth=-1`` is passed to ``cv2.filter2D``.
    """
    sharpening_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(img, -1, sharpening_kernel)

# Extract color histogram features
def extract_color_histogram(img):
    """Return a flattened, normalised joint colour histogram of ``img``.

    The histogram is computed jointly over channels 0, 1 and 2 with
    ``COLOR_BINS`` bins per channel and the range 0-256 for each, giving
    ``COLOR_BINS ** 3`` values. ``cv2.normalize`` is applied with its default
    settings before flattening.
    """
    channel_ids = [0, 1, 2]
    bins_per_channel = [COLOR_BINS] * 3
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([img], channel_ids, None, bins_per_channel, value_ranges)
    normalised = cv2.normalize(histogram, histogram)
    return normalised.flatten()

# Placeholder for texture feature extraction (can be extended)
def extract_texture_features(img):
    """Return the first ``COLOR_BINS`` grayscale pixel values of ``img``.

    The BGR image is converted to grayscale, flattened, and truncated to its
    leading ``COLOR_BINS`` entries. This is a stand-in feature, not a real
    texture descriptor.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_pixels = grayscale.flatten()
    return flat_pixels[:COLOR_BINS]  # Simplified feature example

# Load images and extract features
def load_data_and_extract_features(data_dir):
    """Load the images under ``data_dir`` and build features from sharpened copies.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned. Every decodable image is resized to ``IMG_SIZE``, passed
    through ``sharpen_image``, and described by the output of
    ``extract_color_histogram`` concatenated with the output of
    ``extract_texture_features``.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the feature vectors and the
        matching integer label ids. Both are empty when nothing was loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize image
            img = cv2.resize(img, IMG_SIZE)

            # Extract features from the sharpened image
            img_sharpen = sharpen_image(img)
            color_features = extract_color_histogram(img_sharpen)
            texture_features = extract_texture_features(img_sharpen)
            X.append(np.concatenate((color_features, texture_features)))
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Load and preprocess the data
X_train, y_train = load_data_and_extract_features(data_dir_train)
X_test, y_test = load_data_and_extract_features(data_dir_test)

# Normalize features: the scaler is fitted on the training split only and then
# reused on the test split
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Dimensionality reduction using PCA
# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 50 is capped for small training sets. The fixed random_state keeps
# the projection repeatable when scikit-learn picks its randomized solver.
n_components = min(50, *X_train_normalized.shape)
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_normalized)
X_test_pca = pca.transform(X_test_normalized)

# Create and train KNN model
k = 3  # Number of neighbors
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train_pca, y_train)

# Predictions
y_pred = knn_model.predict(X_test_pca)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.75
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.08      0.14        13
           1       0.64      0.95      0.76        22
           2       0.94      1.00      0.97        17

    accuracy                           0.75        52
   macro avg       0.86      0.68      0.63        52
weighted avg       0.83      0.75      0.68        52



# **Self-Collected Dataset with Preprocessing (img_enhance)**

In [3]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Label/train"
data_dir_test = "/content/drive/MyDrive/Label/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2}

# Feature extraction parameters
# Histogram bins per colour channel; also reused as the number of grayscale
# pixels kept by extract_texture_features.
COLOR_BINS = 32
IMG_SIZE = (64, 64)  # Image resizing dimensions

def enhance_image(img, alpha=1.5, beta=20):
    """Adjust the contrast (gain) and brightness (offset) of ``img``.

    Delegates to ``cv2.convertScaleAbs``, which scales each value by
    ``alpha``, adds ``beta``, takes the absolute value and converts the
    result to 8 bits.

    Args:
        img: Input image array.
        alpha: Multiplicative gain; defaults to the previously hard-coded 1.5.
        beta: Additive offset; defaults to the previously hard-coded 20.

    Returns:
        The scaled 8-bit image.
    """
    return cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

# Extract color histogram features
def extract_color_histogram(img):
    """Return a flattened, normalised joint colour histogram of ``img``.

    The histogram is computed jointly over channels 0, 1 and 2 with
    ``COLOR_BINS`` bins per channel and the range 0-256 for each, giving
    ``COLOR_BINS ** 3`` values. ``cv2.normalize`` is applied with its default
    settings before flattening.
    """
    channel_ids = [0, 1, 2]
    bins_per_channel = [COLOR_BINS] * 3
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([img], channel_ids, None, bins_per_channel, value_ranges)
    normalised = cv2.normalize(histogram, histogram)
    return normalised.flatten()

# Placeholder for texture feature extraction (can be extended)
def extract_texture_features(img):
    """Return the first ``COLOR_BINS`` grayscale pixel values of ``img``.

    The BGR image is converted to grayscale, flattened, and truncated to its
    leading ``COLOR_BINS`` entries. This is a stand-in feature, not a real
    texture descriptor.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_pixels = grayscale.flatten()
    return flat_pixels[:COLOR_BINS]  # Simplified feature example

# Load images and extract features
def load_data_and_extract_features(data_dir):
    """Load the images under ``data_dir`` and build features from enhanced copies.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned. Every decodable image is resized to ``IMG_SIZE``, passed
    through ``enhance_image``, and described by the output of
    ``extract_color_histogram`` concatenated with the output of
    ``extract_texture_features``.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the feature vectors and the
        matching integer label ids. Both are empty when nothing was loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize image
            img = cv2.resize(img, IMG_SIZE)

            # Extract features from the enhanced image
            img_enhance = enhance_image(img)
            color_features = extract_color_histogram(img_enhance)
            texture_features = extract_texture_features(img_enhance)
            X.append(np.concatenate((color_features, texture_features)))
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Load and preprocess the data
X_train, y_train = load_data_and_extract_features(data_dir_train)
X_test, y_test = load_data_and_extract_features(data_dir_test)

# Normalize features: the scaler is fitted on the training split only and then
# reused on the test split
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Dimensionality reduction using PCA
# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 50 is capped for small training sets. The fixed random_state keeps
# the projection repeatable when scikit-learn picks its randomized solver.
n_components = min(50, *X_train_normalized.shape)
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_normalized)
X_test_pca = pca.transform(X_test_normalized)

# Create and train KNN model
k = 3  # Number of neighbors
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train_pca, y_train)

# Predictions
y_pred = knn_model.predict(X_test_pca)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.75
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.08      0.14        13
           1       0.66      0.95      0.78        22
           2       0.89      1.00      0.94        17

    accuracy                           0.75        52
   macro avg       0.85      0.68      0.62        52
weighted avg       0.82      0.75      0.67        52



# **Self-Collected Dataset with Preprocessing (remove_bg)**

In [4]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Label/train"
data_dir_test = "/content/drive/MyDrive/Label/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2}

# Feature extraction parameters
# Histogram bins per colour channel; also reused as the number of grayscale
# pixels kept by extract_texture_features.
COLOR_BINS = 32
IMG_SIZE = (64, 64)  # Image resizing dimensions

def remove_background(img):
    """Zero out the background of a BGR image using Otsu-seeded GrabCut.

    An Otsu threshold on the grayscale image supplies the initial guess:
    pixels above the threshold are seeded as probable background, the rest as
    probable foreground. GrabCut (5 iterations) refines that mask, and every
    pixel it ends up labelling as background or probable background is set to 0.

    NOTE(review): seeding the bright pixels as background presumes a darker
    subject on a lighter backdrop -- confirm this holds for the dataset.

    Args:
        img: BGR image as returned by ``cv2.imread``.

    Returns:
        An array of the same shape as ``img`` with background pixels zeroed.
        When the threshold puts every pixel in a single class (e.g. a uniform
        image) GrabCut cannot be initialised, so an unmodified copy of
        ``img`` is returned instead of raising.
    """
    # Convert to grayscale and apply binary thresholding
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    probable_background = binary_mask == 255
    # GrabCut asserts that both seed classes are non-empty; without this guard
    # a single-class mask aborts the whole data-loading loop with cv2.error
    if probable_background.all() or not probable_background.any():
        return img.copy()

    # Create mask for grabCut (THRESH_BINARY only yields 0 or 255, so every
    # pixel receives one of the two "probable" labels)
    mask = np.where(probable_background, cv2.GC_PR_BGD, cv2.GC_PR_FGD).astype(np.uint8)

    # Define models (scratch buffers that grabCut fills in)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)

    # Run grabCut with the new mask; it updates `mask` in place
    cv2.grabCut(img, mask, None, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_MASK)

    # Modify mask to extract foreground
    mask2 = np.where((mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD), 0, 1).astype('uint8')
    img_bg_removed = img * mask2[:, :, np.newaxis]

    return img_bg_removed

# Extract color histogram features
def extract_color_histogram(img):
    """Return a flattened, normalised joint colour histogram of ``img``.

    The histogram is computed jointly over channels 0, 1 and 2 with
    ``COLOR_BINS`` bins per channel and the range 0-256 for each, giving
    ``COLOR_BINS ** 3`` values. ``cv2.normalize`` is applied with its default
    settings before flattening.
    """
    channel_ids = [0, 1, 2]
    bins_per_channel = [COLOR_BINS] * 3
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([img], channel_ids, None, bins_per_channel, value_ranges)
    normalised = cv2.normalize(histogram, histogram)
    return normalised.flatten()

# Placeholder for texture feature extraction (can be extended)
def extract_texture_features(img):
    """Return the first ``COLOR_BINS`` grayscale pixel values of ``img``.

    The BGR image is converted to grayscale, flattened, and truncated to its
    leading ``COLOR_BINS`` entries. This is a stand-in feature, not a real
    texture descriptor.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_pixels = grayscale.flatten()
    return flat_pixels[:COLOR_BINS]  # Simplified feature example

# Load images and extract features
def load_data_and_extract_features(data_dir):
    """Load the images under ``data_dir`` and build features after background removal.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned. Every decodable image is resized to ``IMG_SIZE``, passed
    through ``remove_background``, and described by the output of
    ``extract_color_histogram`` concatenated with the output of
    ``extract_texture_features``.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the feature vectors and the
        matching integer label ids. Both are empty when nothing was loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize image
            img = cv2.resize(img, IMG_SIZE)

            # Extract features from the background-removed image
            remove_background_img = remove_background(img)
            color_features = extract_color_histogram(remove_background_img)
            texture_features = extract_texture_features(remove_background_img)
            X.append(np.concatenate((color_features, texture_features)))
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Load and preprocess the data
X_train, y_train = load_data_and_extract_features(data_dir_train)
X_test, y_test = load_data_and_extract_features(data_dir_test)

# Normalize features: the scaler is fitted on the training split only and then
# reused on the test split
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Dimensionality reduction using PCA
# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 50 is capped for small training sets. The fixed random_state keeps
# the projection repeatable when scikit-learn picks its randomized solver.
n_components = min(50, *X_train_normalized.shape)
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_normalized)
X_test_pca = pca.transform(X_test_normalized)

# Create and train KNN model
k = 3  # Number of neighbors
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train_pca, y_train)

# Predictions
y_pred = knn_model.predict(X_test_pca)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.75
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.15      0.27        13
           1       0.64      0.95      0.76        22
           2       0.94      0.94      0.94        17

    accuracy                           0.75        52
   macro avg       0.86      0.68      0.66        52
weighted avg       0.83      0.75      0.70        52



# **Public Dataset without Preprocessing**

In [None]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/train"
data_dir_test = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2, "Level4": 3, "Level5": 4}

# Target size handed to cv2.resize for every image.
IMG_SIZE = (64, 64)  # Maintain resizing only for consistent input dimensions

# Load images without preprocessing
def load_data(data_dir):
    """Load every readable image under ``data_dir`` as a flattened raw-pixel vector.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned; each decodable image is resized to ``IMG_SIZE`` and
    flattened to 1-D.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: one flattened image per entry of
        ``X`` and the matching integer label ids in ``y``. Both are empty
        when nothing could be loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features (raw images)
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize to consistent dimensions, then convert to a 1D vector
            X.append(cv2.resize(img, IMG_SIZE).flatten())
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Read both splits from disk as flattened raw-pixel matrices
X_train, y_train = load_data(data_dir_train)
X_test, y_test = load_data(data_dir_test)

# Fit a k-nearest-neighbours classifier directly on the raw pixels
k = 5  # Number of neighbors
knn_model = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Classify the held-out images
y_pred = knn_model.predict(X_test)

# Report overall accuracy followed by the per-class breakdown
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Accuracy: 0.5115489130434783
Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.86      0.77      1071
           1       0.40      0.35      0.38       669
           2       0.39      0.45      0.42       827
           3       0.45      0.47      0.46       989
           4       0.52      0.30      0.39       860

    accuracy                           0.51      4416
   macro avg       0.49      0.49      0.48      4416
weighted avg       0.51      0.51      0.50      4416



# **Public Dataset with Preprocessing**

In [None]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/train"
data_dir_test = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2, "Level4": 3, "Level5": 4}

# Feature extraction parameters
# Histogram bins per colour channel; also reused as the number of grayscale
# pixels kept by extract_texture_features.
COLOR_BINS = 32
IMG_SIZE = (64, 64)  # Image resizing dimensions

# Extract color histogram features
def extract_color_histogram(img):
    """Return a flattened, normalised joint colour histogram of ``img``.

    The histogram is computed jointly over channels 0, 1 and 2 with
    ``COLOR_BINS`` bins per channel and the range 0-256 for each, giving
    ``COLOR_BINS ** 3`` values. ``cv2.normalize`` is applied with its default
    settings before flattening.
    """
    channel_ids = [0, 1, 2]
    bins_per_channel = [COLOR_BINS] * 3
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([img], channel_ids, None, bins_per_channel, value_ranges)
    normalised = cv2.normalize(histogram, histogram)
    return normalised.flatten()

# Placeholder for texture feature extraction (can be extended)
def extract_texture_features(img):
    """Return the first ``COLOR_BINS`` grayscale pixel values of ``img``.

    The BGR image is converted to grayscale, flattened, and truncated to its
    leading ``COLOR_BINS`` entries. This is a stand-in feature, not a real
    texture descriptor.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_pixels = grayscale.flatten()
    return flat_pixels[:COLOR_BINS]  # Simplified feature example

# Load images and extract features
def load_data_and_extract_features(data_dir):
    """Load the images under ``data_dir`` and build one feature vector per image.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned. Every decodable image is resized to ``IMG_SIZE`` and
    described by the output of ``extract_color_histogram`` concatenated with
    the output of ``extract_texture_features``.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the feature vectors and the
        matching integer label ids. Both are empty when nothing was loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize image
            img = cv2.resize(img, IMG_SIZE)

            # Extract features
            color_features = extract_color_histogram(img)
            texture_features = extract_texture_features(img)
            X.append(np.concatenate((color_features, texture_features)))
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Load and preprocess the data
X_train, y_train = load_data_and_extract_features(data_dir_train)
X_test, y_test = load_data_and_extract_features(data_dir_test)

# Normalize features: the scaler is fitted on the training split only and then
# reused on the test split
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Dimensionality reduction using PCA
# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 50 is capped for small training sets. The fixed random_state keeps
# the projection repeatable when scikit-learn picks its randomized solver.
n_components = min(50, *X_train_normalized.shape)
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_normalized)
X_test_pca = pca.transform(X_test_normalized)

# Create and train KNN model
k = 5  # Number of neighbors
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train_pca, y_train)

# Predictions
y_pred = knn_model.predict(X_test_pca)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.734375
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.89      0.88      1071
           1       0.68      0.62      0.65       669
           2       0.67      0.71      0.69       827
           3       0.67      0.66      0.66       989
           4       0.75      0.74      0.74       860

    accuracy                           0.73      4416
   macro avg       0.73      0.72      0.72      4416
weighted avg       0.73      0.73      0.73      4416



# **Public Dataset with Preprocessing (img_sharpen)**

In [5]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/train"
data_dir_test = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2, "Level4": 3, "Level5": 4}

# Feature extraction parameters
# Histogram bins per colour channel; also reused as the number of grayscale
# pixels kept by extract_texture_features.
COLOR_BINS = 32
IMG_SIZE = (64, 64)  # Image resizing dimensions

def sharpen_image(img):
    """Sharpen ``img`` by filtering it with a fixed 3x3 kernel.

    The kernel weights the centre pixel by 5 and its four direct neighbours
    by -1 (corners are 0). ``ddepth=-1`` is passed to ``cv2.filter2D``.
    """
    sharpening_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(img, -1, sharpening_kernel)

# Extract color histogram features
def extract_color_histogram(img):
    """Return a flattened, normalised joint colour histogram of ``img``.

    The histogram is computed jointly over channels 0, 1 and 2 with
    ``COLOR_BINS`` bins per channel and the range 0-256 for each, giving
    ``COLOR_BINS ** 3`` values. ``cv2.normalize`` is applied with its default
    settings before flattening.
    """
    channel_ids = [0, 1, 2]
    bins_per_channel = [COLOR_BINS] * 3
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([img], channel_ids, None, bins_per_channel, value_ranges)
    normalised = cv2.normalize(histogram, histogram)
    return normalised.flatten()

# Placeholder for texture feature extraction (can be extended)
def extract_texture_features(img):
    """Return the first ``COLOR_BINS`` grayscale pixel values of ``img``.

    The BGR image is converted to grayscale, flattened, and truncated to its
    leading ``COLOR_BINS`` entries. This is a stand-in feature, not a real
    texture descriptor.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_pixels = grayscale.flatten()
    return flat_pixels[:COLOR_BINS]  # Simplified feature example

# Load images and extract features
def load_data_and_extract_features(data_dir):
    """Load the images under ``data_dir`` and build features from sharpened copies.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned. Every decodable image is resized to ``IMG_SIZE``, passed
    through ``sharpen_image``, and described by the output of
    ``extract_color_histogram`` concatenated with the output of
    ``extract_texture_features``.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the feature vectors and the
        matching integer label ids. Both are empty when nothing was loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize image
            img = cv2.resize(img, IMG_SIZE)

            # Extract features from the sharpened image
            img_sharpen = sharpen_image(img)
            color_features = extract_color_histogram(img_sharpen)
            texture_features = extract_texture_features(img_sharpen)
            X.append(np.concatenate((color_features, texture_features)))
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Load and preprocess the data
print("Loading training data...")
X_train, y_train = load_data_and_extract_features(data_dir_train)
print("Loading testing data...")
X_test, y_test = load_data_and_extract_features(data_dir_test)

# Normalize features: the scaler is fitted on the training split only and then
# reused on the test split
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Dimensionality reduction using PCA
# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 50 is capped for small training sets. The fixed random_state keeps
# the projection repeatable when scikit-learn picks its randomized solver.
n_components = min(50, *X_train_normalized.shape)
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_normalized)
X_test_pca = pca.transform(X_test_normalized)

# Create and train KNN model
k = 5  # Number of neighbors
print("Training the KNN model...")
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train_pca, y_train)

# Predictions
print("Making predictions...")
y_pred = knn_model.predict(X_test_pca)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.7368659420289855
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1071
           1       0.69      0.59      0.63       669
           2       0.68      0.72      0.70       827
           3       0.65      0.68      0.67       989
           4       0.78      0.73      0.75       860

    accuracy                           0.74      4416
   macro avg       0.73      0.72      0.73      4416
weighted avg       0.74      0.74      0.74      4416



# **Public Dataset with Preprocessing (img_enhance)**



In [6]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/train"
data_dir_test = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2, "Level4": 3, "Level5": 4}

# Feature extraction parameters
# Histogram bins per colour channel; also reused as the number of grayscale
# pixels kept by extract_texture_features.
COLOR_BINS = 32
IMG_SIZE = (64, 64)  # Image resizing dimensions

def enhance_image(img, alpha=1.5, beta=20):
    """Adjust the contrast (gain) and brightness (offset) of ``img``.

    Delegates to ``cv2.convertScaleAbs``, which scales each value by
    ``alpha``, adds ``beta``, takes the absolute value and converts the
    result to 8 bits.

    Args:
        img: Input image array.
        alpha: Multiplicative gain; defaults to the previously hard-coded 1.5.
        beta: Additive offset; defaults to the previously hard-coded 20.

    Returns:
        The scaled 8-bit image.
    """
    return cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

# Extract color histogram features
def extract_color_histogram(img):
    """Return a flattened, normalised joint colour histogram of ``img``.

    The histogram is computed jointly over channels 0, 1 and 2 with
    ``COLOR_BINS`` bins per channel and the range 0-256 for each, giving
    ``COLOR_BINS ** 3`` values. ``cv2.normalize`` is applied with its default
    settings before flattening.
    """
    channel_ids = [0, 1, 2]
    bins_per_channel = [COLOR_BINS] * 3
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([img], channel_ids, None, bins_per_channel, value_ranges)
    normalised = cv2.normalize(histogram, histogram)
    return normalised.flatten()

# Placeholder for texture feature extraction (can be extended)
def extract_texture_features(img):
    """Return the first ``COLOR_BINS`` grayscale pixel values of ``img``.

    The BGR image is converted to grayscale, flattened, and truncated to its
    leading ``COLOR_BINS`` entries. This is a stand-in feature, not a real
    texture descriptor.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_pixels = grayscale.flatten()
    return flat_pixels[:COLOR_BINS]  # Simplified feature example

# Load images and extract features
def load_data_and_extract_features(data_dir):
    """Load the images under ``data_dir`` and build features from enhanced copies.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned. Every decodable image is resized to ``IMG_SIZE``, passed
    through ``enhance_image``, and described by the output of
    ``extract_color_histogram`` concatenated with the output of
    ``extract_texture_features``.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the feature vectors and the
        matching integer label ids. Both are empty when nothing was loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize image
            img = cv2.resize(img, IMG_SIZE)

            # Extract features from the enhanced image
            img_enhance = enhance_image(img)
            color_features = extract_color_histogram(img_enhance)
            texture_features = extract_texture_features(img_enhance)
            X.append(np.concatenate((color_features, texture_features)))
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Load and preprocess the data
print("Loading training data...")
X_train, y_train = load_data_and_extract_features(data_dir_train)
print("Loading testing data...")
X_test, y_test = load_data_and_extract_features(data_dir_test)

# Normalize features: the scaler is fitted on the training split only and then
# reused on the test split
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Dimensionality reduction using PCA
# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 50 is capped for small training sets. The fixed random_state keeps
# the projection repeatable when scikit-learn picks its randomized solver.
n_components = min(50, *X_train_normalized.shape)
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_normalized)
X_test_pca = pca.transform(X_test_normalized)

# Create and train KNN model
k = 5  # Number of neighbors
print("Training the KNN model...")
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train_pca, y_train)

# Predictions
print("Making predictions...")
y_pred = knn_model.predict(X_test_pca)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

Loading training data...
Loading testing data...
Training the KNN model...
Making predictions...
Accuracy: 0.6788949275362319
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.90      0.88      1071
           1       0.68      0.59      0.63       669
           2       0.63      0.67      0.65       827
           3       0.56      0.59      0.57       989
           4       0.64      0.58      0.61       860

    accuracy                           0.68      4416
   macro avg       0.67      0.67      0.67      4416
weighted avg       0.68      0.68      0.68      4416



# **Public Dataset with Preprocessing (remove_bg)**

In [7]:
import os
import numpy as np
import cv2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Set paths
# Train/test image folders on the mounted Google Drive; each is expected to
# contain one sub-directory per key of labels_dict.
data_dir_train = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/train"
data_dir_test = "/content/drive/MyDrive/Hass Avocado Ripening Photographic Dataset/outfolder/test"
# Maps a class sub-directory name to the integer label id given to the classifier.
labels_dict = {"Level1": 0, "Level2": 1, "Level3": 2, "Level4": 3, "Level5": 4}

# Feature extraction parameters
# Histogram bins per colour channel; also reused as the number of grayscale
# pixels kept by extract_texture_features.
COLOR_BINS = 32
IMG_SIZE = (64, 64)  # Image resizing dimensions

def remove_background(img):
    """Zero out the background of a BGR image using Otsu-seeded GrabCut.

    An Otsu threshold on the grayscale image supplies the initial guess:
    pixels above the threshold are seeded as probable background, the rest as
    probable foreground. GrabCut (5 iterations) refines that mask, and every
    pixel it ends up labelling as background or probable background is set to 0.

    NOTE(review): seeding the bright pixels as background presumes a darker
    subject on a lighter backdrop -- confirm this holds for the dataset.

    Args:
        img: BGR image as returned by ``cv2.imread``.

    Returns:
        An array of the same shape as ``img`` with background pixels zeroed.
        When the threshold puts every pixel in a single class (e.g. a uniform
        image) GrabCut cannot be initialised, so an unmodified copy of
        ``img`` is returned instead of raising.
    """
    # Convert to grayscale and apply binary thresholding
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    probable_background = binary_mask == 255
    # GrabCut asserts that both seed classes are non-empty; without this guard
    # a single-class mask aborts the whole data-loading loop with cv2.error
    if probable_background.all() or not probable_background.any():
        return img.copy()

    # Create mask for grabCut (THRESH_BINARY only yields 0 or 255, so every
    # pixel receives one of the two "probable" labels)
    mask = np.where(probable_background, cv2.GC_PR_BGD, cv2.GC_PR_FGD).astype(np.uint8)

    # Define models (scratch buffers that grabCut fills in)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)

    # Run grabCut with the new mask; it updates `mask` in place
    cv2.grabCut(img, mask, None, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_MASK)

    # Modify mask to extract foreground
    mask2 = np.where((mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD), 0, 1).astype('uint8')
    img_bg_removed = img * mask2[:, :, np.newaxis]

    return img_bg_removed

# Extract color histogram features
def extract_color_histogram(img):
    """Return a flattened, normalised joint colour histogram of ``img``.

    The histogram is computed jointly over channels 0, 1 and 2 with
    ``COLOR_BINS`` bins per channel and the range 0-256 for each, giving
    ``COLOR_BINS ** 3`` values. ``cv2.normalize`` is applied with its default
    settings before flattening.
    """
    channel_ids = [0, 1, 2]
    bins_per_channel = [COLOR_BINS] * 3
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([img], channel_ids, None, bins_per_channel, value_ranges)
    normalised = cv2.normalize(histogram, histogram)
    return normalised.flatten()

# Placeholder for texture feature extraction (can be extended)
def extract_texture_features(img):
    """Return the first ``COLOR_BINS`` grayscale pixel values of ``img``.

    The BGR image is converted to grayscale, flattened, and truncated to its
    leading ``COLOR_BINS`` entries. This is a stand-in feature, not a real
    texture descriptor.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_pixels = grayscale.flatten()
    return flat_pixels[:COLOR_BINS]  # Simplified feature example

# Load images and extract features
def load_data_and_extract_features(data_dir):
    """Load the images under ``data_dir`` and build features after background removal.

    For each key of the module-level ``labels_dict`` the sub-directory of that
    name is scanned. Every decodable image is resized to ``IMG_SIZE``, passed
    through ``remove_background``, and described by the output of
    ``extract_color_histogram`` concatenated with the output of
    ``extract_texture_features``.

    Args:
        data_dir: Directory containing one sub-directory per class name.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the feature vectors and the
        matching integer label ids. Both are empty when nothing was loaded.
    """
    import warnings  # function-scope import keeps this cell self-contained

    X = []  # Features
    y = []  # Labels

    for label_name, label_id in labels_dict.items():
        class_dir = os.path.join(data_dir, label_name)
        if not os.path.isdir(class_dir):
            # A silently missing class would distort every reported metric
            warnings.warn(f"Class directory not found, skipping: {class_dir}")
            continue

        skipped = 0
        # Sorted so the sample order does not depend on the filesystem listing order
        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # cv2.imread could not decode this entry
                skipped += 1
                continue

            # Resize image
            img = cv2.resize(img, IMG_SIZE)

            # Extract features from the background-removed image
            remove_background_img = remove_background(img)
            color_features = extract_color_histogram(remove_background_img)
            texture_features = extract_texture_features(remove_background_img)
            X.append(np.concatenate((color_features, texture_features)))
            y.append(label_id)

        if skipped:
            warnings.warn(f"Skipped {skipped} unreadable file(s) in {class_dir}")

    return np.array(X), np.array(y)

# Load and preprocess the data
print("Loading training data...")
X_train, y_train = load_data_and_extract_features(data_dir_train)
print("Loading testing data...")
X_test, y_test = load_data_and_extract_features(data_dir_test)

# Normalize features: the scaler is fitted on the training split only and then
# reused on the test split
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Dimensionality reduction using PCA
# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 50 is capped for small training sets. The fixed random_state keeps
# the projection repeatable when scikit-learn picks its randomized solver.
n_components = min(50, *X_train_normalized.shape)
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_normalized)
X_test_pca = pca.transform(X_test_normalized)

# Create and train KNN model
k = 5  # Number of neighbors
print("Training the KNN model...")
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train_pca, y_train)

# Predictions
print("Making predictions...")
y_pred = knn_model.predict(X_test_pca)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

Loading training data...
Loading testing data...
Training the KNN model...
Making predictions...
Accuracy: 0.6698369565217391
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.89      0.87      1071
           1       0.66      0.58      0.62       669
           2       0.62      0.66      0.64       827
           3       0.55      0.58      0.56       989
           4       0.63      0.58      0.61       860

    accuracy                           0.67      4416
   macro avg       0.66      0.66      0.66      4416
weighted avg       0.67      0.67      0.67      4416

