In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import glob
import cv2
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from keras.models import Model
from keras.applications.inception_v3 import InceptionV3  # Import InceptionV3
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn import metrics
from keras.utils import to_categorical
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import os

In [4]:
# Define the image size
SIZE = 256  

# Path to the dataset
dataset_path = "G:/datasets/Tomato fruits dataset for binary and multiclass classification/Two Classes"
print(os.listdir(dataset_path))

['Healthy', 'Reject']


In [6]:
# Capture images and labels into respective lists
images = []
labels = []
for directory_path in glob.glob(dataset_path + "/*"):
    label = directory_path.split("\\")[-1]
    for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.resize(img, (SIZE, SIZE))
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        images.append(img)
        labels.append(label)


In [8]:

# Convert lists to arrays        
images = np.array(images)
labels = np.array(labels)

In [10]:
# Encode labels from text to integers
le = preprocessing.LabelEncoder()
labels_encoded = le.fit_transform(labels)


In [12]:
# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(images, labels_encoded, test_size=0.2, random_state=42, stratify=labels_encoded)

In [14]:
# Normalize pixel values to between 0 and 1
x_train, x_test = x_train / 255.0, x_test / 255.0

In [16]:
# One hot encode y values for neural network (if needed later)
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

In [18]:
# Load InceptionV3 model without fully connected layers (pretrained on ImageNet)
Inception_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))

In [19]:
# Extract features from images using the pre-trained InceptionV3 model
train_features = Inception_model.predict(x_train)
train_features = train_features.reshape(train_features.shape[0], -1)  # Flatten features for KNN

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 4s/step


In [22]:
test_features = Inception_model.predict(x_test)
test_features = test_features.reshape(test_features.shape[0], -1)  # Flatten features for KNN

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 3s/step


In [24]:
# Set up k-fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [26]:
# KNN model
KNN_model = KNeighborsClassifier(n_neighbors=5)

In [28]:
# Arrays to store the results for each fold
precision_scores = []
recall_scores = []
f1_scores = []
accuracy_scores = []

In [34]:
# Perform k-fold cross-validation
for train_index, val_index in kf.split(train_features, y_train):
    X_train_fold, X_val_fold = train_features[train_index], train_features[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

In [36]:
# Train KNN on each fold
KNN_model.fit(X_train_fold, y_train_fold)

In [38]:
# Predict on the validation set for this fold
y_val_pred = KNN_model.predict(X_val_fold)

[WinError 2] The system cannot find the file specified
  File "C:\Users\Indhu\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\Indhu\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Indhu\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\Indhu\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


In [44]:
 # Calculate accuracy, precision, recall, and F1 score for this fold
accuracy = metrics.accuracy_score(y_val_fold, y_val_pred)
precision = precision_score(y_val_fold, y_val_pred, average='weighted')
recall = recall_score(y_val_fold, y_val_pred, average='weighted')
f1 = f1_score(y_val_fold, y_val_pred, average='weighted')
    
    # Append results to the arrays
accuracy_scores.append(accuracy)
precision_scores.append(precision)
recall_scores.append(recall)
f1_scores.append(f1)

In [46]:
# Calculate average scores across all folds
avg_accuracy = np.mean(accuracy_scores)
avg_precision = np.mean(precision_scores)
avg_recall = np.mean(recall_scores)
avg_f1 = np.mean(f1_scores)

print(f"K-Fold Cross Validation Results (KNN):")
print(f"Average Accuracy: {avg_accuracy}")
print(f"Average Precision: {avg_precision}")
print(f"Average Recall: {avg_recall}")
print(f"Average F1 Score: {avg_f1}")

K-Fold Cross Validation Results (KNN):
Average Accuracy: 0.8489583333333334
Average Precision: 0.8629385964912281
Average Recall: 0.8489583333333334
Average F1 Score: 0.838003663003663


In [48]:
# Confusion matrix, precision, recall, F1 score for the test set
y_pred_knn = KNN_model.predict(test_features)
y_pred_labels_knn = le.inverse_transform(y_pred_knn)

In [64]:
cm = confusion_matrix(le.inverse_transform(y_test), y_pred_labels_knn)
precision_per_class = precision_score(le.inverse_transform(y_test), y_pred_labels_knn, average=None)
recall_per_class = recall_score(le.inverse_transform(y_test), y_pred_labels_knn, average=None)
f1_per_class = f1_score(le.inverse_transform(y_test), y_pred_labels_knn, average=None)

print(f"\nConfusion Matrix:\n{cm}")

# Display confusion matrix and per-class precision, recall, F1 score
for i in range(len(cm)):
    tp = cm[i, i]  # True positives for class i
    fn = np.sum(cm[i, :]) - tp  # False negatives for class i
    fp = np.sum(cm[:, i]) - tp  # False positives for class i
    tn = np.sum(cm) - (tp + fp + fn)  # True negatives for class i
    
    print(f"\nClass {i}: \n\t")
    print(f"True Positives (TP): {tp}")
    print(f"False Positives (FP): {fp}")
    print(f"False Negatives (FN): {fn}")
    print(f"True Negatives (TN): {tn}")
    print(f"Precision: {precision_per_class[i]}")
    print(f"Recall: {recall_per_class[i]}")
    print(f"F1 Score: {f1_per_class[i]}")



Confusion Matrix:
[[312   8]
 [ 66  94]]

Class 0: 
	
True Positives (TP): 312
False Positives (FP): 66
False Negatives (FN): 8
True Negatives (TN): 94
Precision: 0.8253968253968254
Recall: 0.975
F1 Score: 0.8939828080229226

Class 1: 
	
True Positives (TP): 94
False Positives (FP): 8
False Negatives (FN): 66
True Negatives (TN): 312
Precision: 0.9215686274509803
Recall: 0.5875
F1 Score: 0.7175572519083969
