In [4]:
import numpy as np 
import matplotlib.pyplot as plt
import glob
import cv2
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.applications.inception_v3 import InceptionV3  # Import InceptionV3
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from keras.utils import to_categorical
import os

In [5]:
# Define the image size
SIZE = 256  

# Path to the dataset
dataset_path = "F:/indhu/Datasets/Tomato fruits dataset for binary and multiclass classification/Two Classes"
print(os.listdir(dataset_path))

['Healthy', 'Reject']


In [6]:
# Capture images and labels into respective lists
images = []
labels = []
for directory_path in glob.glob(dataset_path + "/*"):
    label = directory_path.split("\\")[-1]
    for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.resize(img, (SIZE, SIZE))
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        images.append(img)
        labels.append(label)

In [8]:
# Convert lists to arrays        
images = np.array(images)
labels = np.array(labels)

In [9]:

# Encode labels from text to integers
le = preprocessing.LabelEncoder()
labels_encoded = le.fit_transform(labels)

In [10]:
# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(images, labels_encoded, test_size=0.2, random_state=42, stratify=labels_encoded)

In [11]:
# Normalize pixel values to between 0 and 1
x_train, x_test = x_train / 255.0, x_test / 255.0

In [12]:
# One hot encode y values for neural network (if needed later)
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

In [13]:
# Load InceptionV3 model without fully connected layers (pretrained on ImageNet)
Inception_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))

In [14]:
# Use the pre-trained InceptionV3 model to extract features from training images
train_features = Inception_model.predict(x_train)
train_features = train_features.reshape(train_features.shape[0], -1)  # Flatten features for SVM

[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 529ms/step


In [15]:
# Train KNN on the extracted features
KNN_model = KNeighborsClassifier(n_neighbors=5)  # You can adjust 'n_neighbors' as needed
KNN_model.fit(train_features, y_train)




In [16]:
# Extract features from testing images
test_features = Inception_model.predict(x_test)
test_features = test_features.reshape(test_features.shape[0], -1)  # Flatten features for SVM


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 476ms/step


In [17]:
# Predict on test data using the KNN model
y_pred_knn = KNN_model.predict(test_features)



In [18]:
# Inverse transform the predicted labels to their original form
y_pred_labels_knn = le.inverse_transform(y_pred_knn)



In [18]:
# Calculate accuracy of the KNN model
accuracy_knn = metrics.accuracy_score(le.inverse_transform(y_test), y_pred_labels_knn)
print("KNN Accuracy = ", accuracy_knn)

KNN Accuracy =  0.9316239316239316


In [19]:
models=[]
models.append({'KNN', KNeighborsClassifier(n_neighbors=8)})
models.append({'RandomForestClassifier', RandomForestClassifier()})
models.append({'svm', SVC(kernel='linear')})

In [36]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import numpy as np

# Define a list of models to iterate over
models = [
    KNeighborsClassifier(),
    RandomForestClassifier(),
    SVC()
]
results = []
# Loop over each model, fit it, and evaluate it
for model in models:
    print(f"Training and evaluating model: {model.__class__.__name__}")
       
    # Fit the model
    model.fit(train_features, y_train)
    
    # Make predictions
    y_pred = model.predict(test_features)

    # Inverse transform the predicted labels to their original form
    y_pred_labels = le.inverse_transform(y_pred)
    
    # Confusion matrix
    cm = confusion_matrix(le.inverse_transform(y_test), y_pred_labels)
    print(f"\nConfusion Matrix for {model.__class__.__name__}:\n{cm}")
    
    # Accuracy
    accuracy = accuracy_score(le.inverse_transform(y_test), y_pred_labels)
    print(f"Accuracy: {accuracy:.4f}")

    # Precision, recall, and F1 score for each class
    precision_per_class = precision_score(le.inverse_transform(y_test), y_pred_labels, average=None)
    recall_per_class = recall_score(le.inverse_transform(y_test), y_pred_labels, average=None)
    f1_per_class = f1_score(le.inverse_transform(y_test), y_pred_labels, average=None)
  # Append the results to the list
    results.append({
        'Model': model.__class__.__name__,
        'Accuracy': accuracy,
        'Precision': precision_per_class,
        'Recall': recall_per_class,
        'F1 Score': f1_per_class
})
    # For each class, calculate TP, FP, FN, TN
    for i in range(len(cm)):
        tp = cm[i, i]  # True positives for class i
        fn = np.sum(cm[i, :]) - tp  # False negatives for class i
        fp = np.sum(cm[:, i]) - tp  # False positives for class i
        tn = np.sum(cm) - (tp + fp + fn)  # True negatives for class i
        
        print(f"\nClass {i}:")
        print(f"True Positives (TP): {tp}")
        print(f"False Positives (FP): {fp}")
        print(f"False Negatives (FN): {fn}")
        print(f"True Negatives (TN): {tn}")
        
        # Print precision, recall, and F1 score for each class
        print(f"Precision: {precision_per_class[i]:.4f}")
        print(f"Recall: {recall_per_class[i]:.4f}")
        print(f"F1 Score: {f1_per_class[i]:.4f}")
    
    # Print a separator line for clarity
    print("\n" + "-"*50 + "\n")


Training and evaluating model: KNeighborsClassifier

Confusion Matrix for KNeighborsClassifier:
[[320   1]
 [ 23   7]]
Accuracy: 0.9316

Class 0:
True Positives (TP): 320
False Positives (FP): 23
False Negatives (FN): 1
True Negatives (TN): 7
Precision: 0.9329
Recall: 0.9969
F1 Score: 0.9639

Class 1:
True Positives (TP): 7
False Positives (FP): 1
False Negatives (FN): 23
True Negatives (TN): 320
Precision: 0.8750
Recall: 0.2333
F1 Score: 0.3684

--------------------------------------------------

Training and evaluating model: RandomForestClassifier

Confusion Matrix for RandomForestClassifier:
[[320   1]
 [ 15  15]]
Accuracy: 0.9544

Class 0:
True Positives (TP): 320
False Positives (FP): 15
False Negatives (FN): 1
True Negatives (TN): 15
Precision: 0.9552
Recall: 0.9969
F1 Score: 0.9756

Class 1:
True Positives (TP): 15
False Positives (FP): 1
False Negatives (FN): 15
True Negatives (TN): 320
Precision: 0.9375
Recall: 0.5000
F1 Score: 0.6522

----------------------------------------

In [43]:
import pandas as pd
from openpyxl import load_workbook
from openpyxl.chart import BarChart, Reference

# Convert the results list into a DataFrame
results_df = pd.DataFrame(results)

# Save the combined results to a single Excel file
output_file = 'F:/model_res.xlsx'
results_df.to_excel(output_file, sheet_name='Model_Comparison', index=False)

# Now open the Excel file to insert the chart
wb = load_workbook(output_file)
ws = wb['Model_Comparison']

# Define the data range for the chart (C2:E4 contains accuracy, precision, recall, f1 score)
data = Reference(ws, min_col=2, min_row=1, max_col=5, max_row=len(results_df)+1)

# Define the category labels (model names in A2:A4)
categories = Reference(ws, min_col=1, min_row=2, max_row=len(results_df)+1)

# Create a bar chart
chart = BarChart()
chart.title = "Model Comparison: Accuracy, Precision, Recall, F1 Score"
chart.x_axis.title = 'Models'
chart.y_axis.title = 'Scores'
chart.add_data(data, titles_from_data=True)
chart.set_categories(categories)

# Add the chart to the sheet (position it in a specific cell, e.g., G2)
ws.add_chart(chart, "G2")

# Save the Excel file with the chart
output_file_with_chart = 'F:/model_res_with_chart.xlsx'
wb.save(output_file_with_chart)

print(f"Comparison graph saved in {output_file_with_chart}")
#print(f"Results saved to {output_file}")

Comparison graph saved in F:/model_res_with_chart.xlsx


In [45]:
import pandas as pd
from openpyxl import load_workbook
from openpyxl.chart import BarChart, Reference

# Convert the results list into a DataFrame
results_df = pd.DataFrame(results)

# Save the combined results to a single Excel file
output_file = 'F:/model_res.xlsx'
results_df.to_excel(output_file, sheet_name='Model_Comparison', index=False)

# Now open the Excel file to insert the chart
wb = load_workbook(output_file)
ws = wb['Model_Comparison']

# Transpose the data for the chart: Metrics on x-axis, Models as series in the legend
# We need to select B1 to E4, but treat the models as series.
data = Reference(ws, min_col=2, min_row=1, max_col=5, max_row=len(results_df)+1)

# The categories (metrics: Accuracy, Precision, Recall, F1 Score) will be on the x-axis
categories = Reference(ws, min_col=2, min_row=1, max_col=5)

# Create a bar chart
chart = BarChart()
chart.title = "Model Comparison: Accuracy, Precision, Recall, F1 Score"
chart.x_axis.title = 'Metrics'
chart.y_axis.title = 'Scores'

# Add the data for each model as a separate series
chart.add_data(data, titles_from_data=True)
chart.set_categories(categories)

# Set the series (models) to appear in the legend
chart.legend = True

# Add the chart to the sheet (position it in a specific cell, e.g., G2)
ws.add_chart(chart, "G2")

# Save the Excel file with the chart
output_file_with_chart = 'F:/model_res_with_chart.xlsx'
wb.save(output_file_with_chart)

print(f"Comparison graph saved in {output_file_with_chart}")
print(f"Results saved to {output_file}")


TypeError: <class 'openpyxl.chart.bar_chart.BarChart'>.legend should be <class 'openpyxl.chart.legend.Legend'> but value is <class 'bool'>