In [1]:
import os
import numpy as np
import time
import tracemalloc
import pandas as pd
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.xception import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, log_loss
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier

# Initialize memory tracking
tracemalloc.start()

# Load pre-trained Xception model (without top layers)
base_model = Xception(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
model = Model(inputs=base_model.input, outputs=base_model.output)

# Function to extract features from a single image
def extract_features(image_path):
    img = load_img(image_path, target_size=(299, 299))  # Xception requires 299x299 input
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)  # Xception-specific preprocessing
    start_time = time.time()
    features = model.predict(img_array, verbose=0)
    feature_extraction_times.append(time.time() - start_time)
    return features.flatten()

# Prepare dataset and extract features
image_paths = []
labels = []
feature_extraction_times = []
root_dir = r'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset'

for label in os.listdir(root_dir):
    label_dir = os.path.join(root_dir, label)
    if os.path.isdir(label_dir):
        for filename in os.listdir(label_dir):
            if filename.lower().endswith(".jpg"):
                image_paths.append(os.path.join(label_dir, filename))
                labels.append(label)

features_list = []
for image_path in image_paths:
    features = extract_features(image_path)
    features_list.append(features)

X = np.array(features_list)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers
classifiers = {
    "k-NN": KNeighborsClassifier(n_neighbors=5),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "LDA": LinearDiscriminantAnalysis(),
    "QDA": QuadraticDiscriminantAnalysis(),
    "Naive Bayes": GaussianNB(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss"),
    "LightGBM": LGBMClassifier(),
    "Gaussian Process": GaussianProcessClassifier(),
    "MLP": MLPClassifier(max_iter=500),
}

# Evaluate each classifier
for clf_name, clf in classifiers.items():
    print(f"Evaluating {clf_name}...")
    # Measure training time
    start_time = time.time()
    clf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Predict and evaluate
    start_time = time.time()
    y_pred = clf.predict(X_test)
    inference_time = (time.time() - start_time) / len(X_test)

    # Try to calculate log loss if predict_proba is available
    try:
        y_pred_proba = clf.predict_proba(X_test)
        test_log_loss = log_loss(y_test, y_pred_proba)
    except AttributeError:
        test_log_loss = None

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    avg_extraction_time = np.mean(feature_extraction_times)

    # Memory usage
    _, peak_memory = tracemalloc.get_traced_memory()
    memory_usage = peak_memory / (1024 * 1024)  # Convert to MB

    # Save results
    results = {
        "Metric": [
            "Overall Accuracy",
            "Precision",
            "Recall",
            "F1-Score",
            "Test Log Loss",
            "Average Feature Extraction Time",
            "Training Time",
            "Average Inference Time",
            "Peak Memory Usage"
        ],
        "Value": [
            f"{accuracy * 100:.2f}%",
            f"{precision * 100:.2f}%",
            f"{recall * 100:.2f}%",
            f"{f1 * 100:.2f}%",
            f"{test_log_loss:.4f}" if test_log_loss else "N/A",
            f"{avg_extraction_time:.4f} seconds per image",
            f"{training_time:.2f} seconds",
            f"{inference_time:.4f} seconds per image",
            f"{memory_usage:.2f} MB"
        ]
    }
    results_df = pd.DataFrame(results)
    excel_file_path = os.path.join(root_dir, f"{clf_name.replace(' ', '_').lower()}_performance_metrics.xlsx")
    results_df.to_excel(excel_file_path, index=False, sheet_name="Metrics")
    print(f"Results for {clf_name} saved to '{excel_file_path}'.")

# Stop memory tracking
tracemalloc.stop()


Evaluating k-NN...
Results for k-NN saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\k-nn_performance_metrics.xlsx'.
Evaluating Decision Tree...
Results for Decision Tree saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\decision_tree_performance_metrics.xlsx'.
Evaluating Random Forest...
Results for Random Forest saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\random_forest_performance_metrics.xlsx'.
Evaluating AdaBoost...




Results for AdaBoost saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\adaboost_performance_metrics.xlsx'.
Evaluating Logistic Regression...
Results for Logistic Regression saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\logistic_regression_performance_metrics.xlsx'.
Evaluating LDA...




Results for LDA saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\lda_performance_metrics.xlsx'.
Evaluating QDA...




Results for QDA saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\qda_performance_metrics.xlsx'.
Evaluating Naive Bayes...
Results for Naive Bayes saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\naive_bayes_performance_metrics.xlsx'.
Evaluating XGBoost...


Parameters: { "use_label_encoder" } are not used.



Results for XGBoost saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\xgboost_performance_metrics.xlsx'.
Evaluating LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 6.279276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13352316
[LightGBM] [Info] Number of data points in the train set: 1280, number of used features: 125949
[LightGBM] [Info] Start training from score -2.085711
[LightGBM] [Info] Start training from score -2.130735
[LightGBM] [Info] Start training from score -2.124177
[LightGBM] [Info] Start training from score -2.048670
[LightGBM] [Info] Start training from score -2.048670
[LightGBM] [Info] Start training from score -2.104759
[LightGBM] [Info] Start training from score -2.054749
[LightGBM] [Info] Start training from score -2.042628
Results for LightGBM saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer lea

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results for Gaussian Process saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\gaussian_process_performance_metrics.xlsx'.
Evaluating MLP...
Results for MLP saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset\mlp_performance_metrics.xlsx'.


