In [11]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import tracemalloc
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier

# Initialize memory tracking
tracemalloc.start()

# Load pretrained AlexNet
alexnet = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)

# Modify AlexNet to use as a feature extractor
alexnet.classifier = nn.Sequential(*list(alexnet.classifier.children())[:-1])  # Remove the last layer
alexnet.eval()  # Set the model to evaluation mode

# Define image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
])

# Measure feature extraction time
feature_extraction_times = []

# Function to extract features from a single image
def extract_features(image_path):
    image = Image.open(image_path).convert("RGB")
    image = transform(image).unsqueeze(0)  # Add batch dimension
    start_time = time.time()
    with torch.no_grad():
        features = alexnet(image).numpy()
    feature_extraction_times.append(time.time() - start_time)  # Track time
    return features.flatten()

# Prepare dataset and extract features
image_paths = []
labels = []
root_dir = r'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\Classification_dataset'

for label in os.listdir(root_dir):
    label_dir = os.path.join(root_dir, label)
    if os.path.isdir(label_dir):
        for filename in os.listdir(label_dir):
            if filename.endswith(".jpg"):
                image_paths.append(os.path.join(label_dir, filename))
                labels.append(label)

features_list = []
for image_path in image_paths:
    features = extract_features(image_path)
    features_list.append(features)

X = np.array(features_list)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)

# Calculate average feature extraction time
average_feature_extraction_time = np.mean(feature_extraction_times)

# Memory usage after feature extraction
current_memory, peak_memory = tracemalloc.get_traced_memory()
peak_memory_mb = peak_memory / (1024 * 1024)  # Convert to MB
tracemalloc.stop()

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers
classifiers = {
    "k-NN": KNeighborsClassifier(n_neighbors=5),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "LDA": LinearDiscriminantAnalysis(),
    "QDA": QuadraticDiscriminantAnalysis(),
    "Naive Bayes": GaussianNB(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss"),
    "LightGBM": LGBMClassifier(),
    "Gaussian Process": GaussianProcessClassifier(),
    "MLP": MLPClassifier(max_iter=500),
}

# Evaluate classifiers
results = []
for clf_name, clf in classifiers.items():
    print(f"Evaluating {clf_name}...")

    # Measure training time
    start_time = time.time()
    clf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure inference time
    start_time = time.time()
    y_pred = clf.predict(X_test)
    inference_time = (time.time() - start_time) / len(X_test)

    # Calculate probabilities if supported
    try:
        y_pred_proba = clf.predict_proba(X_test)
        test_log_loss = log_loss(y_test, y_pred_proba)
    except AttributeError:
        test_log_loss = None

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    # Save results
    results.append({
        "Classifier": clf_name,
        "Accuracy": f"{accuracy * 100:.2f}%",
        "Precision": f"{precision * 100:.2f}%",
        "Recall": f"{recall * 100:.2f}%",
        "F1-Score": f"{f1 * 100:.2f}%",
        "Log Loss": f"{test_log_loss:.4f}" if test_log_loss else "N/A",
        "Training Time": f"{training_time:.2f} seconds",
        "Inference Time": f"{inference_time:.4f} seconds per image",
        "Memory Usage (Peak)": f"{peak_memory_mb:.2f} MB",
        "Average Feature Extraction Time": f"{average_feature_extraction_time:.4f} seconds per image"
    })

    print(f"{clf_name} Evaluation Completed.")

# Save results to Excel
results_df = pd.DataFrame(results)
excel_file_path = r"G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\multi_classifier_performance_metrics_with_memory.xlsx"
results_df.to_excel(excel_file_path, index=False, sheet_name="Metrics")
print(f"Results saved to '{excel_file_path}'.")

# Plot results
results_df.plot(x="Classifier", y=["Accuracy", "Precision", "Recall", "F1-Score"], kind="bar", figsize=(10, 7))
plt.title("Classifier Metrics Comparison")
plt.ylabel("Percentage")
plt.tight_layout()
plt.savefig(r"G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\classifier_comparison_with_memory.png", dpi=300)
plt.show()




Evaluating k-NN...
k-NN Evaluation Completed.
Evaluating Decision Tree...
Decision Tree Evaluation Completed.
Evaluating Random Forest...
Random Forest Evaluation Completed.
Evaluating AdaBoost...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


AdaBoost Evaluation Completed.
Evaluating Logistic Regression...
Logistic Regression Evaluation Completed.
Evaluating LDA...
LDA Evaluation Completed.
Evaluating QDA...




QDA Evaluation Completed.
Evaluating Naive Bayes...
Naive Bayes Evaluation Completed.
Evaluating XGBoost...


Parameters: { "use_label_encoder" } are not used.



XGBoost Evaluation Completed.
Evaluating LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017048 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 133874
[LightGBM] [Info] Number of data points in the train set: 1280, number of used features: 923
[LightGBM] [Info] Start training from score -2.085711
[LightGBM] [Info] Start training from score -2.130735
[LightGBM] [Info] Start training from score -2.124177
[LightGBM] [Info] Start training from score -2.048670
[LightGBM] [Info] Start training from score -2.048670
[LightGBM] [Info] Start training from score -2.104759
[LightGBM] [Info] Start training from score -2.054749
[LightGBM] [Info] Start training from score -2.042628
LightGBM Evaluation Completed.
Evaluating Gaussian Process...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Gaussian Process Evaluation Completed.
Evaluating MLP...
MLP Evaluation Completed.
Results saved to 'G:\PhD_Zhejiang University\Mango\Final_Paper_Data_Transfer learning\multi_classifier_performance_metrics_with_memory.xlsx'.


TypeError: no numeric data to plot