How do different feature extraction methods affect the effectiveness of classification?

In [None]:
import os
from pathlib import Path
from math import pi
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.applications import VGG16, InceptionV3, MobileNetV2


# Root folders of the image dataset: the three splits plus the augmented images.
train_dir, test_dir, val_dir, aug_dir = (
    Path('./data/train'),
    Path('./data/test'),
    Path('./data/validation'),
    Path('./data/augmented/'),
)

# Recursively collect the .jpg files found under each split folder.
train_filepaths = [*train_dir.glob(r'**/*.jpg')]
test_filepaths = [*test_dir.glob(r'**/*.jpg')]
val_filepaths = [*val_dir.glob(r'**/*.jpg')]

# The augmented folder is searched for both the .jpg and the .jpeg extension.
aug_filepaths = [*aug_dir.glob(r'**/*.jpg'), *aug_dir.glob(r'**/*.jpeg')]

# All image paths pooled into one list: train, test, validation, augmented.
data = [*train_filepaths, *test_filepaths, *val_filepaths, *aug_filepaths]

In [None]:
def paths_to_dataframe(path, random_state=None):
    """Build a shuffled table of image paths and their class labels.

    The label of an image is the name of the directory that directly
    contains it, e.g. ``data/train/apple/img.jpg`` is labelled ``apple``.

    Parameters
    ----------
    path : sequence of str or pathlib.Path
        Image file paths.
    random_state : int or None, optional
        Seed passed to ``DataFrame.sample`` for the shuffle. The default
        ``None`` gives a different row order on every call; pass an integer
        to make the order (and everything derived from it) reproducible.

    Returns
    -------
    pandas.DataFrame
        Columns ``Path`` (paths as strings) and ``Label``, with the rows
        shuffled and the index reset to 0..n-1.
    """
    # pathlib extracts the parent directory name without depending on the
    # separator used in the incoming strings.
    labels = pd.Series([Path(str(p)).parent.name for p in path], name='Label')
    paths = pd.Series(path, name='Path').astype(str)

    df = pd.concat([paths, labels], axis=1)

    # frac=1 returns every row in random order, i.e. a shuffle.
    return df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    
# Build the shuffled (Path, Label) table for every collected image.
# NOTE(review): the shuffle inside paths_to_dataframe is not seeded by this
# call, so the row order differs between runs.
data_df = paths_to_dataframe(data)

In [None]:
# Keep a 10% random subsample of the rows (sampling seeded with random_state=42);
# presumably this is done to keep the experiment fast - confirm.
# NOTE(review): data_df is overwritten with its own subsample, so re-running
# this cell without re-running the previous one shrinks the frame again.
data_df = data_df.sample(frac=0.1, random_state=42)
data_df.shape

In [None]:
# Cross-validation setup: k shuffled folds with a fixed seed.
k = 2
kf = KFold(shuffle=True, n_splits=k, random_state=42)

# Image paths are the inputs, class labels the targets.
X, y = (np.array(data_df[column]) for column in ('Path', 'Label'))

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

def metrics(y_test, y_pred):
    """Print and return overall and per-class classification metrics.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_test``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, class_accuracy)``. Precision,
        recall and F1 are support-weighted averages over the classes.
        ``class_accuracy`` maps every label present in ``y_test`` to the
        fraction of that label's samples that were predicted correctly.
    """
    # Convert both inputs up front so boolean masking below works the same
    # for lists, pandas Series and NumPy arrays.
    y_test = np.asarray(y_test)
    y_pred = np.asarray(y_pred)

    accuracy = accuracy_score(y_test, y_pred)
    # zero_division=0 yields the same 0.0 score the default uses for classes
    # with an undefined metric, but without emitting a warning per call.
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    print("Accuracy: ", accuracy)
    print("Precision: ", precision)
    print("Recall: ", recall)
    print("F1: ", f1)

    classification_rep = classification_report(y_test, y_pred, zero_division=0)
    print("Classification Report:")
    print(classification_rep)

    # Per-class accuracy: among the samples whose true label is `cls`,
    # the share that was predicted as `cls`.
    class_accuracy = {}
    for cls in np.unique(y_test):
        is_cls = y_test == cls
        class_accuracy[cls] = float(np.mean(y_pred[is_cls] == cls))

    return accuracy, precision, recall, f1, class_accuracy

In [None]:
from sklearn.tree import DecisionTreeClassifier

def nested_dichotomy(X_train, y_train, X_test, default_label='tomato', random_state=None):
    """Classify with one binary decision tree per class (one-vs-rest).

    For every distinct label in ``y_train`` a ``DecisionTreeClassifier`` is
    fitted to separate that label (target 1) from all other labels (target 0).
    A test sample receives the label of the last tree, in sorted label order,
    that predicts 1 for it; samples no tree claims get ``default_label``.

    Parameters
    ----------
    X_train : array-like of shape (n_train, n_features)
        Training feature vectors.
    y_train : array-like of shape (n_train,)
        Training labels.
    X_test : array-like of shape (n_test, n_features)
        Feature vectors to classify.
    default_label : optional
        Label assigned when every tree predicts 0. Defaults to ``'tomato'``.
    random_state : int or None, optional
        Seed forwarded to every ``DecisionTreeClassifier``; ``None`` leaves
        the trees unseeded.

    Returns
    -------
    results : list
        Final label for each test sample.
    predictions : list of numpy.ndarray
        One object array per class holding the class label where that class's
        tree predicted 1 and ``None`` elsewhere.
    classifiers : list of tuple
        ``(fitted tree, label)`` pairs in sorted label order.
    """
    # Ensures `y_train == label` is an element-wise comparison even for lists.
    y_train = np.asarray(y_train)

    classifiers = []
    for label in np.unique(y_train):
        tree = DecisionTreeClassifier(random_state=random_state)
        tree.fit(X_train, (y_train == label).astype(int))
        classifiers.append((tree, label))

    results = [default_label] * len(X_test)
    predictions = []
    for tree, label in classifiers:
        is_positive = tree.predict(X_test) == 1
        predictions.append(np.where(is_positive, label, None))
        # Later classes overwrite earlier ones when several trees claim a sample.
        for j in np.flatnonzero(is_positive):
            results[j] = label

    return results, predictions, classifiers

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input


def _select_preprocess_fn(model):
    """Pick the Keras input-preprocessing function that matches ``model``.

    The choice is based on ``model.name``. NOTE(review): this relies on the
    default names the Keras application builders give their models
    (``inception_v3``, ``mobilenetv2_...``) - confirm for the installed
    version. Any other model falls back to the VGG16 preprocessing that this
    notebook imports as ``preprocess_input``.
    """
    from tensorflow.keras.applications import inception_v3, mobilenet_v2

    name = str(getattr(model, 'name', '')).lower()
    if name.startswith('inception_v3'):
        return inception_v3.preprocess_input
    if name.startswith('mobilenetv2'):
        return mobilenet_v2.preprocess_input
    return preprocess_input


def extract_features(img_path, model, preprocess_fn=None, target_size=(224, 224)):
    """Return the flattened output of ``model`` for a single image file.

    Parameters
    ----------
    img_path : str
        Path of the image to load.
    model : Keras model
        Network used as the feature extractor.
    preprocess_fn : callable or None, optional
        Function applied to the image batch before prediction. When ``None``
        the function matching the model is chosen by ``_select_preprocess_fn``
        so that each network receives inputs scaled the way it expects.
    target_size : tuple of int, optional
        ``(height, width)`` the image is resized to when loaded.

    Returns
    -------
    numpy.ndarray
        One-dimensional feature vector.
    """
    if preprocess_fn is None:
        preprocess_fn = _select_preprocess_fn(model)

    img = image.load_img(img_path, target_size=target_size)
    x = image.img_to_array(img)
    # The model expects a batch dimension, so wrap the single image.
    x = np.expand_dims(x, axis=0)
    x = preprocess_fn(x)
    # verbose=0 avoids one progress bar per image in the notebook output.
    features = model.predict(x, verbose=0)
    return features.flatten()

In [None]:

def experiment_loop():
    """Cross-validate the one-vs-rest tree classifier on three CNN feature sets.

    Uses the module-level ``X`` (image paths), ``y`` (labels) and ``kf``
    (KFold splitter). Features are extracted with ImageNet-pretrained VGG16,
    InceptionV3 and MobileNetV2 (no top layers, global average pooling).

    Returns
    -------
    list of list of tuple
        ``scores[fold][model]`` is the tuple returned by ``metrics``:
        ``(accuracy, precision, recall, f1, class_accuracy)``. Models appear
        in the order VGG16, InceptionV3, MobileNetV2.
    """
    feature_selection = [VGG16(weights='imagenet', include_top=False, pooling='avg'),
                         InceptionV3(weights='imagenet', include_top=False, pooling='avg'),
                         MobileNetV2(weights='imagenet', include_top=False, pooling='avg')]

    # The features of an image do not depend on the fold it lands in, so each
    # image is passed through each network once and the folds only index into
    # the result, instead of re-extracting everything in every fold.
    features_per_model = [
        np.array([extract_features(img_path, fs) for img_path in X])
        for fs in feature_selection
    ]

    splits = list(kf.split(X))
    scores = [[] for _ in splits]

    for fold, (train_index, val_index) in enumerate(splits):
        y_train, y_val = y[train_index], y[val_index]

        for features in features_per_model:
            results, _, _ = nested_dichotomy(features[train_index], y_train, features[val_index])
            scores[fold].append(metrics(y_val, results))

    return scores

scores = experiment_loop()


In [None]:
#from tensorflow.keras.applications import MobileNetV2

#mobilenet_model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')
#X_train_mobilenet = np.array([extract_features(img_path, mobilenet_model) for img_path in X])
#X_test_mobilenet = np.array([extract_features(img_path, mobilenet_model) for img_path in X_test])
#results_mobilenet, pred_mobilenet, models_mobilenet = nested_dichotomy(X_train_mobilenet, y, X_test_mobilenet)
#accuracy_mobilenet, precision_mobilenet, recall_mobilenet, f1_mobilenet, class_accuracy_mobilenet = metrics(y_test, results_mobilenet)

In [None]:
# Average the per-class accuracy over the folds, separately for each model.
# The last element of every result tuple in `scores` is that model's
# class-accuracy dict; the model position follows experiment_loop's order:
# 0 = VGG16, 1 = InceptionV3, 2 = MobileNetV2.
class_accuracy_vgg16 = [fold[0][-1] for fold in scores]
class_accuracy_inception = [fold[1][-1] for fold in scores]
class_accuracy_mobilenet = [fold[2][-1] for fold in scores]


def _sum_class_accuracy(per_fold_accuracy):
    """Return per-class accuracy sums and the number of folds each class occurs in.

    Classes are collected from every fold, so a class that is absent from the
    first fold's validation set no longer raises a KeyError.
    """
    totals, counts = {}, {}
    for fold_accuracy in per_fold_accuracy:
        for cls, value in fold_accuracy.items():
            totals[cls] = totals.get(cls, 0) + value
            counts[cls] = counts.get(cls, 0) + 1
    return totals, counts


def _average_class_accuracy(totals, counts):
    """Divide each class total by the number of folds that contained the class."""
    return {cls: total / counts[cls] for cls, total in totals.items()}


# Sum accuracies for each class across all folds
sum_class_accuracy_vgg16, _fold_counts_vgg16 = _sum_class_accuracy(class_accuracy_vgg16)
sum_class_accuracy_inception, _fold_counts_inception = _sum_class_accuracy(class_accuracy_inception)
sum_class_accuracy_mobilenet, _fold_counts_mobilenet = _sum_class_accuracy(class_accuracy_mobilenet)

# Total number of folds
num_folds = len(scores)

# Average over the folds in which the class was actually evaluated; this
# equals dividing by num_folds whenever a class occurs in every fold.
avg_class_accuracy_vgg16 = _average_class_accuracy(sum_class_accuracy_vgg16, _fold_counts_vgg16)
avg_class_accuracy_inception = _average_class_accuracy(sum_class_accuracy_inception, _fold_counts_inception)
avg_class_accuracy_mobilenet = _average_class_accuracy(sum_class_accuracy_mobilenet, _fold_counts_mobilenet)

# Print the results
print("Average Class Accuracy for VGG16:", avg_class_accuracy_vgg16)
print("Average Class Accuracy for InceptionV3:", avg_class_accuracy_inception)
print("Average Class Accuracy for MobileNetV2:", avg_class_accuracy_mobilenet)


In [None]:
# Fold-averaged per-class accuracy, one bar chart per network, side by side.
plt.figure(figsize=(18, 6))

class_accuracy_panels = (
    ('Accuracy for Each Class (MobileNet)', avg_class_accuracy_mobilenet),
    ('Accuracy for Each Class (InceptionV3)', avg_class_accuracy_inception),
    ('Accuracy for Each Class (VGG16)', avg_class_accuracy_vgg16),
)

for panel_position, (panel_title, class_scores) in enumerate(class_accuracy_panels, start=1):
    plt.subplot(1, 3, panel_position)
    plt.bar(class_scores.keys(), class_scores.values(), color='skyblue')
    plt.title(panel_title)
    plt.xlabel('Classes')
    plt.ylabel('Accuracy')
    # Class names are drawn vertically on the x axis.
    plt.xticks(rotation=90, ha='right')
    plt.ylim(0, 1)

plt.tight_layout()
plt.show()


In [None]:
# Per-fold accuracy, precision, recall and F1-score of every feature
# extractor, followed by their averages over the folds.

# Position of each model inside a fold's entry of `scores`. This is the order
# in which experiment_loop builds `feature_selection`: VGG16, InceptionV3,
# MobileNetV2. Reading position 0 as MobileNet (and 2 as VGG16) would swap
# the two models in every table and chart derived from these variables.
VGG16_POS, INCEPTION_POS, MOBILENET_POS = 0, 1, 2
# Position of each metric inside a model's result tuple.
ACCURACY_POS, PRECISION_POS, RECALL_POS, F1_POS = 0, 1, 2, 3


def _per_fold(model_pos, metric_pos):
    """Return one metric of one model as a list with one value per fold."""
    return [fold_scores[model_pos][metric_pos] for fold_scores in scores]


def _mean(values):
    """Arithmetic mean of a non-empty list."""
    return sum(values) / len(values)


accuracy_mobilenet = _per_fold(MOBILENET_POS, ACCURACY_POS)
precision_mobilenet = _per_fold(MOBILENET_POS, PRECISION_POS)
recall_mobilenet = _per_fold(MOBILENET_POS, RECALL_POS)
f1_mobilenet = _per_fold(MOBILENET_POS, F1_POS)

accuracy_inception = _per_fold(INCEPTION_POS, ACCURACY_POS)
precision_inception = _per_fold(INCEPTION_POS, PRECISION_POS)
recall_inception = _per_fold(INCEPTION_POS, RECALL_POS)
f1_inception = _per_fold(INCEPTION_POS, F1_POS)

accuracy_vgg16 = _per_fold(VGG16_POS, ACCURACY_POS)
precision_vgg16 = _per_fold(VGG16_POS, PRECISION_POS)
recall_vgg16 = _per_fold(VGG16_POS, RECALL_POS)
f1_vgg16 = _per_fold(VGG16_POS, F1_POS)

# Averages over the folds for every method
avg_accuracy_mobilenet = _mean(accuracy_mobilenet)
avg_precision_mobilenet = _mean(precision_mobilenet)
avg_recall_mobilenet = _mean(recall_mobilenet)
avg_f1_mobilenet = _mean(f1_mobilenet)

avg_accuracy_inception = _mean(accuracy_inception)
avg_precision_inception = _mean(precision_inception)
avg_recall_inception = _mean(recall_inception)
avg_f1_inception = _mean(f1_inception)

avg_accuracy_vgg16 = _mean(accuracy_vgg16)
avg_precision_vgg16 = _mean(precision_vgg16)
avg_recall_vgg16 = _mean(recall_vgg16)
avg_f1_vgg16 = _mean(f1_vgg16)

# Print the computed averages
print("Average Precision for MobileNet:", avg_precision_mobilenet)
print("Average Recall for MobileNet:", avg_recall_mobilenet)
print("Average F1-Score for MobileNet:", avg_f1_mobilenet)

print("Average Precision for InceptionV3:", avg_precision_inception)
print("Average Recall for InceptionV3:", avg_recall_inception)
print("Average F1-Score for InceptionV3:", avg_f1_inception)

print("Average Precision for VGG16:", avg_precision_vgg16)
print("Average Recall for VGG16:", avg_recall_vgg16)
print("Average F1-Score for VGG16:", avg_f1_vgg16)


In [None]:
import matplotlib.pyplot as plt

methods = ['MobileNet', 'InceptionV3', 'VGG16']

# Position of each method inside a fold's entry of `scores`, i.e. the order in
# which experiment_loop builds `feature_selection` (VGG16, InceptionV3,
# MobileNetV2). Values are looked up by method name so every bar is guaranteed
# to show the number that belongs to its label; a hand-ordered list can
# silently disagree with `methods`.
score_position = {'VGG16': 0, 'InceptionV3': 1, 'MobileNet': 2}


def _fold_average(method, metric_position):
    """Average one metric of `method` over all folds.

    `metric_position` indexes the result tuple stored in `scores`:
    0 = accuracy, 1 = precision, 2 = recall, 3 = F1.
    """
    values = [fold_scores[score_position[method]][metric_position] for fold_scores in scores]
    return sum(values) / len(values)


accuracies = [_fold_average(method, 0) for method in methods]
precisions = [_fold_average(method, 1) for method in methods]
recalls = [_fold_average(method, 2) for method in methods]
f1_scores = [_fold_average(method, 3) for method in methods]


# Charts: one panel per metric on a 2x2 grid, one bar per method.
plt.figure(figsize=(14, 10))

metric_charts = (
    ('Average Accuracy', 'Accuracy', accuracies),
    ('Average Precision', 'Precision', precisions),
    ('Average Recall', 'Recall', recalls),
    ('Average F1-Score', 'F1-Score', f1_scores),
)

for chart_position, (chart_title, y_label, chart_values) in enumerate(metric_charts, start=1):
    plt.subplot(2, 2, chart_position)
    plt.bar(methods, chart_values, color=['skyblue', 'lightgreen', 'salmon'])
    plt.title(chart_title)
    plt.xlabel('Feature extraction method')
    plt.ylabel(y_label)
    plt.ylim(0, 1)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()


In [None]:
from itertools import combinations
import numpy as np
from scipy import stats


def corrected_t_test(accuracies_1, accuracies_2, corr=0.1):
    """
    Compares two sets of cross-validation accuracies with a variance-inflated t-test.

    Each sample variance is divided by ``m * (1 - corr)`` before the standard
    error of the difference of the means is formed, and the two-tailed p-value
    is read from a Student t distribution with ``min(m1, m2) - 1`` degrees of
    freedom.
    NOTE(review): the variances end up divided by the number of folds twice
    and the scores are treated as unpaired - confirm this is the intended
    correction.

    Parameters:
    accuracies_1 (list or np.array): Accuracy scores from cross-validation for technique 1.
    accuracies_2 (list or np.array): Accuracy scores from cross-validation for technique 2.
    corr (float, optional): Assumed correlation between folds; must be a number
        smaller than 1. It is not estimated from the data.

    Returns:
    float: The t-statistic value (positive when technique 1 has the higher mean).
    float: The two-tailed p-value corresponding to the t-statistic.

    Both values are NaN when either technique has fewer than two scores,
    because no sample variance can be computed. When both score sets have zero
    variance the result is ``(0.0, 1.0)`` for equal means and ``(+/-inf, 0.0)``
    otherwise.
    """
    scores_1 = np.asarray(accuracies_1, dtype=float)
    scores_2 = np.asarray(accuracies_2, dtype=float)
    m1, m2 = scores_1.size, scores_2.size

    # A sample variance (ddof=1) needs at least two observations.
    if min(m1, m2) < 2:
        return float('nan'), float('nan')

    mean_difference = scores_1.mean() - scores_2.mean()

    corrected_variance_1 = scores_1.var(ddof=1) / (m1 * (1 - corr))
    corrected_variance_2 = scores_2.var(ddof=1) / (m2 * (1 - corr))
    standard_error = np.sqrt(corrected_variance_1 / m1 + corrected_variance_2 / m2)

    # Identical scores in every fold give a zero standard error; handle it
    # explicitly instead of dividing by zero.
    if standard_error == 0:
        if mean_difference == 0:
            return 0.0, 1.0
        return float(np.copysign(np.inf, mean_difference)), 0.0

    t_statistic = mean_difference / standard_error
    p_value = stats.t.sf(np.abs(t_statistic), df=min(m1, m2) - 1) * 2  # two-tailed p-value

    return float(t_statistic), float(p_value)



In [None]:
from itertools import combinations

# Every unordered pair of methods, each method given as (per-fold accuracies, name).
candidates = [(accuracy_vgg16, 'VGG16'), (accuracy_inception, 'InceptionV3'), (accuracy_mobilenet, 'MobileNet')]
pairs = combinations(candidates, 2)

# Run the t-test on each pair and report the outcome.
for (accuracies_1, name_1), (accuracies_2, name_2) in pairs:
    t_statistic, p_value = corrected_t_test(accuracies_1, accuracies_2)

    # The sign of the statistic tells which method has the higher mean accuracy;
    # anything that is neither positive nor negative falls through to the last case.
    comparison = (
        f"{name_1} is better than {name_2}" if t_statistic > 0
        else f"{name_2} is better than {name_1}" if t_statistic < 0
        else f"There is no significant difference between {name_1} and {name_2}"
    )

    # Significance at the 5% level.
    significance = "significant" if p_value < 0.05 else "not significant"

    # Report the comparison, one item per line.
    print(
        f'{name_1} vs. {name_2}:',
        f'T-statistic: {t_statistic}',
        f'P-value: {p_value}',
        f'Result: {comparison} (p-value {significance})',
        sep='\n',
    )


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Method labels, in the same order as the values in each list below.
methods = ['MobileNet', 'InceptionV3', 'VGG16']

# Fold-averaged value of every metric, one entry per method.
avg_accuracy, avg_precision, avg_recall, avg_f1 = (
    [avg_accuracy_mobilenet, avg_accuracy_inception, avg_accuracy_vgg16],
    [avg_precision_mobilenet, avg_precision_inception, avg_precision_vgg16],
    [avg_recall_mobilenet, avg_recall_inception, avg_recall_vgg16],
    [avg_f1_mobilenet, avg_f1_inception, avg_f1_vgg16],
)



In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Fold-averaged results of every method (same order as `methods`).
avg_accuracy = [avg_accuracy_mobilenet, avg_accuracy_inception, avg_accuracy_vgg16]
avg_precision = [avg_precision_mobilenet, avg_precision_inception, avg_precision_vgg16]
avg_recall = [avg_recall_mobilenet, avg_recall_inception, avg_recall_vgg16]
avg_f1 = [avg_f1_mobilenet, avg_f1_inception, avg_f1_vgg16]

# Methods
methods = ['MobileNet', 'InceptionV3', 'VGG16']

# Axis labels of the radar chart. The list is called `metric_names` rather than
# `metrics` so that it does not overwrite the metrics() function defined
# earlier in the notebook, which experiment_loop needs when it is re-run.
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score']

# One spoke per metric
num_features = len(metric_names)

# Evenly spaced angle for every spoke
angles = np.linspace(0, 2 * np.pi, num_features, endpoint=False).tolist()

# Repeat the first angle at the end so that each outline is closed
angles += angles[:1]

# Polar axes for the radar chart
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))

# One closed outline per method
for method, avg_acc, avg_prec, avg_rec, avg_f in zip(methods, avg_accuracy, avg_precision, avg_recall, avg_f1):
    values = [avg_acc, avg_prec, avg_rec, avg_f, avg_acc]  # first value repeated to close the outline
    ax.plot(angles, values, linewidth=1, linestyle='solid', label=method)

# Label every spoke with its metric
ax.set_xticks(angles[:-1])
ax.set_xticklabels(metric_names)

# Legend
ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

plt.show()
