How does parameter selection affect model quality?

#### Preprocessing

In [None]:
import os
from pathlib import Path
from math import pi
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE


from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import (KFold, RandomizedSearchCV,
                                     cross_val_score, train_test_split)
from sklearn.tree import DecisionTreeClassifier

# Root folder of each image split; the glob patterns below search them recursively.
train_dir, test_dir, val_dir = (
    Path('./data/train'),
    Path('./data/test'),
    Path('./data/validation'),
)
train_filepaths = [*train_dir.glob(r'**/*.jpg')]
test_filepaths = [*test_dir.glob(r'**/*.jpg')]
val_filepaths = [*val_dir.glob(r'**/*.jpg')]

# Augmented images are collected under both JPEG extensions.
aug_dir = Path('./data/augmented/')
aug_filepaths = [
    found
    for pattern in (r'**/*.jpg', r'**/*.jpeg')
    for found in aug_dir.glob(pattern)
]

# The pooled data set is train + test + validation; augmented files stay separate.
data = [*train_filepaths, *test_filepaths, *val_filepaths]

In [None]:
def paths_to_dataframe(path, random_state=None):
    """Turn a collection of image paths into a shuffled ``Path``/``Label`` table.

    The label of an image is the name of the folder that directly contains it.

    Parameters
    ----------
    path : sequence of str or pathlib.Path
        Image file paths.
    random_state : int, optional
        Seed for the row shuffle. The default ``None`` keeps the shuffle
        unseeded, so the row order differs between runs.

    Returns
    -------
    pandas.DataFrame
        Columns ``Path`` (the path as ``str``) and ``Label``; rows are shuffled
        and re-indexed from 0.
    """
    # pathlib handles both separator styles, unlike splitting on os.sep.
    labels = [Path(p).parent.name for p in path]
    df = pd.DataFrame(
        {'Path': [str(p) for p in path], 'Label': labels},
        columns=['Path', 'Label'],
    )
    return df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    
# Build the shuffled (Path, Label) table for every image in the train, test and validation folders.
data_df = paths_to_dataframe(data)

In [None]:
# aug_df = paths_to_dataframe(aug_filepaths)
# aug_df = aug_df.sample(frac=0.1, random_state=42)

In [None]:
# data_df = pd.concat([data_df, aug_df])

In [None]:
# Sort by path before the seeded shuffle: the incoming row order depends on the
# filesystem listing and on an unseeded shuffle, so without a canonical order
# random_state=42 would not reproduce the same permutation between runs.
data_df = data_df.sort_values('Path').sample(frac=1, random_state=42)
data_df.shape

In [None]:
# Number of cross-validation folds; the experiment loop sizes its score list with it.
k = 5
# Shuffled, seeded splitter shared by every search strategy.
kf = KFold(shuffle=True, random_state=42, n_splits=k)

# Inputs are the image paths, targets are the folder-derived labels.
X, y = (np.array(data_df[column]) for column in ('Path', 'Label'))

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

def metrics(y_test, y_pred):
    """Print and return classification metrics for one set of predictions.

    Accuracy, weighted precision, weighted recall and weighted F1 are printed
    together with the scikit-learn classification report. Accuracy and
    precision are also appended to ``./wynik.txt``.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_test``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, class_accuracy)`` where
        ``class_accuracy`` maps every label present in ``y_test`` to the
        accuracy computed on the samples of that label only.
    """
    # The per-class selection below needs element-wise comparison and mask
    # indexing, which plain Python lists do not support.
    y_test = np.asarray(y_test)
    y_pred = np.asarray(y_pred)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print("Accuracy: ", accuracy)
    print("Precision: ", precision)
    print("Recall: ", recall)
    print("F1: ", f1)

    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    class_accuracy = {}
    for cls in np.unique(y_test):
        of_this_class = y_test == cls
        class_accuracy[cls] = accuracy_score(y_test[of_this_class], y_pred[of_this_class])

    # Append-only log: one accuracy line, one precision line, then a blank line.
    file_path = './wynik.txt'
    with open(file_path, 'a') as file:
        file.write(str(accuracy) + '\n' + str(precision) + '\n\n')

    return accuracy, precision, recall, f1, class_accuracy

In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input

def extract_features(img_path, model, preprocess=None):
    """Return the flattened output of ``model`` for a single image.

    Parameters
    ----------
    img_path : str
        Path of the image; it is loaded and resized to 224x224.
    model : object with a ``predict`` method
        Network applied to the preprocessed single-image batch.
    preprocess : callable, optional
        Function applied to the image batch before prediction. Defaults to the
        VGG16 ``preprocess_input`` imported in this cell. Other backbones
        should pass their own preprocessing function, because each Keras
        application documents its own expected input scaling.

    Returns
    -------
    numpy.ndarray
        One-dimensional feature vector.
    """
    if preprocess is None:
        preprocess = preprocess_input

    img = image.load_img(img_path, target_size=(224, 224))
    # The model expects a batch axis, so wrap the single image in a batch of one.
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    features = model.predict(preprocess(batch))
    return features.flatten()

#### Nested Dichotomy

In [None]:
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from skopt import BayesSearchCV

def nested_dichotomy(X_train, y_train, X_test, param_grid, method):
    """Tune one binary decision tree per class and merge their predictions.

    For every distinct label in ``y_train`` a "this label vs. everything else"
    target is built, balanced with ``RandomOverSampler`` and used to tune a
    ``DecisionTreeClassifier`` with the requested search strategy.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels.
    X_test : array-like
        Samples to classify.
    param_grid : dict
        Hyper-parameter candidates, shared by all search strategies.
    method : {'GridSearchCV', 'RandomizedSearchCV', 'BayesSearchCV'}
        Name of the search strategy.

    Returns
    -------
    results : list
        Final label per test sample. When several trees claim a sample, the
        tree of the last label (in sorted label order) wins; samples claimed
        by no tree get the fallback label ``'tomato'``.
    predictions : list of numpy.ndarray
        One array per label holding that label where its tree predicted the
        positive class and ``None`` elsewhere.
    classifiers : list of tuple
        ``(fitted_search, label)`` pairs.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    supported = ('GridSearchCV', 'RandomizedSearchCV', 'BayesSearchCV')
    if method not in supported:
        # Previously an unknown name left the search object unbound (or silently
        # reused the one from the previous label).
        raise ValueError(f"Unknown search method {method!r}; expected one of {supported}")

    y_train = np.asarray(y_train)
    unique_labels = np.unique(y_train)
    classifiers = []

    for label in unique_labels:
        y_binary = np.where(y_train == label, 1, 0)

        balancer = RandomOverSampler(random_state=42)
        X_train_balanced, y_train_balanced = balancer.fit_resample(X_train, y_binary)

        # Seeded like every other random component so repeated runs agree.
        base_tree = DecisionTreeClassifier(random_state=42)

        if method == 'GridSearchCV':
            search_method = GridSearchCV(base_tree, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
        elif method == 'RandomizedSearchCV':
            search_method = RandomizedSearchCV(
                base_tree, param_distributions=param_grid, n_iter=10, cv=5,
                scoring='accuracy', n_jobs=-1, random_state=42,
            )
        else:
            from skopt.space import Categorical

            # Make every candidate list an explicit categorical dimension so the
            # Bayesian search explores the same values as the other strategies
            # (skopt would otherwise read a two-integer list such as [10, 20] as
            # an integer range). Non-list entries are passed through untouched.
            search_space = {
                name: Categorical(candidates) if isinstance(candidates, list) else candidates
                for name, candidates in param_grid.items()
            }
            search_method = BayesSearchCV(
                base_tree, search_space, n_iter=10, cv=5,
                scoring='accuracy', random_state=42, n_jobs=-1,
            )

        search_method.fit(X_train_balanced, y_train_balanced)
        classifiers.append((search_method, label))

        print(label)

    predictions = []
    results = np.full(len(X_test), 'tomato', dtype=object)
    for search, positive_label in classifiers:
        is_positive = search.best_estimator_.predict(X_test) == 1
        predictions.append(np.where(is_positive, positive_label, None))
        # Later labels overwrite earlier ones, as in the original merge order.
        results[is_positive] = positive_label

    return results.tolist(), predictions, classifiers

In [None]:
from imblearn.combine import SMOTETomek
from imblearn.under_sampling import TomekLinks, RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE
from tensorflow.keras.applications import MobileNetV2

# Names of the hyper-parameter search strategies to compare, in reporting order.
param_methods = ['GridSearchCV', 'RandomizedSearchCV', 'BayesSearchCV']

# param_methods = [
#     'RandomizedSearchCV',
# ]

def experiment_loop():
    """Cross-validate every search strategy on MobileNetV2 image features.

    Features are extracted once for all images in the module-level ``X``.
    Then, for every split produced by ``kf`` and every name in
    ``param_methods``, the per-class trees are tuned by ``nested_dichotomy``
    and the predictions are scored by ``metrics``.

    Returns
    -------
    list of list of tuple
        ``scores[fold][method]`` is
        ``(accuracy, precision, recall, f1, class_accuracy)``, with methods in
        the order of ``param_methods``.
    """
    # MobileNetV2 has its own input scaling; the VGG16 preprocessing used by
    # extract_features does not match this backbone, so features are computed
    # here with the MobileNetV2 preprocessing instead.
    from tensorflow.keras.applications.mobilenet_v2 import (
        preprocess_input as mobilenet_preprocess,
    )

    param_grid = {
        'criterion': ['gini', 'entropy'],
        'splitter': ['best', 'random'],
        'max_depth': [10, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 5, 10],
    }

    scores = [[] for _ in range(k)]

    mobilenet_model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')

    def _mobilenet_features(img_path):
        # Load one image, add the batch axis and return the pooled feature vector.
        img = image.load_img(img_path, target_size=(224, 224))
        batch = np.expand_dims(image.img_to_array(img), axis=0)
        return mobilenet_model.predict(mobilenet_preprocess(batch)).flatten()

    # Extract once up front; every fold only re-indexes this matrix.
    X_ext = np.array([_mobilenet_features(img_path) for img_path in X])

    for fold, (train_index, val_index) in enumerate(kf.split(X)):
        X_train, X_val = X_ext[train_index], X_ext[val_index]
        y_train, y_val = y[train_index], y[val_index]

        for method_name in param_methods:
            print(method_name)
            results, _, _ = nested_dichotomy(X_train, y_train, X_val, param_grid, method_name)
            scores[fold].append(metrics(y_val, results))

    return scores

# Run the full comparison; scores[fold][method] holds (accuracy, precision, recall, f1, class_accuracy).
scores = experiment_loop()

In [None]:
# Append the raw nested score structure, exactly as str() renders it, to a text log.
file_path = './scores2.txt'
with open(file_path, mode='a') as score_log:
    print(scores, end='', file=score_log)

In [None]:
# Per-class accuracy dictionaries regrouped by method: class_accuracies[method][fold].
class_accuracies = [[fold[idx][-1] for fold in scores] for idx in range(len(scores[0]))]

# Sum the accuracies per class and count in how many folds each class occurred.
# The folds are not stratified, so a class can be missing from a validation
# fold; taking the keys from the first fold only would drop such classes, and
# dividing by the total number of folds would underestimate them.
sum_class_accuracy = []
class_fold_counts = []
for model_accuracies in class_accuracies:
    totals, counts = {}, {}
    for fold_accuracy in model_accuracies:
        for key, value in fold_accuracy.items():
            totals[key] = totals.get(key, 0) + value
            counts[key] = counts.get(key, 0) + 1
    # Sorted keys give every method the same class order on the charts.
    sum_class_accuracy.append({key: totals[key] for key in sorted(totals)})
    class_fold_counts.append(counts)

# Total number of folds (kept for reference; the averages use per-class counts).
num_folds = len(scores)

# Average each class over the folds in which it actually appeared.
avg_class_accuracy = [
    {key: total / counts[key] for key, total in totals.items()}
    for totals, counts in zip(sum_class_accuracy, class_fold_counts)
]

# Print the results
for model_idx, method_name in enumerate(param_methods):
    print("Average Class Accuracy for", method_name, ":", avg_class_accuracy[model_idx])


In [None]:
# One bar chart of per-class accuracy per search strategy, side by side.
# Looping keeps the figure valid for any number of entries in param_methods
# instead of assuming exactly three.
plt.figure(figsize=(18, 6))

for position, (method_name, class_scores) in enumerate(zip(param_methods, avg_class_accuracy), start=1):
    plt.subplot(1, len(param_methods), position)
    plt.bar(list(class_scores.keys()), list(class_scores.values()), color='skyblue')
    plt.title('Accuracy for Each Class (' + method_name + ')')
    plt.xlabel('Classes')
    plt.ylabel('Accuracy')
    plt.xticks(rotation=90, ha='right')
    plt.ylim(0, 1)

plt.tight_layout()
plt.show()


In [None]:
import numpy as np

# Fold-averaged accuracy, precision, recall and F1, one entry per search method
# in the order of param_methods.
avg_accuracy, avg_precision, avg_recall, avg_f1 = [], [], [], []

for idx in range(len(param_methods)):
    # All score tuples recorded for this method, one per fold.
    per_fold = [fold[idx] for fold in scores]

    # Positions 0..3 of a score tuple are accuracy, precision, recall and F1.
    for bucket, position in (
        (avg_accuracy, 0),
        (avg_precision, 1),
        (avg_recall, 2),
        (avg_f1, 3),
    ):
        bucket.append(np.mean([entry[position] for entry in per_fold]))

# Report the averages method by method.
for method_name, mean_acc, mean_prec, mean_rec, mean_f1 in zip(
    param_methods, avg_accuracy, avg_precision, avg_recall, avg_f1
):
    print("Average Accuracy for", method_name + ":", mean_acc)
    print("Average Precision for", method_name + ":", mean_prec)
    print("Average Recall for", method_name + ":", mean_rec)
    print("Average F1-Score for", method_name + ":", mean_f1)


In [None]:
import matplotlib.pyplot as plt

# Hyper-parameter search strategies being compared and the metrics shown for each.
# Taken from param_methods so the bars always match the averages computed above.
methods = list(param_methods)
# NOTE: this rebinds the module-level name `metrics`, which earlier referred to
# the scoring function called by experiment_loop; re-run the cell defining that
# function before running the experiment again.
metrics = ['Accuracy','Precision', 'Recall', 'F1-Score']

# Averaged values per metric (computed earlier), each ordered like `methods`.
values = [
    avg_accuracy,
    avg_precision,
    avg_recall,
    avg_f1
]

# Draw one bar chart per metric in a 2x2 grid.
plt.figure(figsize=(14, 10))
for i in range(len(metrics)):
    plt.subplot(2, 2, i+1)
    plt.bar(methods, values[i], color=['skyblue', 'lightgreen', 'salmon'])
    plt.title('Average ' + metrics[i])
    # The bars are search strategies, not feature extractors.
    plt.xlabel('Hyperparameter search method')
    plt.ylabel(metrics[i])
    plt.ylim(0, 1)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()


In [None]:
from itertools import combinations
import numpy as np
from scipy import stats

    
def corrected_t_test(accuracies_1, accuracies_2, corr=0.1):
    """Compare two sets of cross-validation scores with a correlation-inflated t-test.

    Each sample variance (``ddof=1``) is divided by ``m * (1 - corr)`` and the
    standard error is the square root of the sum of those terms, each divided
    by ``m`` again. The p-value is two-tailed with ``min(m1, m2) - 1`` degrees
    of freedom.

    NOTE(review): this is not the textbook Nadeau-Bengio corrected resampled
    t-test formula; the arithmetic is kept as written - confirm it is the
    intended correction.

    Parameters
    ----------
    accuracies_1 : list or np.ndarray
        Cross-validation scores of technique 1.
    accuracies_2 : list or np.ndarray
        Cross-validation scores of technique 2.
    corr : float, optional
        Assumed correlation between folds; must be smaller than 1. It is never
        estimated from the data.

    Returns
    -------
    float
        The t-statistic.
    float
        The two-tailed p-value.

    Raises
    ------
    ValueError
        If ``corr`` is 1 or larger (the correction would divide by zero or
        take the square root of a negative number).
    """
    if corr >= 1:
        raise ValueError("corr must be smaller than 1")

    m1 = len(accuracies_1)
    m2 = len(accuracies_2)
    mean_difference = np.mean(accuracies_1) - np.mean(accuracies_2)
    variance_1 = np.var(accuracies_1, ddof=1)
    variance_2 = np.var(accuracies_2, ddof=1)

    corrected_variance_1 = variance_1 / (m1 * (1 - corr))
    corrected_variance_2 = variance_2 / (m2 * (1 - corr))
    standard_error = np.sqrt(corrected_variance_1 / m1 + corrected_variance_2 / m2)

    if standard_error == 0:
        # Constant scores in both samples: avoid the 0/0 (NaN) and x/0 warnings.
        if mean_difference == 0:
            return 0.0, 1.0
        return float(np.copysign(np.inf, mean_difference)), 0.0

    t_statistic = mean_difference / standard_error
    p_value = stats.t.sf(np.abs(t_statistic), df=min(m1, m2) - 1) * 2  # two-tailed p-value

    return t_statistic, p_value



In [None]:
from itertools import combinations
from scipy.stats import ttest_rel

def compare_methods(method1, name1, method2, name2):
    """Run a paired t-test on two score sequences and describe the outcome.

    Parameters
    ----------
    method1, method2 : sequence of float
        Paired scores of the two methods.
    name1, name2 : str
        Display names used in the verdict text.

    Returns
    -------
    tuple
        ``(t_statistic, p_value, comparison, significance)``. ``comparison``
        names the method favoured by the sign of the t-statistic, and
        ``significance`` is ``"significant"`` when the p-value is below 0.05,
        otherwise ``"not significant"``.
    """
    t_statistic, p_value = ttest_rel(method1, method2)

    # Default verdict covers a t-statistic that is neither positive nor negative.
    comparison = f"There is no significant difference between {name1} and {name2}"
    if t_statistic > 0:
        comparison = f"{name1} is better than {name2}"
    elif t_statistic < 0:
        comparison = f"{name2} is better than {name1}"

    if p_value < 0.05:
        significance = "significant"
    else:
        significance = "not significant"

    return t_statistic, p_value, comparison, significance

# Search strategies under comparison and their fold-averaged metrics.
methods = param_methods
values = [avg_accuracy, avg_precision, avg_recall, avg_f1]

# A paired test needs one sample per fold for each method. Indexing `values`
# by method would compare the accuracy list with the precision list, so the
# per-fold accuracies are taken from `scores` instead.
fold_accuracies = [[fold[idx][0] for fold in scores] for idx in range(len(methods))]

# Create combinations of method pairs
pairs = combinations(enumerate(methods), 2)

# Iterate over pairs and compare methods
for (idx_1, name_1), (idx_2, name_2) in pairs:
    # Perform comparison on the paired per-fold accuracies
    t_statistic, p_value, comparison, significance = compare_methods(
        fold_accuracies[idx_1], name_1, fold_accuracies[idx_2], name_2
    )

    # Print comparison results
    print(f'{name_1} vs. {name_2}:')
    print(f'T-statistic: {t_statistic}')
    print(f'P-value: {p_value}')
    print(f'Result: {comparison} (p-value {significance})')


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def plot_radar_chart(methods, avg_accuracy, avg_precision, avg_recall, avg_f1, metrics):
    """Show a radar chart with one closed outline per method.

    Parameters
    ----------
    methods : sequence of str
        Legend label of each outline.
    avg_accuracy, avg_precision, avg_recall, avg_f1 : sequence of float
        Values per method, aligned with ``methods``; they are drawn on the
        four axes in this order.
    metrics : sequence of str
        Tick labels of the four axes.
    """
    axis_count = 4

    # Evenly spaced spokes; the first angle is repeated to close each outline.
    spoke_angles = np.linspace(0, 2 * np.pi, axis_count, endpoint=False).tolist()
    closed_angles = spoke_angles + spoke_angles[:1]

    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={'polar': True})

    per_method = zip(methods, avg_accuracy, avg_precision, avg_recall, avg_f1)
    for label, *method_values in per_method:
        # Repeat the first value so the line returns to its starting spoke.
        outline = method_values + method_values[:1]
        ax.plot(closed_angles, outline, linewidth=1, linestyle='solid', label=label)

    # One tick per spoke, labelled with the metric names.
    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(metrics)

    ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

    plt.show()

# Plot the radar chart. The axis labels are passed explicitly because the name
# `metrics` is also used for the scoring function defined earlier in the
# notebook, so its value here depends on which cells ran last.
plot_radar_chart(
    methods, avg_accuracy, avg_precision, avg_recall, avg_f1,
    ['Accuracy', 'Precision', 'Recall', 'F1-Score'],
)
