In [3]:
!pip install ucimlrepo



In [73]:
# Import necessary libraries
import pandas as pd
from ucimlrepo import fetch_ucirepo
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split, KFold
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_curve, auc
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [74]:
def load_and_preprocess_data():
    """Fetch UCI repository dataset id 19 and encode it for model training.

    Returns:
        tuple: ``(X_encoded, y_encoded, label_encoder)`` where

        * ``X_encoded`` is the one-hot encoded feature DataFrame
          (dummy columns stored as ``float32``),
        * ``y_encoded`` is a 1-D array of integer class indices,
        * ``label_encoder`` is the fitted ``LabelEncoder``, needed later to
          map indices back to the original class names.
    """
    # Fetch and load the dataset
    car_evaluation = fetch_ucirepo(id=19)
    X = car_evaluation.data.features
    y = car_evaluation.data.targets

    # One-hot encode the categorical features. The dtype is requested
    # explicitly because recent pandas versions emit boolean dummy columns by
    # default, which is not a numeric input type for the network.
    X_encoded = pd.get_dummies(X, dtype="float32")

    # LabelEncoder expects a 1-D array. The targets may come back as a
    # single-column table, so flatten them first to avoid a column-vector
    # conversion warning.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(np.ravel(y))

    return X_encoded, y_encoded, label_encoder

In [75]:
def build_model(input_dim, num_classes):
    """Build and compile a small feed-forward softmax classifier.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of target classes (width of the softmax output).

    Returns:
        A compiled ``keras.Sequential`` model with two hidden ReLU layers
        (16 and 8 units) and a ``num_classes``-wide softmax output.
    """
    model = keras.Sequential([
        # Declare the input with an explicit Input object; passing
        # `input_shape` to the first Dense layer is deprecated in Keras 3.
        keras.Input(shape=(input_dim,)),
        layers.Dense(16, activation='relu'),
        layers.Dense(8, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        optimizer='adam',
        # Sparse variant: targets are integer class indices, not one-hot rows.
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model


In [76]:
def train_model(model, X_train, y_train, epochs=50, batch_size=16,
                validation_split=0.1, verbose=0):
    """Fit ``model`` on the training data and return the fit result.

    The training hyperparameters are keyword arguments so callers can tune
    them; the defaults reproduce the previously hard-coded values.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Training features, forwarded to ``model.fit`` unchanged.
        y_train: Training targets, forwarded to ``model.fit`` unchanged.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per gradient update.
        validation_split: Fraction of the training data held out by ``fit``
            for validation.
        verbose: Verbosity level passed to ``fit`` (0 = silent).

    Returns:
        Whatever ``model.fit`` returns (the training history for Keras models).
    """
    return model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=verbose,
    )

In [82]:
def plot_confusion_matrix(true_labels, predicted_labels, classes, fold_number):
    """Plot, save and display the confusion matrix for one fold.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Labels predicted by the model.
        classes: Label values, in the order they should appear on both axes.
        fold_number: Fold identifier used in the title and output file name.

    Side effects:
        Writes ``confusion_matrix_fold_<fold_number>.png`` to the current
        working directory and shows the figure.
    """
    cm = confusion_matrix(true_labels, predicted_labels, labels=classes)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=classes, yticklabels=classes)
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.title(f'Confusion Matrix - Fold {fold_number}')
    plt.savefig(f'confusion_matrix_fold_{fold_number}.png')
    # pyplot has no `save` function (calling it raises AttributeError);
    # the figure is written by savefig above and displayed with show().
    plt.show()

In [78]:
def plot_roc_curves(fpr, tpr, roc_auc, classes, fold_number):
    """Plot per-class and micro-averaged ROC curves for one fold.

    Args:
        fpr: Mapping from class index (and the key ``"micro"``) to the
            false-positive-rate values of that curve.
        tpr: Mapping with the same keys holding true-positive-rate values.
        roc_auc: Mapping with the same keys holding the area under each curve.
        classes: Sequence of class names; position ``i`` labels curve ``i``.
        fold_number: Fold identifier used in the title and output file name.

    Side effects:
        Writes ``roc_curve_fold_<fold_number>.png`` to the current working
        directory and shows the figure.
    """
    from itertools import cycle  # local import: only needed for the palette

    plt.figure()
    lw = 2

    # Cycle through the palette so every class gets a curve. Zipping against
    # the bare list would silently drop any class beyond the seventh.
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'green', 'red', 'purple', 'brown'])

    for i, color in zip(range(len(classes)), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                 label=f'ROC curve of class {classes[i]} (area = {roc_auc[i]:0.2f})')

    # Micro-average curve
    plt.plot(fpr["micro"], tpr["micro"],
             label=f'micro-average ROC curve (area = {roc_auc["micro"]:0.2f})',
             color='deeppink', linestyle=':', linewidth=4)

    # Diagonal reference line
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'Receiver Operating Characteristic - Fold {fold_number}')
    plt.legend(loc='lower right')
    plt.savefig(f'roc_curve_fold_{fold_number}.png')
    plt.show()

In [79]:
def evaluate_model(model, X_test, y_test, label_encoder, fold_number):
    """Evaluate a trained model on one fold's held-out data.

    Prints the test loss and accuracy, plots per-class and micro-averaged ROC
    curves, and prints a classification report.

    Args:
        model: Trained model exposing ``evaluate`` and ``predict``.
        X_test: Held-out features.
        y_test: Held-out targets as integer class indices.
        label_encoder: Fitted ``LabelEncoder``; supplies the class names and
            maps indices back to the original labels.
        fold_number: Fold identifier used in printed output and plot names.

    Returns:
        tuple: ``(test_loss, test_accuracy, report)`` where ``report`` is the
        text classification report.
    """
    # Evaluate on the test set
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Fold {fold_number} - Test Accuracy: {test_accuracy:.4f}")
    print(f"Fold {fold_number} - Test Loss: {test_loss:.4f}")

    # Predicted scores, indexed below as one column per class. Kept silent
    # like the evaluate call above.
    y_score = model.predict(X_test, verbose=0)

    # One-vs-rest indicator matrix of the true labels
    num_classes = len(label_encoder.classes_)
    y_test_binarized = label_binarize(y_test, classes=np.arange(num_classes))
    if y_test_binarized.shape[1] == 1:
        # For two classes label_binarize returns a single column (the positive
        # class). Expand it to two columns so the per-class loop below can
        # index every class instead of raising IndexError.
        y_test_binarized = np.hstack([1 - y_test_binarized, y_test_binarized])

    # ROC curve and area under it for each class
    fpr, tpr, roc_auc = {}, {}, {}
    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test_binarized[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Micro-average: pool every (sample, class) decision into one curve
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test_binarized.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    plot_roc_curves(fpr, tpr, roc_auc, label_encoder.classes_, fold_number)

    # Classification report on the original (decoded) labels. y_score is
    # already a NumPy array, so take the argmax directly in NumPy.
    predicted_classes = np.argmax(y_score, axis=1)
    predicted_labels = label_encoder.inverse_transform(predicted_classes)
    true_labels = label_encoder.inverse_transform(y_test)

    report = classification_report(true_labels, predicted_labels, output_dict=False)
    print(f"Fold {fold_number} - Classification Report:\n{report}")

    return test_loss, test_accuracy, report

In [80]:
def main():
    """Run 5-fold cross-validation of the classifier and print mean metrics.

    For each fold a fresh model is built, trained on the training split and
    evaluated on the held-out split; the per-fold test accuracy and loss are
    then averaged and printed.
    """
    # Seed the random number generators used for weight initialisation and
    # batch shuffling so results are repeatable, matching the seeded KFold
    # split below.
    keras.utils.set_random_seed(42)

    # Load and preprocess data
    X_encoded, y_encoded, label_encoder = load_and_preprocess_data()

    n_splits = 5
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # These do not change between folds, so compute them once.
    input_dim = X_encoded.shape[1]
    num_classes = len(label_encoder.classes_)

    # Per-fold metrics
    fold_accuracies = []
    fold_losses = []

    for fold_number, (train_index, test_index) in enumerate(kf.split(X_encoded), start=1):
        print(f"\nStarting Fold {fold_number}/{n_splits}")

        # Split data into training and testing sets for this fold
        X_train, X_test = X_encoded.iloc[train_index], X_encoded.iloc[test_index]
        y_train, y_test = y_encoded[train_index], y_encoded[test_index]

        # A new model per fold so no weights carry over between folds
        model = build_model(input_dim, num_classes)
        train_model(model, X_train, y_train)

        # The text report is already printed by evaluate_model
        test_loss, test_accuracy, _ = evaluate_model(
            model, X_test, y_test, label_encoder, fold_number
        )

        fold_accuracies.append(test_accuracy)
        fold_losses.append(test_loss)

    # Calculate and print average metrics
    avg_accuracy = np.mean(fold_accuracies)
    avg_loss = np.mean(fold_losses)
    print(f"\nAverage Test Accuracy over {n_splits} folds: {avg_accuracy:.4f}")
    print(f"Average Test Loss over {n_splits} folds: {avg_loss:.4f}")

In [None]:
# Run the full cross-validation pipeline when this code is executed directly.
if __name__ == "__main__":
    main()