In [None]:
from utils import *
# Use the "spawn" start method for worker processes. force=True keeps this cell
# idempotent: without it, re-running the cell in a live kernel raises
# "RuntimeError: context has already been set".
mp.set_start_method("spawn", force=True)

In [None]:
# Build the train/test data loaders and obtain the model together with its loss
# and accuracy callables for the named configuration.
# NOTE(review): `dataloader`, `model_loader` and `device` are not defined in this
# notebook; presumably they come from `from utils import *` — confirm.
train_dataloader, test_dataloader = dataloader()
model, loss_fn, accuracy_fn = model_loader('AlexNet_Unfreezed_Wdecay_v1_cv1')
# Move the model to the target device; being the cell's last expression, the
# returned value is also echoed as the cell output.
model.to(device)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc
from tqdm import tqdm
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def plot_confusion_matrix_and_metrics(model, data_loader, device, class_names, criterion,
                                      roc_save_path='roc_auc_curve_9010_cnn.png'):
    """Evaluate a classifier on a data loader, print metrics and plot diagnostics.

    Runs ``model`` in eval mode over every batch of ``data_loader`` under
    ``torch.inference_mode()``, then prints the average loss, accuracy and the
    weighted precision / recall / F1 score. A confusion-matrix heatmap is drawn,
    and when more than two class names are given, one-vs-rest ROC curves are
    plotted (and optionally saved).

    Args:
        model: Module returning per-class scores of shape (batch, n_classes);
            softmax / argmax are taken over dim 1.
        data_loader: Iterable yielding ``(X, y)`` tensor batches. Labels are
            treated as integer class indices in ``range(len(class_names))``.
        device: Device each batch is moved to before the forward pass.
        class_names: Sequence of class names, indexed by class id.
        criterion: Callable ``criterion(y_pred, y)`` returning a scalar loss
            tensor. A ``reduction`` attribute equal to ``'sum'`` is honoured;
            anything else is treated as a per-batch mean.
        roc_save_path: File the ROC figure is written to. Defaults to the
            previously hard-coded file name; pass ``None`` to skip saving.

    Returns:
        None. Results are printed and shown as figures.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()
    n_classes = len(class_names)
    prob_batches, pred_batches, label_batches = [], [], []
    total_loss = 0.0
    n_samples = 0
    # A summed loss is already a per-batch total; a mean loss must be scaled by
    # the batch size so a smaller final batch does not skew the average.
    loss_is_summed = getattr(criterion, 'reduction', 'mean') == 'sum'

    with torch.inference_mode():
        for X, y in tqdm(data_loader):
            X, y = X.to(device), y.to(device)
            y_pred = model(X)

            batch_size = y.shape[0]
            batch_loss = criterion(y_pred, y).item()
            total_loss += batch_loss if loss_is_summed else batch_loss * batch_size
            n_samples += batch_size

            prob_batches.append(torch.softmax(y_pred, dim=1).cpu().numpy())
            pred_batches.append(y_pred.argmax(dim=1).cpu().numpy())
            label_batches.append(y.cpu().numpy())

    if n_samples == 0:
        raise ValueError('data_loader yielded no samples; nothing to evaluate')

    all_probs = np.concatenate(prob_batches)
    all_preds = np.concatenate(pred_batches)
    all_labels = np.concatenate(label_batches)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    # Per-sample average loss over the whole loader.
    avg_loss = total_loss / n_samples

    print(f"Loss: {avg_loss:.4f}")
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"Precision: {precision:.4f}")
    print(f"Recall (Sensitivity): {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    # Confusion matrix: rows are true classes, columns are predicted classes.
    # Passing explicit labels keeps the axes aligned with class_names even when
    # some class never occurs in this split.
    if n_classes > 0:
        cm = confusion_matrix(all_labels, all_preds, labels=np.arange(n_classes))
        _, ax_cm = plt.subplots(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=list(class_names), yticklabels=list(class_names),
                    ax=ax_cm)
        ax_cm.set_xlabel('Predicted label')
        ax_cm.set_ylabel('True label')
        ax_cm.set_title('Confusion Matrix')
        plt.show()

    # One-vs-rest ROC curves; only drawn for more than two classes (the
    # two-class case is skipped, as in the original implementation).
    if n_classes > 2:
        fpr = {}
        tpr = {}
        roc_auc = {}
        for i in range(n_classes):
            # Class i against all others, scored by its softmax probability.
            fpr[i], tpr[i], _ = roc_curve(all_labels == i, all_probs[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])

        fig_roc, ax_roc = plt.subplots(figsize=(10, 8))
        for i in range(n_classes):
            ax_roc.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC = {roc_auc[i]:.2f})')

        # Diagonal reference line for a random classifier.
        ax_roc.plot([0, 1], [0, 1], 'k--', lw=2)
        ax_roc.set_xlim([0.0, 1.0])
        ax_roc.set_ylim([0.0, 1.05])
        ax_roc.set_xlabel('False Positive Rate')
        ax_roc.set_ylabel('True Positive Rate')
        ax_roc.set_title('ROC AUC Curve')
        ax_roc.legend(loc='lower right')
        if roc_save_path is not None:
            fig_roc.savefig(roc_save_path, bbox_inches='tight', dpi=300)
        plt.show()

# Evaluate the loaded model on the test split.
# Class names are read from the test loader's dataset (`.classes` attribute).
class_names = test_dataloader.dataset.classes
# Cross-entropy with default settings is the loss reported by the evaluation.
criterion = torch.nn.CrossEntropyLoss()
plot_confusion_matrix_and_metrics(model, test_dataloader, device, class_names, criterion)

In [None]:
# Replace the classifier head with an empty Sequential, which passes its input
# through unchanged — presumably so the model now outputs the features that fed
# the classifier (depends on the model's forward(); confirm).
# NOTE: this mutates `model` in place. Re-running the evaluation cell above after
# this cell would evaluate the headless model, not the original classifier.
model.classifier = nn.Sequential()
# Echo the modified architecture as the cell output.
model

In [None]:
# Run both splits through the headless model to obtain (features, labels) pairs.
# NOTE(review): `extract_features` is not defined in this notebook (presumably
# from `utils`); later cells call `.cpu().numpy()` on its train outputs, so they
# are expected to be torch tensors — confirm.
train_features, train_labels = extract_features(train_dataloader, model)
test_features, test_labels = extract_features(test_dataloader, model)

In [None]:
# Sanity check: display the shape of the extracted training labels.
train_labels.shape

In [None]:
# Connect to an H2O cluster (H2O starts a local one if none is reachable);
# required before any H2OFrame or estimator is created below.
h2o.init()

In [None]:
# Bring the extracted training tensors to host memory as NumPy arrays
# (features first, then labels).
train_features_np, train_labels_np = (
    tensor.cpu().numpy() for tensor in (train_features, train_labels)
)

# Wrap the arrays as H2O frames; the label frame gets an explicit column name
# so it can be referenced as the response column later.
train_features_h2o = h2o.H2OFrame(train_features_np)
train_labels_h2o = h2o.H2OFrame(
    train_labels_np,
    column_names=['label'],
)

In [None]:
# `timer` is used below but the notebook only imports it in a later cell;
# importing it here keeps this cell runnable on a fresh kernel (Run All).
from timeit import default_timer as timer

# L1-penalised (alpha=1.0) Gaussian GLM fitted on the extracted features.
# The penalty strength lambda_ is extremely small, so the regularisation is
# very weak.
lasso_model = H2OGeneralizedLinearEstimator(
    family="gaussian",
    alpha=1.0,
    lambda_=0.00000000000000000000001,
    max_iterations=10000
)
# The timed section covers joining the label column onto the features as well
# as the model fit itself.
start_time = timer()
train_data_h2o = train_features_h2o.cbind(train_labels_h2o)
lasso_model.train(
    x=train_features_h2o.columns,  # predictors: every feature column
    y='label',                     # response: the column named when the label frame was built
    training_frame=train_data_h2o
)
end_time = timer()
print(f'Total training time: {end_time - start_time:.3f} seconds')

In [None]:
# Fetch the fitted GLM coefficients as a name -> value mapping.
coefficients = lasso_model.coef()
# Discard the first entry before indexing (presumably the intercept term —
# confirm against the H2O coefficient ordering).
first_key = next(iter(coefficients))
coefficients.pop(first_key)
# Positions, within the remaining coefficients, of those that are non-zero.
non_zero_indices = [
    position
    for position, weight in enumerate(coefficients.values())
    if weight != 0
]

# Number of coefficients that are non-zero (shown as the cell output).
len(non_zero_indices)

In [None]:
from timeit import default_timer as timer
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Time the whole stage: coefficient extraction, XGBoost fit, prediction and
# metric computation all fall inside the measured interval.
start_time = timer()

# Indices of the non-zero Lasso coefficients (first entry of the coefficient
# mapping is dropped before indexing).
coefficients = lasso_model.coef()
first_key = next(iter(coefficients))
del coefficients[first_key]
non_zero_indices = [i for i, (key, value) in enumerate(coefficients.items()) if value != 0]

# NOTE(review): `non_zero_indices` is computed but never applied — the full
# feature matrices are used below. Confirm whether the Lasso selection was
# meant to subset the columns here.
#
# The extracted features/labels are torch tensors (the H2O cell converts them
# with .cpu().numpy()); convert them explicitly so XGBoost and scikit-learn
# receive host NumPy arrays even when the tensors live on a CUDA device.
train_features_selected = train_features.cpu().numpy()
test_features_selected = test_features.cpu().numpy()
xgb_train_labels = train_labels.cpu().numpy()

xgb_model = XGBClassifier()
xgb_model.fit(train_features_selected, xgb_train_labels)

# Hard predictions and per-class probabilities on the test split.
all_preds = np.array(xgb_model.predict(test_features_selected))
all_probs = xgb_model.predict_proba(test_features_selected)
all_labels = test_labels.cpu().numpy()

accuracy = accuracy_score(all_labels, all_preds)

# Weighted averages account for class imbalance in the test split.
cm = confusion_matrix(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='weighted')
recall = recall_score(all_labels, all_preds, average='weighted')
f1 = f1_score(all_labels, all_preds, average='weighted')

end_time = timer()
print(f'Total training time: {end_time - start_time:.3f} seconds')

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")