## VGG16

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from itertools import product
from tqdm import tqdm
import joblib
import warnings
import matplotlib.pyplot as plt

In [4]:
# Ignore all warnings
warnings.filterwarnings("ignore")

# Step 1: Load Data
train_data = np.load('npy/vgg16/data_train.npy')  # Replace with the actual path
train_features = train_data[:, :-1]
train_labels = train_data[:, -1].astype(int)  # Ensure labels are integers

# Step 2: Split Data for Training and Validation
X_train, X_val, y_train, y_val = train_test_split(
    train_features, train_labels, test_size=0.2, random_state=42
)

# Step 3: Define Hyperparameter Grid
param_grid = {
    'n_estimators': [100, 500, 1000],
    'max_depth': [3, 6, 9],
    'learning_rate': [0.01, 0.1, 0.3]
}

# Generate all combinations of hyperparameters
param_list = list(map(lambda x: dict(zip(param_grid.keys(), x)), product(*param_grid.values())))

# Step 4: Initialize Variables for Tracking Results
results_list = []
best_acc = 0
best_param = None

# Step 5: Hyperparameter Tuning with Progress Bar
for param in tqdm(param_list, desc="Hyperparameter Tuning Progress"):
    model = XGBClassifier(
        tree_method='gpu_hist',  # Use GPU for training
        predictor='gpu_predictor',  # Optional: Use GPU for prediction
        use_label_encoder=False,
        eval_metric='logloss',
        **param
    )
    model.fit(X_train, y_train)
    
    # Predict on Validation Set
    y_pred = model.predict(X_val)
    
    # Calculate Metrics
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')
    
    # Store Results
    result = {
        'param': str(param),
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }
    results_list.append(result)
    
    # Update Best Model
    if accuracy > best_acc:
        best_acc = accuracy
        best_param = param
        # Save Best Model
        joblib.dump(model, 'best_xgb_model.pkl')
    
    print(f"Parameters: {param}, Accuracy: {accuracy:.4f}, Best Accuracy: {best_acc:.4f}")

# Step 6: Save Results to a DataFrame and Export to CSV
results_df = pd.DataFrame(results_list)
results_df.to_csv('xgb_tuning_results.csv', index=False)
print("Tuning results exported to 'xgb_tuning_results.csv'")

# Step 7: Visualize the Results
# Sort Results by Accuracy
results_df = results_df.sort_values(by='accuracy', ascending=False)

# Plot Accuracy for Different Parameters
plt.figure(figsize=(12, 6))
plt.bar(results_df['param'], results_df['accuracy'], color='skyblue')
plt.xlabel('Parameters')
plt.ylabel('Accuracy')
plt.title('Accuracy for Different Parameters')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

Hyperparameter Tuning Progress:   4%|█▎                                | 1/27 [00:39<17:00, 39.25s/it]

Parameters: {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.01}, Accuracy: 0.6698, Best Accuracy: 0.6698


Hyperparameter Tuning Progress:   7%|██▌                               | 2/27 [01:24<17:42, 42.49s/it]

Parameters: {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.1}, Accuracy: 0.7190, Best Accuracy: 0.7190


Hyperparameter Tuning Progress:  11%|███▊                              | 3/27 [02:06<16:56, 42.34s/it]

Parameters: {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.3}, Accuracy: 0.7204, Best Accuracy: 0.7204


Hyperparameter Tuning Progress:  11%|███▊                              | 3/27 [02:17<18:17, 45.74s/it]


XGBoostError: [07:27:55] /workspace/src/tree/updater_gpu_hist.cu:861: Exception in gpu_hist: [07:27:55] /workspace/src/c_api/../common/device_helpers.cuh:393: Memory allocation error on worker 0: std::bad_alloc: cudaErrorMemoryAllocation: out of memory
- Free memory: 3426484224
- Requested memory: 5621329280

Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dcbc) [0x7f3d9aa2dcbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x8a9886) [0x7f3d9b0a9886]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x109f24) [0x7f3d9a909f24]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd21e48) [0x7f3d9b521e48]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd223c7) [0x7f3d9b5223c7]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd22944) [0x7f3d9b522944]
  [bt] (6) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd28465) [0x7f3d9b528465]
  [bt] (7) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2c911) [0x7f3d9b52c911]
  [bt] (8) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f526) [0x7f3d9ad7f526]



Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dcbc) [0x7f3d9aa2dcbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2cbd4) [0x7f3d9b52cbd4]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f526) [0x7f3d9ad7f526]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580593) [0x7f3d9ad80593]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cb1f8) [0x7f3d9adcb1f8]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7f3d9a93752f]
  [bt] (6) /usr/lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7f3f1d747e2e]
  [bt] (7) /usr/lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7f3f1d744493]
  [bt] (8) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xa3e9) [0x7f3f1d75f3e9]

