In [1]:
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.datasets import cifar10
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the CIFAR-10 dataset
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Flatten the images (convert 32x32x3 images into 1D vectors)
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

# Normalize the data
scaler = StandardScaler()
X_train_flat = scaler.fit_transform(X_train_flat)
X_test_flat = scaler.transform(X_test_flat)

# Optional: Use a smaller subset of the data to speed up the tuning process
X_train_subset, _, y_train_subset, _ = train_test_split(X_train_flat, y_train, test_size=0.95, random_state=42)
X_test_subset, _, y_test_subset, _ = train_test_split(X_test_flat, y_test, test_size=0.95, random_state=42)

# Convert the labels into a 1D array
y_train_subset = y_train_subset.ravel()
y_test_subset = y_test_subset.ravel()

# Create an XGBoost model
xgb_model = xgb.XGBClassifier(eval_metric='mlogloss')

# Set the hyperparameter grid
param_dist = {
    'max_depth': [3, 5, 7],           # Maximum depth of the trees
    'learning_rate': [0.01, 0.1, 0.3],# Learning rate
    'n_estimators': [50, 100, 200],   # Number of boosting rounds
    'gamma': [0, 0.1, 0.3],           # Minimum loss reduction required for further partitioning
    'subsample': [0.7, 0.8, 1.0],     # Fraction of samples used per tree
    'colsample_bytree': [0.7, 0.8, 1.0], # Fraction of features used per tree,
    'tree_method': ['hist'],          # Use 'hist' method for tree building
    'device': ['cuda']                # Use GPU (CUDA)
}

# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    xgb_model, param_distributions=param_dist, n_iter=10, scoring='accuracy', cv=3, verbose=1, random_state=42
)

# Perform hyperparameter tuning
random_search.fit(X_train_subset, y_train_subset)

# Output the best hyperparameters
print("Best Hyperparameters:", random_search.best_params_)

# Evaluate performance on the test set
y_pred = random_search.best_estimator_.predict(X_test_subset)
accuracy = accuracy_score(y_test_subset, y_pred)
print(f"Test accuracy: {accuracy:.2f}")


2024-09-14 20:59:47.569954: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-14 20:59:47.578628: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-14 20:59:47.581193: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-14 20:59:47.587963: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Fitting 3 folds for each of 10 candidates, totalling 30 fits


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




Best Hyperparameters: {'tree_method': 'hist', 'subsample': 0.8, 'n_estimators': 200, 'max_depth': 7, 'learning_rate': 0.1, 'gamma': 0.1, 'device': 'cuda', 'colsample_bytree': 0.7}
Test accuracy: 0.38
