In [1]:
import umap
import time
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from src.preprocessing import CustomDataGenerator
from src.dataloader import read_train, read_val
import numpy as np

import keras
from src import config
from keras.applications.efficientnet import EfficientNetB0, preprocess_input
# set cpu as available physical device
import tensorflow as tf


X_train, y_train = read_train()
X_val, y_val = read_val()

# X_train = X_train[:5000]


INPUT_SHAPE = (config.IMG_SIZE, config.IMG_SIZE, config.N_CHANNELS)
model = keras.applications.EfficientNetB0(weights="imagenet", 
                                 include_top=True)

extractor = keras.Model(inputs=model.inputs, outputs=model.layers[-3].output)
resize_layer = keras.layers.Resizing(224, 224, interpolation='bilinear', name='resize')
preprocess_input_layer = keras.layers.Lambda(preprocess_input, name='preprocess_input')
extractor = keras.Sequential([
    resize_layer,
    preprocess_input_layer,
    extractor
])

extractor(X_train[:1])

def get_features(X, y, extractor):
    loader = CustomDataGenerator(X, y, batch_size=32)
    features = extractor.predict(loader)
    return features

X_train_features = get_features(X_train, y_train, extractor)
X_val_features = get_features(X_val, y_val, extractor)

  from .autonotebook import tqdm as notebook_tqdm
2023-11-29 00:05:57.394192: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-29 00:05:57.498370: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-29 00:06:11.940832: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-



NameError: name 'val_generator_df' is not defined

In [2]:
# grid search
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import xgboost as xgb

# use xgboost as the model with gpu support
rf = xgb.XGBClassifier(tree_method='hist', device="cuda", n_jobs=-1, )


umap_pipe = umap.UMAP(n_components=16)
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('umap', umap_pipe),
    ('rf', rf)
])

pipe.fit(X_train_features, y_train)

y_pred = pipe.predict(X_val_features)
acc = accuracy_score(y_val, y_pred)
print(f"Accuracy: {acc}")

Accuracy: 0.6455172413793103


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [4]:
X_train_prep = umap_pipe.fit_transform(X_train_features, y_train)
X_val_prep = umap_pipe.transform(X_val_features)

rf = xgb.XGBClassifier(tree_method='hist', device="cuda", n_jobs=-1, )
rf.fit(X_train_prep, y_train)

y_pred = rf.predict(X_val_prep)
acc = accuracy_score(y_val, y_pred)
print(f"Accuracy: {acc}")

Accuracy: 0.6744827586206896


In [2]:
# import packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
import xgboost as xgb
space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180,
        'seed': 0
    }

def objective(space):
    clf=xgb.XGBClassifier(
                    n_estimators =space['n_estimators'],
                    max_depth = int(space['max_depth']), gamma = space['gamma'],
                    reg_alpha = int(space['reg_alpha']),min_child_weight=int(space['min_child_weight']),
                    colsample_bytree=int(space['colsample_bytree']), device="cuda", n_jobs=-1)

    umap_pipe = umap.UMAP(n_components=16)

    pipe = Pipeline([
                    ('scaler', StandardScaler()),
                    ('umap', umap_pipe),
                    ('clf', clf)])

    
    pipe.fit(X_train_features, y_train)
    

    pred = pipe.predict(X_val_features)
    accuracy = accuracy_score(y_val, pred)
    print ("SCORE:", accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK }

In [3]:
trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 5,
                        trials = trials)

SCORE:                                               
0.6544827586206896                                   
 20%|██        | 1/5 [00:21<01:26, 21.55s/trial, best loss: -0.6544827586206896]

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.





SCORE:                                                                          
0.646896551724138                                                               
SCORE:                                                                          
0.643448275862069                                                               
SCORE:                                                                          
0.6358620689655172                                                              
SCORE:                                                                          
0.6462068965517241                                                              
100%|██████████| 5/5 [01:04<00:00, 12.86s/trial, best loss: -0.6544827586206896]


In [4]:
best_hyperparams

{'colsample_bytree': 0.9874580949158513,
 'gamma': 3.871764465617759,
 'max_depth': 11.0,
 'min_child_weight': 7.0,
 'reg_alpha': 51.0,
 'reg_lambda': 0.5771025797200257}

In [3]:
# grid search
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import xgboost as xgb

# use xgboost as the model with gpu support
rf = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=0, n_jobs=-1)


umap_pipe = umap.UMAP(n_components=16)
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('umap', umap_pipe),
    ('rf', rf)
])

param_grid = {
    'rf__n_estimators': [100, 200, 300],
    'rf__min_samples_leaf': [1, 5, 10],
    # 'umap__n_components': [2, 4, 8, 16, 32, 64, 128]
}

grid = GridSearchCV(pipe, param_grid, cv=3, verbose=2, n_jobs=-1)
grid.fit(X_train_features, y_train)

best_params = grid.best_params_

df_results = pd.DataFrame(grid.cv_results_)

Fitting 3 folds for each of 9 candidates, totalling 27 fits


2023-11-28 22:21:05.411592: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-28 22:21:05.442720: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-28 22:21:05.451074: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-28 22:21:05.491642: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tenso