In [None]:
import cv2 
import os 
import numpy as np 
import optuna 
from sklearn.model_selection import cross_val_score, StratifiedKFold 
from sklearn.ensemble import RandomForestClassifier 
from tqdm import tqdm 
from skimage import feature 

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
DATASET_PATH = 'dataset_augmented'
IMG_WIDTH = 224
IMG_HEIGHT = 224
N_TRIALS = 100
N_FOLDS = 5

In [4]:
def load_and_extract_hog(path, label):
    data = []
    labels = []
    files = [f for f in os.listdir(path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    
    for f in tqdm(files, desc=f"Loading label {label}"):
        img_path = os.path.join(path, f)
        image = cv2.imread(img_path)
        if image is None: continue
        
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        
        hog_feat = feature.hog(
            gray, 
            orientations=9, 
            pixels_per_cell=(16, 16), 
            cells_per_block=(2, 2), 
            transform_sqrt=True, 
            block_norm='L1'
        )
        
        data.append(hog_feat)
        labels.append(label)
        
    return data, labels

In [5]:
pos_X, pos_y = load_and_extract_hog(os.path.join(DATASET_PATH, 'Positive'), 1)
neg_X, neg_y = load_and_extract_hog(os.path.join(DATASET_PATH, 'Negative'), 0)
X = np.array(pos_X + neg_X)
y = np.array(pos_y + neg_y)

print(f"Data Loaded: {X.shape}")

Loading label 1: 100%|██████████| 1325/1325 [01:04<00:00, 20.70it/s] 
Loading label 0: 100%|██████████| 1325/1325 [01:04<00:00, 20.46it/s]

Data Loaded: (2650, 6084)





In [9]:
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    max_depth = trial.suggest_int('max_depth', 5, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
    
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42,
        n_jobs=-1 
    )
    
    cv = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)
    scores = cross_val_score(model, X, y, cv=cv, scoring='f1_macro')
    
    return scores.mean()

In [10]:
print(f"\n--- STARTING TUNING ({N_TRIALS} Trials) ---")
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=N_TRIALS)

print("\n" + "="*40)
print("OPTUNA TUNING COMPLETE")
print("="*40)
print(f"Best Trial Score (Macro F1): {study.best_value:.4f}")
print("Best Parameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

[I 2025-12-05 22:52:46,992] A new study created in memory with name: no-name-bf744b63-8bfd-4051-9394-9804d4f1cc12



--- STARTING TUNING (100 Trials) ---


[I 2025-12-05 22:52:52,674] Trial 0 finished with value: 0.784229757642909 and parameters: {'n_estimators': 153, 'max_depth': 30, 'min_samples_split': 14, 'min_samples_leaf': 9}. Best is trial 0 with value: 0.784229757642909.
[I 2025-12-05 22:52:59,692] Trial 1 finished with value: 0.7789770590563668 and parameters: {'n_estimators': 201, 'max_depth': 40, 'min_samples_split': 7, 'min_samples_leaf': 9}. Best is trial 0 with value: 0.784229757642909.
[I 2025-12-05 22:53:02,845] Trial 2 finished with value: 0.7760771694009236 and parameters: {'n_estimators': 75, 'max_depth': 17, 'min_samples_split': 20, 'min_samples_leaf': 7}. Best is trial 0 with value: 0.784229757642909.
[I 2025-12-05 22:53:10,765] Trial 3 finished with value: 0.7826070219128679 and parameters: {'n_estimators': 216, 'max_depth': 44, 'min_samples_split': 17, 'min_samples_leaf': 6}. Best is trial 0 with value: 0.784229757642909.
[I 2025-12-05 22:53:18,809] Trial 4 finished with value: 0.7770610367131576 and parameters: {'n


OPTUNA TUNING COMPLETE
Best Trial Score (Macro F1): 0.7938
Best Parameters:
  n_estimators: 238
  max_depth: 18
  min_samples_split: 13
  min_samples_leaf: 6
