In [1]:
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from tqdm.notebook import tqdm

In [2]:
# Directory containing the arrays this notebook loads (positives.npy and negatives.npy).
# NOTE(review): absolute, machine-specific path — must be edited to run elsewhere.
ROOT_DIR = "/media/plerolland/LaBoite/PublicData/training/classification/scalograms"
# Seed given to np.random.seed before the dataset is shuffled.
SEED = 0
# Number of cross-validation folds the shuffled dataset is cut into.
FOLDS = 5

In [3]:
# Load the two classes from disk and stack them: positives first, then negatives.
pos = np.load(f"{ROOT_DIR}/positives.npy")
neg = np.load(f"{ROOT_DIR}/negatives.npy")
X = np.concatenate([pos, neg], axis=0)
# Labels follow the same order: 1.0 for each positive sample, 0.0 for each negative one.
Y = np.repeat([1.0, 0.0], [len(pos), len(neg)])

# Shuffle samples and labels with a single seeded permutation so they stay aligned
# and the ordering is reproducible from SEED.
np.random.seed(SEED)
p = np.random.permutation(X.shape[0])
X = X[p]
Y = Y[p]

In [4]:
# Manual K-fold split: fold k validates on the contiguous slice [lo, hi) of the
# shuffled data and trains on everything outside that slice.
fold_slices = [
    (int(len(X) * k / FOLDS), int(len(X) * (k + 1) / FOLDS))
    for k in range(FOLDS)
]

# One entry per fold, index-aligned across the four lists.
X_valids = [X[lo:hi] for lo, hi in fold_slices]
Y_valids = [Y[lo:hi] for lo, hi in fold_slices]
X_trains = [np.concatenate((X[:lo], X[hi:])) for lo, hi in fold_slices]
Y_trains = [np.concatenate((Y[:lo], Y[hi:])) for lo, hi in fold_slices]

In [5]:
# Grid search over the number of boosting stages.
# For each candidate n_estimators, a classifier is trained on every fold's training
# split; the mean training accuracy and mean validation accuracy across the FOLDS
# folds are stored in train_scores / val_scores (one entry per candidate) and printed.
n_estimators_to_try = range(10, 80, 10)
train_scores = []
val_scores = []
for n_estimators in tqdm(n_estimators_to_try, desc="grid loop", position=0):
    fold_train_scores = []
    fold_val_scores = []
    for i in tqdm(range(FOLDS), desc="folds loop", position=1, leave=False):
        # A fresh estimator per fold, seeded from the notebook-wide SEED so that the
        # model randomness follows the same seed as the data shuffle.
        classifier = GradientBoostingClassifier(
            n_estimators=n_estimators,
            learning_rate=0.001,
            max_depth=4,
            random_state=SEED,
        )
        classifier.fit(X_trains[i], Y_trains[i])
        # score() returns the mean accuracy on the given samples.
        fold_train_scores.append(classifier.score(X_trains[i], Y_trains[i]))
        fold_val_scores.append(classifier.score(X_valids[i], Y_valids[i]))
    # Average over folds (same left-to-right summation as a running total).
    train_scores.append(sum(fold_train_scores) / FOLDS)
    val_scores.append(sum(fold_val_scores) / FOLDS)
    print(f"Score associated with tried value {n_estimators} : {val_scores[-1]} (training score of {train_scores[-1]})")

grid loop:   0%|          | 0/7 [00:00<?, ?it/s]

folds loop:   0%|          | 0/5 [00:00<?, ?it/s]

Score associated with tried value 10 : 0.6312791736393982 (training score of 0.6363653123279637)


folds loop:   0%|          | 0/5 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [19]:
# Cross-validated accuracy of a single hyperparameter configuration.
# NOTE(review): n_estimators=1 with learning_rate=1 lies outside the grid explored
# in the grid-search cell (10-70 stages at learning_rate=0.001) — confirm this is
# the intended configuration.
selected_n_estimators = 1

score = 0
for i in tqdm(range(FOLDS), desc="folds loop"):
    # Build a fresh estimator for every fold (as the grid-search cell does) and seed
    # it from the notebook-wide SEED rather than a separate hard-coded value.
    classifier = GradientBoostingClassifier(
        n_estimators=selected_n_estimators,
        learning_rate=1,
        max_depth=4,
        random_state=SEED,
    )
    classifier.fit(X_trains[i], Y_trains[i])
    # Accumulate the validation accuracy of this fold.
    score += classifier.score(X_valids[i], Y_valids[i])
# Mean validation accuracy over all folds.
print(score / FOLDS)

folds loop:   0%|          | 0/5 [00:00<?, ?it/s]

0.714354681484957
