In [2]:
from numpy import mean, std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
from sklearn.linear_model import Perceptron

# Build the synthetic classification dataset
X, y = make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=2,
    n_redundant=1,
    random_state=1,
)

# Evaluation procedure: stratified 10-fold split, repeated 3 times
cv = RepeatedStratifiedKFold(random_state=1, n_repeats=3, n_splits=10)

# Model under test: a Perceptron with its default settings
model = Perceptron()

# Score the model on every split (n_jobs=-1 is forwarded to cross_val_score)
scores = cross_val_score(
    model,
    X,
    y,
    cv=cv,
    scoring='accuracy',
    n_jobs=-1,
)

# Report the mean and standard deviation of the per-split accuracies
print('Mean Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean Accuracy: 0.786 (0.069)


In [1]:
from numpy.random import randn
from numpy.random import rand
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import Perceptron
from numpy import mean  # Este import faltaba

# Objective function
def objective(X, y, cfg):
    """Score one Perceptron configuration by repeated stratified cross-validation.

    Args:
        X: Inputs, forwarded unchanged to ``cross_val_score``.
        y: Targets, forwarded unchanged to ``cross_val_score``.
        cfg: Two-item sequence ``(eta, alpha)``. ``eta`` becomes the
            Perceptron's ``eta0`` and ``alpha`` its ``alpha`` argument.

    Returns:
        The mean of the accuracy scores over all 10 x 3 splits.
    """
    learning_rate, penalty_weight = cfg
    # Same splitter settings on every call so configurations are compared
    # on identical folds
    splitter = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    classifier = Perceptron(
        penalty='elasticnet',
        alpha=penalty_weight,
        eta0=learning_rate,
    )
    fold_scores = cross_val_score(
        classifier, X, y, scoring='accuracy', cv=splitter, n_jobs=-1
    )
    return mean(fold_scores)

# Take a step in the search space
def step(cfg, step_size):
    """Return a randomly perturbed copy of a ``(eta, alpha)`` configuration.

    Each value is shifted by a standard-normal draw scaled by ``step_size``
    and then clamped.

    Args:
        cfg: Two-item sequence ``(eta, alpha)``.
        step_size: Multiplier applied to each random draw.

    Returns:
        ``[new_eta, new_alpha]`` where a non-positive eta becomes ``1e-8``,
        an eta above 1.0 becomes ``1.0`` and a negative alpha becomes ``0.0``.
    """
    eta, alpha = cfg

    # eta is perturbed first so the order of random draws is eta, then alpha
    moved_eta = eta + randn() * step_size
    if moved_eta <= 0.0:
        moved_eta = 1e-8
    elif moved_eta > 1.0:
        moved_eta = 1.0

    # alpha only has a lower bound
    moved_alpha = alpha + randn() * step_size
    moved_alpha = 0.0 if moved_alpha < 0.0 else moved_alpha

    return [moved_eta, moved_alpha]

# Hill climbing local search algorithm
def hillclimbing(X, y, objective, n_iter, step_size):
    """Run a stochastic hill climb over ``(eta, alpha)`` configurations.

    Args:
        X: Inputs handed to ``objective``.
        y: Targets handed to ``objective``.
        objective: Callable invoked as ``objective(X, y, cfg)``; larger
            return values are treated as better.
        n_iter: Number of candidate steps to try.
        step_size: Forwarded to ``step`` for every candidate.

    Returns:
        ``[best_cfg, best_score]`` for the last accepted configuration.
    """
    # Random starting point: two uniform draws
    best_cfg = [rand(), rand()]
    best_score = objective(X, y, best_cfg)

    for iteration in range(n_iter):
        trial_cfg = step(best_cfg, step_size)
        trial_score = objective(X, y, trial_cfg)

        # Candidates that tie the current score are accepted too
        if not (trial_score >= best_score):
            continue

        best_cfg, best_score = trial_cfg, trial_score
        print('>%d, cfg=%s %.5f' % (iteration, best_cfg, best_score))

    return [best_cfg, best_score]

# Seed NumPy's global random generator so the random starting point (rand)
# and every random step (randn) of the search repeat when the cell is re-run
from numpy.random import seed
seed(1)

# Define dataset
X, y = make_classification(
    n_samples=1000, n_features=5, n_informative=2,
    n_redundant=1, random_state=1
)

# Define the total iterations
n_iter = 100

# Step size in the search space
step_size = 0.1

# Perform the hill climbing search
cfg, score = hillclimbing(X, y, objective, n_iter, step_size)
print('Done!')
print('cfg=%s: Mean Accuracy: %f' % (cfg, score))

>14, cfg=[0.9387093220468394, 0.40686640499929094] 0.73867
>17, cfg=[0.8884165784917945, 0.42461627054728185] 0.74000
>20, cfg=[0.8828433331105711, 0.2608781821828061] 0.75433
>22, cfg=[0.9136123173513523, 0.14794072113639195] 0.76367
>24, cfg=[0.878791008369527, 0.005370806984794946] 0.77533
>27, cfg=[0.8708383526669874, 0.0] 0.78633
>28, cfg=[0.697018104079431, 0.0] 0.78633
>29, cfg=[0.675747774336744, 0.0] 0.78633
>32, cfg=[0.6973564560560407, 0.0] 0.78633
>35, cfg=[0.7698625651626905, 0.0] 0.78633
>36, cfg=[1.0, 0.0] 0.78633
>37, cfg=[1.0, 0.0] 0.78633
>40, cfg=[0.9051984154420494, 0.0] 0.78633
>41, cfg=[0.6853075324968534, 0.0] 0.78633
>45, cfg=[0.8689364038266452, 0.0] 0.78633
>47, cfg=[1.0, 0.0] 0.78633
>51, cfg=[1.0, 0.0] 0.78633
>53, cfg=[1.0, 0.0] 0.78633
>54, cfg=[1.0, 0.0] 0.78633
>57, cfg=[1.0, 0.0] 0.78633
>58, cfg=[1.0, 0.12579897095642487] 0.78700
>67, cfg=[0.9962127030246269, 0.0697541431573118] 0.79367
Done!
cfg=[0.9962127030246269, 0.0697541431573118]: Mean Accuracy:

In [5]:
import xgboost
# Print the installed xgboost version so results can be tied to a library release
print("xgboost", xgboost.__version__)

xgboost 3.0.2


In [7]:
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from xgboost import XGBClassifier

# Define dataset
X, y = make_classification(
    n_samples=1000, n_features=5, n_informative=2,
    n_redundant=1, random_state=1
)

# Define model
# use_label_encoder is intentionally not passed: this notebook runs xgboost
# 3.0.2, where it is no longer an XGBClassifier parameter and only produces
# an "unused parameter" warning on every fit
model = XGBClassifier(eval_metric="logloss")

# Define evaluation procedure (stratified 10-fold, repeated 3 times)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

# Evaluate model
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

# Report result: mean and standard deviation of the per-split accuracies
print('Mean Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean Accuracy: 0.853 (0.038)


In [11]:
from numpy import mean
from numpy.random import randn, rand, randint
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from xgboost import XGBClassifier

# Objective function
def objective(X, y, cfg):
    """Score one XGBClassifier configuration by repeated stratified cross-validation.

    Args:
        X: Inputs, forwarded unchanged to ``cross_val_score``.
        y: Targets, forwarded unchanged to ``cross_val_score``.
        cfg: Four-item sequence ``(lrate, n_tree, subsam, depth)`` mapped to
            ``learning_rate``, ``n_estimators``, ``subsample`` and
            ``max_depth`` respectively.

    Returns:
        The mean of the accuracy scores over all 10 x 3 splits.
    """
    # unpack config
    lrate, n_tree, subsam, depth = cfg
    # define model
    # use_label_encoder is intentionally not passed: in xgboost 2.x and later
    # it is no longer an XGBClassifier parameter, and passing it emits an
    # "unused parameter" warning for every one of the 30 fits per call
    model = XGBClassifier(
        learning_rate=lrate,
        n_estimators=n_tree,
        subsample=subsam,
        max_depth=depth,
        eval_metric="logloss"
    )
    # define evaluation procedure (same folds on every call)
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    # evaluate model
    scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
    # calculate mean accuracy
    return mean(scores)

# Take a step in the search space
def step(cfg):
    """Return a randomly perturbed copy of an XGBoost configuration.

    Args:
        cfg: Four-item sequence ``(lrate, n_tree, subsam, depth)``.

    Returns:
        ``[lrate, n_tree, subsam, depth]`` after adding scaled standard-normal
        noise to each entry (scales 0.01, 50, 0.1 and 7) and clamping:
        lrate and subsam to ``1e-8`` when non-positive and ``1.0`` when above
        1; n_tree and depth rounded and floored at 1.
    """
    base_lrate, base_trees, base_subsam, base_depth = cfg

    # Random draws happen in the order lrate, n_tree, subsam, depth

    # learning rate
    new_lrate = base_lrate + randn() * 0.01
    if new_lrate <= 0.0:
        new_lrate = 1e-8
    elif new_lrate > 1:
        new_lrate = 1.0

    # number of trees: integer, at least 1
    new_trees = max(round(base_trees + randn() * 50), 1)

    # subsample fraction
    new_subsam = base_subsam + randn() * 0.1
    if new_subsam <= 0.0:
        new_subsam = 1e-8
    elif new_subsam > 1:
        new_subsam = 1.0

    # maximum tree depth: integer, at least 1
    new_depth = max(round(base_depth + randn() * 7), 1)

    return [new_lrate, new_trees, new_subsam, new_depth]

# Hill climbing local search algorithm
def hillclimbing(X, y, objective, n_iter):
    """Run a stochastic hill climb over XGBoost configurations.

    Args:
        X: Inputs handed to ``objective``.
        y: Targets handed to ``objective``.
        objective: Callable invoked as ``objective(X, y, cfg)``; larger
            return values are treated as better.
        n_iter: Number of candidate steps to try.

    Returns:
        ``[best_cfg, best_score]`` for the last accepted configuration.
    """
    # Start one random step away from a fixed seed configuration
    best_cfg = step([0.1, 100, 1.0, 7])
    best_score = objective(X, y, best_cfg)

    for iteration in range(n_iter):
        trial_cfg = step(best_cfg)
        trial_score = objective(X, y, trial_cfg)

        # Candidates that tie the current score are accepted too
        if not (trial_score >= best_score):
            continue

        best_cfg, best_score = trial_cfg, trial_score
        print('>%d, cfg=%s %.5f' % (iteration, best_cfg, best_score))

    return [best_cfg, best_score]

# Seed NumPy's global random generator so the randn-driven steps of the
# search (including the perturbed starting point) repeat when the cell is re-run
from numpy.random import seed
seed(1)

# Define dataset
X, y = make_classification(
    n_samples=1000, n_features=5, n_informative=2,
    n_redundant=1, random_state=1
)

# Define the total iterations
n_iter = 200

# Perform the hill climbing search
cfg, score = hillclimbing(X, y, objective, n_iter)
print('Done!')
print('cfg=%s: Mean Accuracy: %f' % (cfg, score))

[0.09444862242800633, 73, 0.9031754036032325, 15]
[0.09954808696740987, 33, 0.7991716347099277, 4]
>0, cfg=[0.09954808696740987, 33, 0.7991716347099277, 4] 0.86767
[0.09607040161573743, 1, 0.9180350707511166, 1]
[0.10795870490019147, 56, 0.8222236669001263, 6]
[0.10138902526315281, 73, 0.8573765007261418, 2]
[0.09142794573608556, 1, 0.9292195958515741, 7]
[0.08031747118176766, 106, 0.8922378358297796, 3]
>5, cfg=[0.08031747118176766, 106, 0.8922378358297796, 3] 0.86933
[0.09590860808794516, 203, 0.8411944062996836, 2]
[0.07329968710369983, 93, 0.9456470842735472, 1]
[0.07326227898520252, 94, 0.9115847605166154, 14]
[0.08400437602643494, 164, 0.9988596041895473, 1]
[0.06399326331676669, 106, 1.0, 3]
[0.05700976155067958, 133, 0.9633574618350004, 4]
[0.09818568277105832, 114, 0.9945444536367695, 13]
[0.08093620027079117, 68, 0.9061286315265176, 1]
[0.09313419641785424, 76, 0.8811552007966645, 5]
[0.07328537856486743, 65, 0.8748452417682592, 1]
[0.07725038671602082, 81, 0.8578647520927447

KeyboardInterrupt: 