In [4]:
import pygad
from sklearn.metrics import accuracy_score, roc_auc_score
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import joblib
import pickle
import pyswarms as ps
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
import neat

In [5]:
data = pd.read_csv('csgo_filtred.csv')

y = data['bomb_planted']
X = data.drop(columns=['bomb_planted'], axis=1)
mapping = {
    'de_inferno': 1,
    'de_dust2': 2,
    'de_nuke': 3,
    'de_mirage': 4,
    'de_overpass': 5,
    'de_train': 6,
    'de_vertigo': 7,
    'unknown': 8,
    'de_cache': 9
}
X['map'] = X['map'].map(mapping)
scaler = StandardScaler()
X = scaler.fit_transform(X)
oversample = SMOTE()
transformed_X, tranformed_y = oversample.fit_resample(X, y)
X, y = transformed_X, tranformed_y
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.001, train_size=0.005, random_state=42, stratify=y)

[WinError 2] Не удается найти указанный файл
  File "c:\Users\motyn\AppData\Local\Programs\Python\Python312\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\motyn\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\motyn\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\motyn\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


# PyGAD

In [7]:
def fitness_dt(ga_instance, solution, solution_idx):
    max_depth, min_split, min_leaf = map(int, solution)
    model = DecisionTreeClassifier(
        max_depth=max_depth,
        min_samples_split=min_split,
        min_samples_leaf=min_leaf,
        random_state=42
    )
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    return accuracy_score(y_test, preds)

ga_dt = pygad.GA(
    num_generations=40,
    sol_per_pop=30,
    num_parents_mating=10,          
    num_genes=3,
    gene_space=[
        {'low': 1,  'high': 20},
        {'low': 2,  'high': 20},
        {'low': 1,  'high': 20}
    ],
    fitness_func=fitness_dt,
    parent_selection_type="sss",
    crossover_type="single_point",
    mutation_type="random"
)
ga_dt.run()
best_dt_solution, best_dt_fitness, _ = ga_dt.best_solution()
print("Лучшие параметры DT:", list(map(int, best_dt_solution)), "Accuracy:", best_dt_fitness)


def fitness_nb(ga_instance, solution, solution_idx):
    var_smoothing = 10 ** solution[0]
    model = GaussianNB(var_smoothing=var_smoothing)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    return accuracy_score(y_test, preds)

ga_nb = pygad.GA(
    num_generations=30,
    sol_per_pop=20,
    num_parents_mating=5,           
    num_genes=1,
    gene_space=[{'low': -12, 'high': -6}],
    fitness_func=fitness_nb,
    parent_selection_type="sss",
    crossover_type="single_point",
    mutation_type="random"
)

ga_nb.run()
best_nb_solution, best_nb_fitness, _ = ga_nb.best_solution()
best_var_smoothing = 10 ** best_nb_solution[0]
print("Лучший var_smoothing для NB:", best_var_smoothing, "Accuracy:", best_nb_fitness)


Лучшие параметры DT: [18, 5, 8] Accuracy: 0.9854368932038835


If you do not want to mutate any gene, please set mutation_type=None.


Лучший var_smoothing для NB: 7.045764536422391e-12 Accuracy: 0.9223300970873787


# PySwarms

In [8]:
def dt_objective(params):
    n_particles = params.shape[0]
    losses = np.zeros(n_particles)
    for i in range(n_particles):
        max_depth, min_split, min_leaf = map(int, params[i])
        model = DecisionTreeClassifier(
            max_depth=max_depth,
            min_samples_split=min_split,
            min_samples_leaf=min_leaf,
            random_state=42
        )
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        losses[i] = 1.0 - accuracy_score(y_test, preds)
    return losses

options_dt = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
bounds_dt = (np.array([1, 2, 1]), np.array([20, 20, 20]))

optimizer_dt = ps.single.GlobalBestPSO(
    n_particles=30,
    dimensions=3,
    options=options_dt,
    bounds=bounds_dt
)

best_cost_dt, best_pos_dt = optimizer_dt.optimize(
    dt_objective, iters=100, verbose=True
)

best_dt_params = list(map(int, best_pos_dt))
best_dt_acc = 1.0 - best_cost_dt
print("DT params:", best_dt_params, "accuracy:", best_dt_acc)


def nb_objective(params):
    n_particles = params.shape[0]
    losses = np.zeros(n_particles)
    for i in range(n_particles):
        log_vs = params[i][0]
        var_smoothing = 10 ** log_vs
        model = GaussianNB(var_smoothing=var_smoothing)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        losses[i] = 1.0 - accuracy_score(y_test, preds)
    return losses

options_nb = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
bounds_nb = (np.array([-12.0]), np.array([-6.0]))

optimizer_nb = ps.single.GlobalBestPSO(
    n_particles=20,
    dimensions=1,
    options=options_nb,
    bounds=bounds_nb
)

best_cost_nb, best_pos_nb = optimizer_nb.optimize(
    nb_objective, iters=80, verbose=True
)

best_log_vs = best_pos_nb[0]
best_nb_acc = 1.0 - best_cost_nb
best_var_smoothing = 10 ** best_log_vs
print("NB var_smoothing:", best_var_smoothing, "accuracy:", best_nb_acc)


2025-05-03 11:58:51,101 - pyswarms.single.global_best - INFO - Optimize for 100 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best:   0%|          |0/100

pyswarms.single.global_best: 100%|██████████|100/100, best_cost=0.0146
2025-05-03 11:59:10,347 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.014563106796116498, best pos: [ 6.58453501 14.7653452   9.04534799]
2025-05-03 11:59:10,357 - pyswarms.single.global_best - INFO - Optimize for 80 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}


DT params: [6, 14, 9] accuracy: 0.9854368932038835


pyswarms.single.global_best: 100%|██████████|80/80, best_cost=0.0777
2025-05-03 11:59:13,274 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.07766990291262132, best pos: [-6.520279]


NB var_smoothing: 3.0180122791342283e-07 accuracy: 0.9223300970873787


# NEAT

In [10]:
import neat
import tempfile
import os

CONFIG_TEMPLATE = """
[NEAT]
fitness_criterion     = max
fitness_threshold     = 1.0
pop_size              = 200
reset_on_extinction   = False

[DefaultGenome]
# Параметры узлов
num_inputs                    = {num_inputs}
num_hidden                    = 0
num_outputs                   = {num_outputs}
feed_forward                  = True
initial_connection            = full
compatibility_disjoint_coefficient = 1.0
compatibility_weight_coefficient   = 0.5

# Параметры инициализации смещений
bias_init_mean          = 0.0
bias_init_stdev         = 1.0
bias_max_value          = 30.0
bias_min_value          = -30.0
bias_mutate_power       = 0.5
bias_mutate_rate        = 0.7
bias_replace_rate       = 0.1

# Параметры инициализации характеристик узлов (response)
response_init_mean      = 1.0
response_init_stdev     = 0.0
response_max_value      = 30.0
response_min_value      = -30.0
response_mutate_power   = 0.5
response_mutate_rate    = 0.1
response_replace_rate   = 0.2

# Активационные функции
activation_default      = sigmoid
activation_mutate_rate  = 0.05
activation_options      = sigmoid tanh relu

# Структурные мутации
node_add_prob           = 0.1
node_delete_prob        = 0.2

# Агрегация
aggregation_default         = sum
aggregation_mutate_rate     = 0.1
aggregation_options         = sum
aggregation_init_mean       = 0.0
aggregation_init_stdev      = 1.0
aggregation_max_value       = 30.0
aggregation_min_value       = -30.0

# Весовые коэффициенты
weight_init_mean         = 0.0
weight_init_stdev        = 1.0
weight_max_value         = 30.0
weight_min_value         = -30.0
weight_mutate_power      = 0.5
weight_mutate_rate       = 0.8
weight_replace_rate      = 0.1

# Соединения
enabled_default         = True
enabled_mutate_rate     = 0.1
conn_add_prob           = 0.3
conn_delete_prob        = 0.5

[DefaultSpeciesSet]
compatibility_threshold = 2.0

[DefaultStagnation]
species_fitness_func = max
max_stagnation       = 50

[DefaultReproduction]
elitism            = 3
survival_threshold = 0.2
"""

def make_neat_config(num_inputs: int,
                     num_outputs: int,
                     pop_size: int = 200,
                     fitness_threshold: float = 1.0):
    cfg_str = CONFIG_TEMPLATE.format(
        num_inputs=num_inputs,
        num_outputs=num_outputs
    )
    tmp = tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.cfg')
    tmp.write(cfg_str)
    tmp.flush()
    tmp.close()

    config = neat.Config(
        neat.DefaultGenome,
        neat.DefaultReproduction,
        neat.DefaultSpeciesSet,
        neat.DefaultStagnation,
        tmp.name
    )

    os.unlink(tmp.name)
    return config


In [11]:
import os

def load_and_prepare(path):
    data = pd.read_csv(path)
    y = data['collision'].values
    X = data.drop(columns=['collision']).values
    X = StandardScaler().fit_transform(X)
    return train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

def run(dataset_path, label):
    X_train, X_test, y_train, y_test = load_and_prepare(dataset_path)
    X_tr, X_val, y_tr, y_val = train_test_split(
        X_train, y_train, test_size=0.2,
        random_state=1, stratify=y_train
    )

    config = make_neat_config(
        num_inputs  = X_tr.shape[1],
        num_outputs = 2,
        pop_size    = 200,
        fitness_threshold = 1.0
    )

    pop = neat.Population(config)
    pop.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    pop.add_reporter(stats)

    def eval_genomes(genomes, config):
        for _, genome in genomes:
            net = neat.nn.FeedForwardNetwork.create(genome, config)
            pred = []
            for xi in X_tr:
                o = net.activate(xi)[0]
                pred.append(1 if o > 0.5 else 0)
            genome.fitness = accuracy_score(y_tr, pred)

    pop.run(eval_genomes, n=50)

    best = stats.best_genome()

    net = neat.nn.FeedForwardNetwork.create(best, config)

    def eval_net(net, X, y):
        preds = [1 if net.activate(xi)[0] > 0.5 else 0 for xi in X]
        return {
            'accuracy': accuracy_score(y, preds),
            'roc_auc' : roc_auc_score(y, preds)
        }

    train_metrics = eval_net(net, X_train, y_train)
    test_metrics  = eval_net(net, X_test,  y_test)
    print(f'\n=== Results for {label} ===')
    print('Train:', train_metrics)
    print('Test :', test_metrics)

    os.makedirs('models', exist_ok=True)
    with open(f'models/neat_{label}.pkl', 'wb') as f:
        pickle.dump((best, config), f)

    return {'label': label, **train_metrics, **test_metrics}

if __name__ == '__main__':
    results = []
    for path, lab in [
        ('cars_50_pairs_9_n_feature.csv', 'small'),
        ('cars_1430_pairs_8_n_feature.csv', 'big')
    ]:
        results.append(run(path, lab))

    print('\nComparison:')
    for r in results:
        print(r)



 ****** Running generation 0 ****** 

Population's average fitness: 0.50031 stdev: 0.06636
Best fitness: 0.68750 - size: (2, 28) - species 1 - id 72
Average adjusted fitness: 0.188
Mean genetic distance 1.555, standard deviation 0.688
Population of 200 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    0   147      0.7    0.188     0
     2    0    53      0.5    0.188     0
Total extinctions: 0
Generation time: 0.414 sec

 ****** Running generation 1 ****** 

Population's average fitness: 0.54156 stdev: 0.06768
Best fitness: 0.71875 - size: (3, 28) - species 2 - id 204
Average adjusted fitness: 0.135
Mean genetic distance 1.847, standard deviation 0.723
Population of 200 members in 3 species:
   ID   age  size  fitness  adj fit  stag
     1    1   115      0.7    0.136     0
     2    1    77      0.7    0.134     0
     3    0     8       --       --     0
Total extinctions: 0
Generation time: 0.213 sec (0.314 average)

 ****** Running generation 2 ****** 

Po

  pickle.dump((best, config), f)


Population's average fitness: 0.49851 stdev: 0.03563
Best fitness: 0.58798 - size: (2, 24) - species 1 - id 159
Average adjusted fitness: 0.093
Mean genetic distance 1.439, standard deviation 0.348
Population of 200 members in 3 species:
   ID   age  size  fitness  adj fit  stag
     1    0   105      0.6    0.101     0
     2    0    88      0.6    0.085     0
     3    0     7       --       --     0
Total extinctions: 0
Generation time: 1.759 sec

 ****** Running generation 1 ****** 

Population's average fitness: 0.53221 stdev: 0.03058
Best fitness: 0.61421 - size: (2, 23) - species 1 - id 372
Average adjusted fitness: 0.083
Mean genetic distance 1.601, standard deviation 0.446
Population of 200 members in 3 species:
   ID   age  size  fitness  adj fit  stag
     1    1   111      0.6    0.080     0
     2    1    57      0.6    0.073     1
     3    1    32      0.6    0.095     0
Total extinctions: 0
Generation time: 1.801 sec (1.780 average)

 ****** Running generation 2 ****** 

  pickle.dump((best, config), f)
