In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import joblib
import neat

In [2]:
df_small = pd.read_csv('dataset_1.csv')
df_large = pd.read_csv('dataset_12.csv')

figure_dict = {'circle': 0, 'rectangle': 1}
color_dict = {'red': 0, 'blue': 1, 'green': 2, 'yellow': 3}

df_small['figure1'] = df_small['figure1'].map(figure_dict)
df_small['figure2'] = df_small['figure2'].map(figure_dict)
df_large['figure1'] = df_large['figure1'].map(figure_dict)
df_large['figure2'] = df_large['figure2'].map(figure_dict)
df_large['color1'] = df_large['color1'].map(color_dict)
df_large['color2'] = df_large['color2'].map(color_dict)

In [3]:
# Определение классов QuadTreeNode и ESHyperNEAT (без изменений)
class QuadTreeNode:
    def __init__(self, x_min, x_max, y_min, y_max, depth=0, max_depth=4):
        self.x_min, self.x_max = x_min, x_max
        self.y_min, self.y_max = y_min, y_max
        self.depth = depth
        self.max_depth = max_depth
        self.children = []
        self.neuron = None
        self.variance = None

    def split(self):
        x_mid = (self.x_min + self.x_max) / 2
        y_mid = (self.y_min + self.y_max) / 2
        self.children = [
            QuadTreeNode(self.x_min, x_mid, self.y_min, y_mid, self.depth + 1, self.max_depth),
            QuadTreeNode(x_mid, self.x_max, self.y_min, y_mid, self.depth + 1, self.max_depth),
            QuadTreeNode(self.x_min, x_mid, y_mid, self.y_max, self.depth + 1, self.max_depth),
            QuadTreeNode(x_mid, self.x_max, y_mid, self.y_max, self.depth + 1, self.max_depth)
        ]

    def place_neuron(self, x, y):
        self.neuron = (x, y)

In [4]:
def build_quadtree(cppn, x_min, x_max, y_min, y_max, max_depth=4, variance_threshold=0.1):
    def compute_variance(node, cppn):
        points = [
            (node.x_min, node.y_min), (node.x_max, node.y_min),
            (node.x_min, node.y_max), (node.x_max, node.y_max),
            ((node.x_min + node.x_max) / 2, (node.y_min + node.y_max) / 2)
        ]
        outputs = []
        for x, y in points:
            density = cppn.activate([x, y, 0, 0])[2]
            outputs.append(density)
        node.variance = np.var(outputs)
        return node.variance

    root = QuadTreeNode(x_min, x_max, y_min, y_max, max_depth=max_depth)
    nodes = [root]
    hidden_coordinates = []

    while nodes:
        node = nodes.pop(0)
        if node.depth >= node.max_depth:
            continue
        variance = compute_variance(node, cppn)
        if variance > variance_threshold:
            node.split()
            nodes.extend(node.children)
        else:
            x = (node.x_min + node.x_max) / 2
            y = (node.y_min + node.y_max) / 2
            node.place_neuron(x, y)
            hidden_coordinates.append((x, y))
    return hidden_coordinates

In [5]:
class ESHyperNEAT:
    def __init__(self, config_file, n_features):
        self.config_file = config_file
        self.n_features = n_features
        self.input_coordinates = [(0, i) for i in range(n_features)]
        self.output_coordinates = [(1, 0)]
        self.config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                                 neat.DefaultSpeciesSet, neat.DefaultStagnation,
                                 config_file)
    
    def eval_fitness(self, genomes, X_train, y_train):
        for genome_id, genome in genomes:
            cppn = neat.nn.FeedForwardNetwork.create(genome, self.config)
            hidden_coordinates = build_quadtree(cppn, x_min=0.3, x_max=0.7, y_min=-1, y_max=1, max_depth=4)
            substrate_weights = {}
            for src in self.input_coordinates + hidden_coordinates:
                for dst in hidden_coordinates + self.output_coordinates:
                    outputs = cppn.activate(src + dst)
                    weight = outputs[0]
                    connectivity = outputs[1]
                    if connectivity > 0:
                        substrate_weights[(src, dst)] = weight
            fitness = 0
            for xi, xo in zip(X_train, y_train):
                node_values = {coord: 0 for coord in self.input_coordinates + hidden_coordinates + self.output_coordinates}
                for i, coord in enumerate(self.input_coordinates):
                    node_values[coord] = xi[i]
                for (src, dst), weight in substrate_weights.items():
                    node_values[dst] += node_values[src] * weight
                output_value = node_values[(1, 0)]
                output_value = 1 / (1 + np.exp(-output_value))
                fitness += -xo * np.log(max(output_value, 1e-10)) - (1 - xo) * np.log(max(1 - output_value, 1e-10))
            genome.fitness = -fitness
    
    def run(self, X_train, y_train):
        p = neat.Population(self.config)
        p.add_reporter(neat.StdOutReporter(True))
        stats = neat.StatisticsReporter()
        p.add_reporter(stats)
        winner = p.run(lambda genomes, config: self.eval_fitness(genomes, X_train, y_train), 100)
        return winner
    
    def evaluate(self, X_test, y_test, winner):
        cppn = neat.nn.FeedForwardNetwork.create(winner, self.config)
        hidden_coordinates = build_quadtree(cppn, x_min=0.3, x_max=0.7, y_min=-1, y_max=1, max_depth=4)
        substrate_weights = {}
        for src in self.input_coordinates + hidden_coordinates:
            for dst in hidden_coordinates + self.output_coordinates:
                outputs = cppn.activate(src + dst)
                weight = outputs[0]
                connectivity = outputs[1]
                if connectivity > 0:
                    substrate_weights[(src, dst)] = weight
        predictions = []
        for xi in X_test:
            node_values = {coord: 0 for coord in self.input_coordinates + hidden_coordinates + self.output_coordinates}
            for i, coord in enumerate(self.input_coordinates):
                node_values[coord] = xi[i]
            for (src, dst), weight in substrate_weights.items():
                node_values[dst] += node_values[src] * weight
            output_value = node_values[(1, 0)]
            output_value = 1 / (1 + np.exp(-output_value))
            predictions.append(output_value)
        predicted_classes = [1 if p > 0.5 else 0 for p in predictions]
        f1 = f1_score(y_test, predicted_classes)
        print(f"Test F1-score: {f1}")
        return {
            'f1_score': f1,
            'winner': winner,
            'cppn': cppn,
            'hidden_coordinates': hidden_coordinates,
            'substrate_weights': substrate_weights
        }

In [6]:
# Обработка и обучение для df_small
X_small = df_small.drop('collision', axis=1)
y_small = df_small['collision']
scaler_small = StandardScaler()
X_small_scaled = scaler_small.fit_transform(X_small)
X_train_small, X_test_small, y_train_small, y_test_small = train_test_split(
    X_small_scaled, y_small, test_size=0.2, random_state=42
)

model_small = ESHyperNEAT('es_config.ini', n_features=4)
winner_small = model_small.run(X_train_small, y_train_small)
result_small = model_small.evaluate(X_test_small, y_test_small, winner_small)


 ****** Running generation 0 ****** 

Population's average fitness: -108.25450 stdev: 136.49981
Best fitness: -27.72589 - size: (3, 6) - species 1 - id 53
Average adjusted fitness: 0.814
Mean genetic distance 1.875, standard deviation 0.393
Population of 150 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    0   150    -27.7    0.814     0
Total extinctions: 0
Generation time: 0.203 sec

 ****** Running generation 1 ****** 



  output_value = 1 / (1 + np.exp(-output_value))


Population's average fitness: -30.72260 stdev: 15.10794
Best fitness: -27.72589 - size: (3, 6) - species 1 - id 53
Average adjusted fitness: 0.980
Mean genetic distance 1.724, standard deviation 0.480
Population of 150 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    1   150    -27.7    0.980     1
Total extinctions: 0
Generation time: 0.150 sec (0.176 average)

 ****** Running generation 2 ****** 

Population's average fitness: -28.99891 stdev: 6.44642
Best fitness: -27.72589 - size: (3, 5) - species 1 - id 194
Average adjusted fitness: 0.978
Mean genetic distance 1.745, standard deviation 0.456
Population of 150 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    2   150    -27.7    0.978     2
Total extinctions: 0
Generation time: 0.156 sec (0.170 average)

 ****** Running generation 3 ****** 

Population's average fitness: -32.20888 stdev: 35.54112
Best fitness: -27.72589 - size: (3, 5) - species 1 - id 194
Average adjusted fitness: 0.

In [7]:
# Обработка и обучение для df_large
X_large = df_large.drop('collision', axis=1)
y_large = df_large['collision']
scaler_large = StandardScaler()
X_large_scaled = scaler_large.fit_transform(X_large)
X_train_large, X_test_large, y_train_large, y_test_large = train_test_split(
    X_large_scaled, y_large, test_size=0.2, random_state=42
)

model_large = ESHyperNEAT('es_config.ini', n_features=6)
winner_large = model_large.run(X_train_large, y_train_large)
result_large = model_large.evaluate(X_test_large, y_test_large, winner_large)


 ****** Running generation 0 ****** 



  output_value = 1 / (1 + np.exp(-output_value))


Population's average fitness: -1813.82008 stdev: 2065.51042
Best fitness: -553.10514 - size: (3, 6) - species 1 - id 31
Average adjusted fitness: 0.848
Mean genetic distance 1.816, standard deviation 0.352
Population of 150 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    0   143   -553.1    0.848     0
     2    0     7       --       --     0
Total extinctions: 0
Generation time: 1.863 sec

 ****** Running generation 1 ****** 

Population's average fitness: -1152.87181 stdev: 1678.55386
Best fitness: -552.88577 - size: (4, 7) - species 1 - id 182
Average adjusted fitness: 0.956
Mean genetic distance 1.860, standard deviation 0.442
Population of 150 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    1    55   -552.9    0.932     0
     2    1    95   -553.9    0.981     0
Total extinctions: 0
Generation time: 1.853 sec (1.858 average)

 ****** Running generation 2 ****** 

Population's average fitness: -1040.08319 stdev: 1741.10988
Best 

In [8]:
# Сравнение моделей
print("\nСравнение моделей:")
print(f"F1-score на df_small: {result_small['f1_score']:.4f}")
print(f"F1-score на df_large: {result_large['f1_score']:.4f}")
if result_small['f1_score'] > result_large['f1_score']:
    print("Модель на df_small показала лучший результат.")
else:
    print("Модель на df_large показала лучший результат.")



Сравнение моделей:
F1-score на df_small: 0.6667
F1-score на df_large: 0.6027
Модель на df_small показала лучший результат.


In [9]:
# Сохранение моделей и результатов
joblib.dump({
    'model': model_small,
    'winner': winner_small,
    'result': result_small,
    'scaler': scaler_small
}, '4_model_small.pkl')

joblib.dump({
    'model': model_large,
    'winner': winner_large,
    'result': result_large,
    'scaler': scaler_large
}, '4_model_large.pkl')

['4_model_large.pkl']