In [4]:
import neat
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from joblib import dump
import pandas as pd

# 1. Загрузка данных
def load_small_dataset():
    dataset = pd.read_csv('dataset_1.csv')
    figure_dict = {
    'circle': 0,
    'rectangle': 1
    }

    dataset['figure1'] = dataset['figure1'].map(figure_dict)
    dataset['figure2'] = dataset['figure2'].map(figure_dict)

    X = dataset.drop('collision', axis=1)
    y = dataset['collision']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test

def load_big_dataset():
    dataset = pd.read_csv('dataset_12.csv')
    figure_dict = {
    'circle': 0,
    'rectangle': 1
    }
    color_dict = {
    'red' : 0, 
    'blue' : 1, 
    'green' : 2, 
    'yellow' : 3
    }
    dataset['figure1'] = dataset['figure1'].map(figure_dict)
    dataset['figure2'] = dataset['figure2'].map(figure_dict)

    dataset['color1'] = dataset['color1'].map(color_dict)
    dataset['color2'] = dataset['color2'].map(color_dict)

    X = dataset.drop('collision', axis=1)
    y = dataset['collision']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test

# 2. Функция оценки для NEAT
def eval_genomes(genomes, config, X_train, y_train, X_test, y_test):
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        outputs = []
        for xi in X_train:
            output = net.activate(xi)
            outputs.append(np.argmax(output))  # Предсказание класса
        accuracy = np.mean(outputs == y_train)
        genome.fitness = accuracy

# 3. Настройка и запуск NEAT
def run_neat(X_train, X_test, y_train, y_test, config_file, name):
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                        neat.DefaultSpeciesSet, neat.DefaultStagnation,
                        config_file)
    
    p = neat.Population(config)
    p.add_reporter(neat.StdOutReporter(True))
    winner = p.run(lambda genomes, config: eval_genomes(genomes, config, X_train, y_train, X_test, y_test), 50)
    
    # Оценка лучшей сети
    winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
    test_outputs = [np.argmax(winner_net.activate(xi)) for xi in X_test]
    test_accuracy = np.mean(test_outputs == y_test)
    print(f"Точность на тестовом наборе ({name}): {test_accuracy}")
    
    return winner, winner_net, test_accuracy

# 4. Сохранение модели

def save_model(winner, config, filename):
    dump((winner, config), filename)


# Маленький набор данных
X_train_small, X_test_small, y_train_small, y_test_small = load_small_dataset()
config_file_small = "config_1dataset"  # Файл конфигурации NEAT
winner_small, net_small, acc_small = run_neat(X_train_small, X_test_small, y_train_small, y_test_small, config_file_small, "маленький")
save_model(winner_small, config_file_small, "4_smalldata.pkl")

# Большой набор данных
X_train_big, X_test_big, y_train_big, y_test_big = load_big_dataset()
config_file_big = "config_2dataset"  # Файл конфигурации NEAT
winner_big, net_big, acc_big = run_neat(X_train_big, X_test_big, y_train_big, y_test_big, config_file_big, "большой")
save_model(winner_big, config_file_big, "4_bigdata.pkl")

# Сравнение
print("\nСравнение моделей:")
print(f"Точность маленькой модели: {acc_small}")
print(f"Точность большой модели: {acc_big}")
print(f"Сложность маленькой модели (узлы): {len(winner_small.nodes)}")
print(f"Сложность большой модели (узлы): {len(winner_big.nodes)}")


 ****** Running generation 0 ****** 

Population's average fitness: 0.37500 stdev: 0.00000
Best fitness: 0.37500 - size: (1, 8) - species 1 - id 1
Average adjusted fitness: 0.000
Mean genetic distance 1.422, standard deviation 0.512
Population of 200 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    0    77      0.4    0.000     0
     2    0   123      0.4    0.000     0
Total extinctions: 0
Generation time: 0.054 sec

 ****** Running generation 1 ****** 

Population's average fitness: 0.37500 stdev: 0.00000
Best fitness: 0.37500 - size: (1, 8) - species 1 - id 1
Average adjusted fitness: 0.000
Mean genetic distance 1.644, standard deviation 0.523
Population of 200 members in 3 species:
   ID   age  size  fitness  adj fit  stag
     1    1    87      0.4    0.000     1
     2    1   106      0.4    0.000     1
     3    0     7       --       --     0
Total extinctions: 0
Generation time: 0.074 sec (0.064 average)

 ****** Running generation 2 ****** 

Populat