In [1]:
!pip install pygad

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip[0m


In [None]:
import numpy as np
import pygad
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load the Iris dataset and hold out 20% of the samples as a test split.
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply the same fitted
# scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

def logreg_fitness(ga_instance, solution, solution_idx):
    """Fitness function for the LogisticRegression hyper-parameter search.

    Decodes a two-gene solution into ``C`` and ``penalty`` and scores the
    resulting model by mean 3-fold cross-validated accuracy on the scaled
    training data.

    Args:
        ga_instance: The GA object calling this function (unused here).
        solution: Two genes. Gene 0 maps to ``C = 10 ** (gene * 3 - 4)``
            (1e-4 .. 1e-1 for genes in [0, 1]); gene 1 is scaled by 3 and
            rounded to pick an entry of ``['l1', 'l2', 'elasticnet', None]``.
        solution_idx: Index of the solution in the population (unused here).

    Returns:
        The mean cross-validated accuracy, or 0 when the configuration
        cannot be evaluated (an exception is raised or the score is NaN).
    """
    C = 10 ** (solution[0] * 3 - 4)
    penalty_idx = int(round(solution[1] * 3))
    penalties = ['l1', 'l2', 'elasticnet', None]

    try:
        penalty = penalties[penalty_idx]
        # NOTE(review): no l1_ratio is passed, so 'elasticnet' candidates
        # presumably fail inside scikit-learn and end up with fitness 0 --
        # confirm against the installed scikit-learn version.
        model = LogisticRegression(
            C=C,
            penalty=penalty,
            solver='saga' if penalty in ['l1', 'elasticnet'] else 'lbfgs',
            max_iter=1000,
            random_state=42
        )
        score = cross_val_score(model, X_train_scaled, y_train, cv=3, scoring='accuracy').mean()
    except Exception:
        # Invalid configurations are penalised instead of aborting the search.
        # Only Exception is caught so KeyboardInterrupt / SystemExit still
        # propagate and the GA run can be interrupted.
        return 0

    # A NaN mean (e.g. some folds failed to fit) must not be used as fitness,
    # because NaN does not compare meaningfully against real scores.
    if np.isnan(score):
        return 0
    return score

# GA settings; num_generations, num_parents_mating and sol_per_pop are also
# reused by the DecisionTree search further down.
num_generations = 30
num_parents_mating = 4
sol_per_pop = 20

# Two genes for LogisticRegression, each sampled from [0, 1] and decoded
# inside logreg_fitness.
num_genes = 2
gene_space = [{'low': 0, 'high': 1}, {'low': 0, 'high': 1}]

ga_instance_logreg = pygad.GA(
    num_generations=num_generations,
    num_parents_mating=num_parents_mating,
    fitness_func=logreg_fitness,
    sol_per_pop=sol_per_pop,
    num_genes=num_genes,
    gene_space=gene_space,
    mutation_type="random",
    mutation_probability=0.1,
    # Fixed seed so that the search is reproducible across kernel restarts.
    random_seed=42
)

ga_instance_logreg.run()

# best_solution() returns a tuple whose first element is the gene vector;
# decode it with the same mapping used in logreg_fitness.
best_solution_logreg = ga_instance_logreg.best_solution()
best_C = 10 ** (best_solution_logreg[0][0] * 3 - 4)
penalties = ['l1', 'l2', 'elasticnet', None]
best_penalty = penalties[int(round(best_solution_logreg[0][1] * 3))]

# NOTE(review): when best_penalty is None the model is unregularised, so the
# reported C presumably has no effect -- confirm before interpreting it.
print(f"Лучшие параметры для LogisticRegression: C={best_C:.4f}, penalty={best_penalty}")

# Refit the winning configuration on the full (scaled) training split.
best_logreg = LogisticRegression(
    C=best_C,
    penalty=best_penalty,
    solver='saga' if best_penalty in ['l1', 'elasticnet'] else 'lbfgs',
    max_iter=1000,
    random_state=42
).fit(X_train_scaled, y_train)

def tree_fitness(ga_instance, solution, solution_idx):
    """Fitness function for the DecisionTreeClassifier hyper-parameter search.

    Each gene is linearly rescaled and rounded to an integer setting. For
    genes in [0, 1] this gives ``max_depth`` in 1..50, ``min_samples_split``
    in 2..20, ``min_samples_leaf`` in 1..10, and a choice between the
    'gini' and 'entropy' criteria.

    Args:
        ga_instance: The GA object calling this function (unused here).
        solution: Four genes, decoded as described above.
        solution_idx: Index of the solution in the population (unused here).

    Returns:
        Mean 3-fold cross-validated accuracy on the unscaled training data.
    """
    hyperparams = {
        'max_depth': int(round(solution[0] * 49 + 1)),
        'min_samples_split': int(round(solution[1] * 18 + 2)),
        'min_samples_leaf': int(round(solution[2] * 9 + 1)),
        'criterion': ('gini', 'entropy')[int(round(solution[3]))],
        'random_state': 42,
    }
    classifier = DecisionTreeClassifier(**hyperparams)
    fold_scores = cross_val_score(classifier, X_train, y_train, cv=3, scoring='accuracy')
    return fold_scores.mean()

# Four genes for the decision tree, each sampled from [0, 1] and decoded
# inside tree_fitness (max_depth, min_samples_split, min_samples_leaf, criterion).
num_genes = 4
gene_space = [
    {'low': 0, 'high': 1},
    {'low': 0, 'high': 1},
    {'low': 0, 'high': 1},
    {'low': 0, 'high': 1}
]

ga_instance_tree = pygad.GA(
    num_generations=num_generations,
    num_parents_mating=num_parents_mating,
    fitness_func=tree_fitness,
    sol_per_pop=sol_per_pop,
    num_genes=num_genes,
    gene_space=gene_space,
    mutation_type="random",
    mutation_probability=0.1,
    # Fixed seed so that the search is reproducible across kernel restarts.
    random_seed=42
)

ga_instance_tree.run()

# best_solution() returns a tuple whose first element is the gene vector;
# decode it with the same mapping used in tree_fitness.
best_solution_tree = ga_instance_tree.best_solution()
best_max_depth = int(round(best_solution_tree[0][0] * 49 + 1))
best_min_samples_split = int(round(best_solution_tree[0][1] * 18 + 2))
best_min_samples_leaf = int(round(best_solution_tree[0][2] * 9 + 1))
best_criterion = ['gini', 'entropy'][int(round(best_solution_tree[0][3]))]

print(f"Лучшие параметры для DecisionTree: max_depth={best_max_depth}, min_samples_split={best_min_samples_split}, "
      f"min_samples_leaf={best_min_samples_leaf}, criterion={best_criterion}")

# Refit the winning configuration on the full (unscaled) training split.
best_tree = DecisionTreeClassifier(
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    min_samples_leaf=best_min_samples_leaf,
    criterion=best_criterion,
    random_state=42
).fit(X_train, y_train)

# Predict on the held-out split: the logistic regression gets the scaled
# features it was trained on, the tree gets the raw features.
y_pred_logreg = best_logreg.predict(X_test_scaled)
y_pred_tree = best_tree.predict(X_test)

# Print accuracy and the per-class report for each model, in the same order.
for report_title, predictions in (
    ("\nLogistic Regression:", y_pred_logreg),
    ("\nDecision Tree:", y_pred_tree),
):
    print(report_title)
    print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
    print(classification_report(y_test, predictions))

# Persist both fitted models and the scaler needed to preprocess inputs for
# the logistic regression. The last dump stays the final expression so its
# return value remains the cell's displayed output.
joblib.dump(best_logreg, 'best_logreg_ga.joblib')
joblib.dump(scaler, 'scaler_ga.joblib')
joblib.dump(best_tree, 'best_tree_ga.joblib')



Лучшие параметры для LogisticRegression: C=0.0027, penalty=None
Лучшие параметры для DecisionTree: max_depth=14, min_samples_split=16, min_samples_leaf=3, criterion=gini

Logistic Regression:
Accuracy: 1.0000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Decision Tree:
Accuracy: 1.0000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

['best_tree_ga.joblib']