**genetic algorithm on Credit Risk Modeling Using Genetic Algorithms with DEAP libarary**

In [1]:
pip install deap


Collecting deap
  Downloading deap-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/135.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m133.1/135.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.4/135.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: deap
Successfully installed deap-1.4.1


In [3]:
import pandas as pd
import numpy as np
from deap import base, creator, tools, algorithms
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Load the dataset
credit_data = pd.read_csv('/content/credit_data.csv')

In [5]:
# Split the data into features and target
X = credit_data.drop(columns=['CustomerID', 'Default'])
y = credit_data['Default']


In [6]:
# Split into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [2]:


# Define evaluation function
def evaluate(individual):
    n_estimators = int(individual[0])
    max_depth = int(individual[1])
    min_samples_split = int(individual[2])
    min_samples_leaf = int(individual[3])

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    return accuracy,

# Setup DEAP
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_int", np.random.randint, 10, 200)
toolbox.register("attr_depth", np.random.randint, 1, 50)
toolbox.register("attr_samples_split", np.random.randint, 2, 10)
toolbox.register("attr_samples_leaf", np.random.randint, 1, 10)

toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_int, toolbox.attr_depth, toolbox.attr_samples_split, toolbox.attr_samples_leaf), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=[10, 1, 2, 1], up=[200, 50, 10, 10], indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate)

# Genetic Algorithm parameters
population = toolbox.population(n=50)
n_generations = 20
crossover_prob = 0.7
mutation_prob = 0.2

# Run Genetic Algorithm
for gen in range(n_generations):
    offspring = algorithms.varAnd(population, toolbox, cxpb=crossover_prob, mutpb=mutation_prob)
    fits = list(map(toolbox.evaluate, offspring))

    for fit, ind in zip(fits, offspring):
        ind.fitness.values = fit

    population = toolbox.select(offspring, k=len(population))

    top_ind = tools.selBest(population, k=1)[0]
    print(f"Generation {gen}: Best Accuracy = {top_ind.fitness.values[0]}")

# Best individual
best_ind = tools.selBest(population, k=1)[0]
print("Best individual is:", best_ind)
print("with fitness:", best_ind.fitness.values)


Generation 0: Best Accuracy = 0.535
Generation 1: Best Accuracy = 0.545
Generation 2: Best Accuracy = 0.545
Generation 3: Best Accuracy = 0.545
Generation 4: Best Accuracy = 0.545
Generation 5: Best Accuracy = 0.545
Generation 6: Best Accuracy = 0.545
Generation 7: Best Accuracy = 0.545
Generation 8: Best Accuracy = 0.545
Generation 9: Best Accuracy = 0.545
Generation 10: Best Accuracy = 0.545
Generation 11: Best Accuracy = 0.545
Generation 12: Best Accuracy = 0.545
Generation 13: Best Accuracy = 0.545
Generation 14: Best Accuracy = 0.545
Generation 15: Best Accuracy = 0.545
Generation 16: Best Accuracy = 0.545
Generation 17: Best Accuracy = 0.545
Generation 18: Best Accuracy = 0.545
Generation 19: Best Accuracy = 0.545
Best individual is: [179, 18, 3, 4]
with fitness: (0.545,)
