<a href="https://colab.research.google.com/github/RizulThakur001/Med.ipynb/blob/main/Med.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Genetic Algorithm***

# **Decision Tree**

# **Entropy**

**Random state=42,test=0.2**

In [None]:
import random
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

def generate_population(size):
  """Build `size` random decision-tree hyperparameter dictionaries.

  Every dictionary carries 'criterion' (always 'entropy'), 'max_depth'
  (1-10), 'min_samples_split' (2-20) and 'min_samples_leaf' (1-10).
  """
  population = []
  for _ in range(size):
    # Genes are drawn in a fixed order so seeded runs stay reproducible.
    criterion = random.choice(['entropy'])
    max_depth = random.randint(1, 10)
    min_samples_split = random.randint(2, 20)
    min_samples_leaf = random.randint(1, 10)
    population.append({
        'criterion': criterion,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
    })
  return population

def fitness(individual, x_train, x_test, y_train, y_test):
  """Score one hyperparameter set by its accuracy on the supplied test split.

  A DecisionTreeClassifier configured from `individual` is fitted on
  (x_train, y_train); the accuracy of its predictions for x_test against
  y_test is returned.
  """
  model = DecisionTreeClassifier(**individual)
  model.fit(x_train, y_train)
  return accuracy_score(y_test, model.predict(x_test))

def selection(population, fitnesses):
  """Pick two parents by fitness-proportionate (roulette-wheel) sampling.

  Sampling is with replacement, so the same individual may be returned twice.
  When no individual has a positive fitness the weights carry no information
  (and random.choices rejects an all-zero weight list), so the two parents
  are drawn uniformly instead.

  Returns a list of two members of `population`.
  """
  if not any(weight > 0 for weight in fitnesses):
    return random.choices(population, k=2)
  return random.choices(population, weights=fitnesses, k=2)

def crossover(parent1, parent2):
  """Create a child by uniform crossover.

  For every key of `parent1` the child takes the value of one of the two
  parents, chosen at random with equal probability.
  """
  return {
      gene: random.choice((parent1[gene], parent2[gene]))
      for gene in parent1
  }

def mutation(individual, mutation_rate):
  """Resample each hyperparameter with probability `mutation_rate`.

  The dictionary is modified in place and also returned. Keys other than the
  four known hyperparameters are left untouched.
  """
  resample = {
      'criterion': lambda: random.choice(['entropy']),
      'max_depth': lambda: random.randint(1, 10),
      'min_samples_split': lambda: random.randint(2, 20),
      'min_samples_leaf': lambda: random.randint(1, 10),
  }
  for gene in individual:
    # One random draw per gene decides whether that gene mutates.
    if not random.random() < mutation_rate:
      continue
    if gene in resample:
      individual[gene] = resample[gene]()
  return individual

# Load custom dataset: every column but the last is a feature, the last is the label
df = pd.read_csv('/content/drive/MyDrive/new_data.csv')
x = df.iloc[:, 0:-1].values
y = df.iloc[:, -1].values

# Split the data to train and test the model (20% held out).
# NOTE(review): the cell heading states random_state=42 but this split uses 57 - confirm.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=57)

# Baseline: an untuned decision tree scored on the held-out rows.
# (y_pred and accuracy are overwritten by the tuned model further down.)
from sklearn import tree
classifier = tree.DecisionTreeClassifier()
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)

population_size = 80
mutation_rate = 0.9

population = generate_population(population_size)

# Score the initial population once; afterwards only the replaced slot is
# re-scored, instead of refitting every tree in every generation.
# NOTE(review): fitness is measured on the test split, so the final
# "Test accuracy" is not an unbiased estimate.
fitnesses = [fitness(individual, x_train, x_test, y_train, y_test) for individual in population]

for generation in range(50):
  parent1, parent2 = selection(population, fitnesses)
  child = crossover(parent1, parent2)
  child = mutation(child, mutation_rate)

  # Replace the weakest individual. Writing to population[-1] every time made
  # each child overwrite the previous one, so the population never evolved.
  worst_index = min(range(len(population)), key=fitnesses.__getitem__)
  population[worst_index] = child
  fitnesses[worst_index] = fitness(child, x_train, x_test, y_train, y_test)

best_index = max(range(len(population)), key=fitnesses.__getitem__)
best_individual = population[best_index]

# Refit the winning configuration and report its accuracy on the test split
best_clf = DecisionTreeClassifier(**best_individual)
best_clf.fit(x_train, y_train)
y_pred = best_clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

print("Best hyperparameters:", best_individual)
print("Test accuracy:", accuracy)

Best hyperparameters: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_split': 3, 'min_samples_leaf': 3}
Test accuracy: 0.9206349206349206


# ***GINI***

**random state=42,test=0.2**

In [None]:
import random
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.preprocessing import LabelEncoder
def generate_population(size):
  """Return a list of `size` random decision-tree hyperparameter dictionaries.

  'criterion' is 'gini' or 'entropy'; 'max_depth' lies in 1-10,
  'min_samples_split' in 2-20 and 'min_samples_leaf' in 1-10.
  """
  def random_individual():
    # Values are evaluated top to bottom, one random draw per gene.
    return {
        'criterion': random.choice(['gini', 'entropy']),
        'max_depth': random.randint(1, 10),
        'min_samples_split': random.randint(2, 20),
        'min_samples_leaf': random.randint(1, 10),
    }

  return [random_individual() for _ in range(size)]

def fitness(individual, x_train, x_test, y_train, y_test):
  """Return the test-split accuracy of a tree built from `individual`.

  `individual` is passed as keyword arguments to DecisionTreeClassifier; the
  tree is fitted on the training split and scored on (x_test, y_test).
  """
  tree_model = DecisionTreeClassifier(**individual)
  tree_model.fit(x_train, y_train)
  predictions = tree_model.predict(x_test)
  score = accuracy_score(y_test, predictions)
  return score

def selection(population, fitnesses):
  """Draw two parents with probability proportional to their fitness.

  The draw is with replacement, so both parents can be the same individual.
  If every fitness is zero, random.choices cannot build a weighted draw, so
  the parents are sampled uniformly instead of aborting the run.

  Returns a list with two members of `population`.
  """
  has_signal = any(weight > 0 for weight in fitnesses)
  if has_signal:
    return random.choices(population, weights=fitnesses, k=2)
  return random.choices(population, k=2)

def crossover(parent1, parent2):
  """Mix two hyperparameter sets gene by gene (uniform crossover).

  Each key of `parent1` is copied to the child from either parent with
  equal probability.
  """
  child = {}
  for key, own_value in parent1.items():
    candidates = [own_value, parent2[key]]
    child[key] = random.choice(candidates)
  return child

def mutation(individual, mutation_rate):
  """Randomly resample hyperparameters, each with probability `mutation_rate`.

  Works in place and returns the same dictionary. Unknown keys are never
  changed.
  """
  samplers = {
      'criterion': (random.choice, (['gini', 'entropy'],)),
      'max_depth': (random.randint, (1, 10)),
      'min_samples_split': (random.randint, (2, 20)),
      'min_samples_leaf': (random.randint, (1, 10)),
  }
  for key in individual:
    # The mutation draw happens for every key, known or not.
    if random.random() < mutation_rate and key in samplers:
      draw, args = samplers[key]
      individual[key] = draw(*args)
  return individual

# Load custom dataset: every column but the last is a feature, the last is the label
df = pd.read_csv('/content/drive/MyDrive/new_data.csv')
x = df.iloc[:, 0:-1].values
y = df.iloc[:, -1].values

# Encode the labels as integers
le = LabelEncoder()
y = le.fit_transform(y)

# Split the data to train and test the model (20% held out)
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Baseline tree fitted and scored on the full dataset (training accuracy).
# y_pred and accuracy are overwritten by the tuned model further down.
from sklearn import tree
classifier = tree.DecisionTreeClassifier()
classifier.fit(x, y)
y_pred = classifier.predict(x)

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y, y_pred)

population_size = 90
mutation_rate = 0.5

population = generate_population(population_size)

# Score the initial population once; afterwards only the replaced slot is
# re-scored, instead of refitting every tree in every generation.
# NOTE(review): fitness is measured on the test split, so the final
# "Test accuracy" is not an unbiased estimate.
fitnesses = [fitness(individual, x_train, x_test, y_train, y_test) for individual in population]

for generation in range(50):
  parent1, parent2 = selection(population, fitnesses)
  child = crossover(parent1, parent2)
  child = mutation(child, mutation_rate)

  # Replace the weakest individual. Writing to population[-1] every time made
  # each child overwrite the previous one, so the population never evolved.
  worst_index = min(range(len(population)), key=fitnesses.__getitem__)
  population[worst_index] = child
  fitnesses[worst_index] = fitness(child, x_train, x_test, y_train, y_test)

best_index = max(range(len(population)), key=fitnesses.__getitem__)
best_individual = population[best_index]

# Refit the winning configuration and report its accuracy on the test split
best_clf = DecisionTreeClassifier(**best_individual)
best_clf.fit(x_train, y_train)
y_pred = best_clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

print("Best hyperparameters:", best_individual)
print("Test accuracy:", accuracy)

Best hyperparameters: {'criterion': 'gini', 'max_depth': 9, 'min_samples_split': 10, 'min_samples_leaf': 1}
Test accuracy: 0.9365079365079365


# ***Random Forest***

**random state=42,test=0.2**

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.optimize import differential_evolution

# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/new_data.csv')

# Separate the 'RiskLevel' target from the feature columns
y = df['RiskLevel']
X = df.drop(columns='RiskLevel')

# Hold out 20% of the rows for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define the random forest model
def random_forest_model(params):
    """Objective for differential_evolution: negated cross-validated accuracy.

    `params` holds, in order, n_estimators, max_depth, min_samples_split and
    min_samples_leaf as floats; each is truncated to int before use.

    The forest is scored with 3-fold cross-validation on the training split
    rather than on the very rows it was fitted to. Accuracy on the fitting
    rows rewards the deepest, least regularised forest regardless of how well
    it generalises, so it cannot steer the search.

    Returns the negated mean accuracy, because the optimiser minimises.
    """
    # Local import: this cell's import list does not include cross_val_score.
    from sklearn.model_selection import cross_val_score

    clf = RandomForestClassifier(
        n_estimators=int(params[0]),
        max_depth=int(params[1]),
        min_samples_split=int(params[2]),
        min_samples_leaf=int(params[3]),
    )
    scores = cross_val_score(clf, X_train, y_train, cv=3, scoring='accuracy')
    return -scores.mean()  # Negative accuracy as the fitness function

# Search space, one (low, high) pair per hyperparameter
bounds = [
    (10, 100),  # n_estimators
    (1, 10),    # max_depth
    (2, 10),    # min_samples_split
    (1, 10),    # min_samples_leaf
]

# Run the evolutionary search (SciPy's differential evolution)
result = differential_evolution(random_forest_model, bounds, maxiter=100, popsize=50)

# Best parameter vector found; values are floats and truncated to int below
opt_params = result.x

# Refit a forest with the optimised parameters
clf_opt = RandomForestClassifier(
    n_estimators=int(opt_params[0]),
    max_depth=int(opt_params[1]),
    min_samples_split=int(opt_params[2]),
    min_samples_leaf=int(opt_params[3]),
)
clf_opt.fit(X_train, y_train)

# Score the optimised model on the held-out rows
y_pred = clf_opt.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Optimized accuracy on testing set:", accuracy)

Optimized accuracy on testing set: 0.9126984126984127


# ***SVM***

**Random state=42,test=0.2**

In [None]:
!pip install deap

Collecting deap
  Downloading deap-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.4/135.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: deap
Successfully installed deap-1.4.1


In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from deap import base, creator, tools, algorithms

# Load the dataset ('RiskLevel' is the target column)
data = pd.read_csv('/content/drive/MyDrive/new_data.csv')
y = data['RiskLevel']
X = data.drop(columns='RiskLevel')

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: statistics come from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Define evaluation function
def evaluate_svm(individual):
    """Fitness of an RBF-kernel SVC: mean 5-fold CV accuracy on the training split.

    individual[0] is C and individual[1] is gamma. The result is a 1-tuple,
    the shape DEAP expects for fitness values.

    The blend crossover registered as "mate" can move a gene outside the range
    it was initialised in, down to zero or below, and such values are not
    valid SVC settings. Those individuals score 0.0 so they are selected
    against instead of aborting the whole run with an exception.
    """
    C, gamma = individual[0], individual[1]
    if C <= 0 or gamma <= 0:
        return 0.0,
    model = SVC(C=C, gamma=gamma, kernel='rbf')
    scores = cross_val_score(model, X_train, y_train, cv=5)
    return scores.mean(),

# confusion_matrix and classification_report are used at the end of this cell
# but are missing from its import list; import them here so the cell runs on
# its own.
from sklearn.metrics import classification_report, confusion_matrix

# Set up genetic algorithm: one maximised objective, list-based individuals
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Each individual is [C, gamma], both drawn uniformly from [0.1, 100)
toolbox.register("attr_float", np.random.uniform, 0.1, 100)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=2)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxBlend, alpha=0.1)
toolbox.register("mutate", tools.mutPolynomialBounded, low=0.1, up=100, eta=1.0, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate_svm)

population = toolbox.population(n=90)
# Genetic Algorithm parameters
NGEN = 40
CXPB = 0.5
MUTPB = 0.5

# Run Genetic Algorithm: vary, score every offspring, then select the next generation
for gen in range(NGEN):
    offspring = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)
    fits = list(map(toolbox.evaluate, offspring))

    for fit, ind in zip(fits, offspring):
        ind.fitness.values = fit

    population = toolbox.select(offspring, k=len(population))

best_individual = tools.selBest(population, k=1)[0]
print(f'Best individual: {best_individual}')
print(f'Best fitness: {best_individual.fitness.values[0]}')

# Train the final SVM model with the best hyperparameters
best_C = best_individual[0]
best_gamma = best_individual[1]
svm_model = SVC(C=best_C, gamma=best_gamma, kernel='rbf')
svm_model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = svm_model.predict(X_test)

# Evaluate the model
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))




Best individual: [74.16402612936587, 0.1383701158452107]
Best fitness: 0.9265742574257425
[[48  0  1]
 [ 0 32  2]
 [ 2  5 36]]
              precision    recall  f1-score   support

   high risk       0.96      0.98      0.97        49
    low risk       0.86      0.94      0.90        34
    mid risk       0.92      0.84      0.88        43

    accuracy                           0.92       126
   macro avg       0.92      0.92      0.92       126
weighted avg       0.92      0.92      0.92       126



# ***Naive Bayes***

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix
from deap import base, creator, tools, algorithms
from sklearn.preprocessing import LabelEncoder
# Load the dataset ('RiskLevel' is the target column)
df = pd.read_csv('/content/drive/MyDrive/new_data.csv')
y = df['RiskLevel']
X = df.drop(columns='RiskLevel')

# Encode the target variable as a numerical label
le = LabelEncoder()
y = le.fit_transform(y)

# Split into training and testing sets (20% held out)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define evaluation function
def evaluate_nb(individual):
    """Fitness of a feature mask: mean 5-fold CV accuracy of GaussianNB.

    `individual` is a sequence of 0/1 flags, one per column of X_train; the
    columns whose flag equals 1 are kept. A mask that keeps no column scores
    0. The result is always a 1-tuple.
    """
    columns = [position for position, flag in enumerate(individual) if flag == 1]
    if columns:
        # Columns are picked by integer position, hence .iloc
        subset = X_train.iloc[:, columns]
        scores = cross_val_score(GaussianNB(), subset, y_train, cv=5)
        return scores.mean(),
    return 0,  # no feature selected

# Set up genetic algorithm: one maximised objective, list-based individuals
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# One gene per feature column; each gene is a random 0/1 flag
toolbox.register("attr_bool", np.random.randint, 2)
toolbox.register(
    "individual",
    tools.initRepeat,
    creator.Individual,
    toolbox.attr_bool,
    n=X.shape[1],
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Genetic operators
toolbox.register("evaluate", evaluate_nb)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)

# Genetic Algorithm parameters
NGEN = 40
CXPB = 0.5
MUTPB = 0.2

population = toolbox.population(n=30)

# Run Genetic Algorithm: vary, score every offspring, select the next generation
for gen in range(NGEN):
    offspring = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)
    fits = [toolbox.evaluate(candidate) for candidate in offspring]

    for fit, ind in zip(fits, offspring):
        ind.fitness.values = fit

    population = toolbox.select(offspring, k=len(population))

best_individual = tools.selBest(population, k=1)[0]
print(f'Best individual: {best_individual}')
print(f'Best fitness: {best_individual.fitness.values[0]}')

# Positions of the columns switched on in the best mask
selected_features = [i for i, bit in enumerate(best_individual) if bit == 1]
X_train_selected = X_train.iloc[:, selected_features]
X_test_selected = X_test.iloc[:, selected_features]

# Train the final Naive Bayes model on the selected columns only
nb_model = GaussianNB()
nb_model.fit(X_train_selected, y_train)

# Predict on the held-out rows and report
y_pred = nb_model.predict(X_test_selected)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))



Best individual: [0, 0, 1, 1, 1, 0, 0, 1, 0]
Best fitness: 0.8708120531154238
[[63  2  7]
 [ 0 49  8]
 [ 2  4 54]]
              precision    recall  f1-score   support

           0       0.97      0.88      0.92        72
           1       0.89      0.86      0.88        57
           2       0.78      0.90      0.84        60

    accuracy                           0.88       189
   macro avg       0.88      0.88      0.88       189
weighted avg       0.89      0.88      0.88       189



In [None]:
import numpy as np
import random
from deap import base, creator, tools, algorithms
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
from sklearn.preprocessing import LabelEncoder

# pandas and train_test_split are used below but are missing from this cell's
# import list; import them here so the cell runs on its own.
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset: every column but the last is a feature, the last is the label
df = pd.read_csv('/content/drive/MyDrive/new_data.csv')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Encode the target variable as a numerical label. This happens before the
# split so that y_train / y_test hold the same numeric codes as y.
le = LabelEncoder()
y = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a fitness function
def evaluate(individual):
    """Fitness of a feature mask: GaussianNB accuracy on the selected columns.

    `individual` is converted to a boolean mask over the columns of X. The
    classifier is fitted on all rows of X and scored on those same rows, so
    the value is a training accuracy. A mask that selects nothing scores 0.0.
    The result is always a 1-tuple.
    """
    mask = np.array(individual, dtype=bool)
    if not mask.any():
        # Avoid fitting on zero features
        return 0.0,

    selected = X.iloc[:, mask]
    clf = GaussianNB()
    clf.fit(selected, y)
    return clf.score(selected, y),

# Fitness with a single maximised objective, attached to list-based individuals
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# Toolbox: how genes, individuals and populations are built
toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register(
    "individual",
    tools.initRepeat,
    creator.Individual,
    toolbox.attr_bool,
    n=X.shape[1],  # one 0/1 gene per feature column
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Genetic operators
toolbox.register("evaluate", evaluate)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)

# Function to run the genetic algorithm
def main():
    """Evolve feature masks for 20 generations and print the best survivor.

    Starts from 50 random individuals. Each generation applies crossover and
    mutation, scores every offspring and keeps a tournament selection of
    them. The best individual of the final population is printed with its
    fitness.
    """
    NGEN = 20
    CXPB, MUTPB = 0.5, 0.2

    population = toolbox.population(n=50)

    for gen in range(NGEN):
        offspring = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)

        # Score all offspring first, then attach the fitness tuples
        fits = [toolbox.evaluate(candidate) for candidate in offspring]
        for candidate, fit in zip(offspring, fits):
            candidate.fitness.values = fit

        population = toolbox.select(offspring, k=len(population))

    winner = tools.selBest(population, k=1)[0]
    print(f'Best individual is {winner}')
    print(f'with fitness: {winner.fitness.values[0]}')

if __name__ == "__main__":
    main()



Best individual is [0, 0, 1, 1, 1, 0, 0, 1, 0]
with fitness: 0.8857142857142857


# ***knn***

**Random state=42 and test=0.2**

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import random
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# Load the dataset ('RiskLevel' is the target column)
df = pd.read_csv('/content/drive/MyDrive/new_data.csv')
y = df['RiskLevel']
X = df.drop(columns='RiskLevel')

# Genetic Algorithm parameters
population_size = 20
num_generations = 50
mutation_rate = 0.01
num_features = X.shape[1]  # one gene per feature column
k_neighbors = 3  # Number of neighbors for k-NN

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Initialize population
def initialize_population(pop_size, num_features):
    """Return a (pop_size, num_features) integer array of random 0/1 genes."""
    shape = (pop_size, num_features)
    return np.random.randint(0, 2, size=shape)

# Fitness function
def fitness(chromosome, X_train, y_train, X_test, y_test):
    """Accuracy of a k-NN classifier restricted to the columns flagged in `chromosome`.

    The columns where `chromosome` equals 1 are selected by position from
    both DataFrames. A classifier with `k_neighbors` neighbours is fitted on
    the training rows and scored on the test rows. A chromosome that selects
    no column scores 0.
    """
    chosen = np.where(chromosome == 1)[0]
    if len(chosen) > 0:
        knn = KNeighborsClassifier(n_neighbors=k_neighbors)
        # .iloc selects the columns by integer position
        knn.fit(X_train.iloc[:, chosen], y_train)
        predictions = knn.predict(X_test.iloc[:, chosen])
        return accuracy_score(y_test, predictions)
    return 0  # no feature selected

# Selection
def select_parents(population, fitnesses):
    """Draw len(population) parents with probability proportional to fitness.

    Sampling is with replacement. When the fitnesses sum to zero (for example
    every chromosome selects no feature) a weighted draw is impossible, so
    the parents are drawn uniformly instead of raising.

    Returns the chosen chromosomes as a new NumPy array.
    """
    count = len(population)
    if sum(fitnesses) > 0:
        parents = random.choices(population, weights=fitnesses, k=count)
    else:
        parents = random.choices(population, k=count)
    return np.array(parents)

# Crossover
def crossover(parent1, parent2):
    """Single-point crossover of two equal-length chromosomes.

    A cut point is drawn between the first and last gene; each child takes the
    head of one parent and the tail of the other. The chromosome length is
    read from `parent1` rather than from a module-level variable, so the
    function works for any chromosome size.

    Returns a tuple (child1, child2) of new arrays.
    """
    length = len(parent1)
    if length < 2:
        # No interior cut point exists; return copies so the children can be
        # mutated without touching the parents.
        return np.array(parent1), np.array(parent2)
    crossover_point = random.randint(1, length - 1)
    child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
    child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
    return child1, child2

# Mutation
def mutate(chromosome, mutation_rate):
    """Flip each 0/1 gene in place with probability `mutation_rate`.

    Returns None; the chromosome itself is modified.
    """
    for position, gene in enumerate(chromosome):
        # One random draw per gene, in order
        if random.random() < mutation_rate:
            chromosome[position] = 1 - gene

# Main Genetic Algorithm loop
population = initialize_population(population_size, num_features)
for generation in range(num_generations):
    # Fitness is the k-NN accuracy on the test split for each chromosome
    fitnesses = np.array([
        fitness(chromosome, X_train, y_train, X_test, y_test)
        for chromosome in population
    ])
    print(f"Generation {generation}: Best Fitness = {np.max(fitnesses)}")

    parents = select_parents(population, fitnesses)
    new_population = []
    # Parents are consumed in consecutive pairs; each pair yields two children
    for i in range(0, len(parents), 2):
        parent1, parent2 = parents[i], parents[i+1]
        child1, child2 = crossover(parent1, parent2)
        mutate(child1, mutation_rate)
        mutate(child2, mutation_rate)
        new_population += [child1, child2]

    population = np.array(new_population)

# Best solution: re-score the final population and keep the top chromosome
best_chromosome = population[np.argmax([
    fitness(chromosome, X_train, y_train, X_test, y_test)
    for chromosome in population
])]
best_features = np.where(best_chromosome == 1)[0]
print(f"Best chromosome: {best_chromosome}")
print(f"Selected features: {best_features}")

# Train the final model with the selected feature columns
X_train_selected = X_train.iloc[:, best_features]
X_test_selected = X_test.iloc[:, best_features]
final_model = KNeighborsClassifier(n_neighbors=k_neighbors)
final_model.fit(X_train_selected, y_train)
y_pred = final_model.predict(X_test_selected)
final_accuracy = accuracy_score(y_test, y_pred)
print(f"Final accuracy with selected features: {final_accuracy}")


Generation 0: Best Fitness = 0.8650793650793651
Generation 1: Best Fitness = 0.8333333333333334
Generation 2: Best Fitness = 0.8333333333333334
Generation 3: Best Fitness = 0.8571428571428571
Generation 4: Best Fitness = 0.8571428571428571
Generation 5: Best Fitness = 0.8412698412698413
Generation 6: Best Fitness = 0.8412698412698413
Generation 7: Best Fitness = 0.8412698412698413
Generation 8: Best Fitness = 0.8492063492063492
Generation 9: Best Fitness = 0.873015873015873
Generation 10: Best Fitness = 0.873015873015873
Generation 11: Best Fitness = 0.873015873015873
Generation 12: Best Fitness = 0.873015873015873
Generation 13: Best Fitness = 0.873015873015873
Generation 14: Best Fitness = 0.873015873015873
Generation 15: Best Fitness = 0.873015873015873
Generation 16: Best Fitness = 0.873015873015873
Generation 17: Best Fitness = 0.873015873015873
Generation 18: Best Fitness = 0.873015873015873
Generation 19: Best Fitness = 0.873015873015873
Generation 20: Best Fitness = 0.873015873

# ***XGB***

**Random state = 42, test=0.2**

In [None]:
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.datasets import load_breast_cancer
from deap import base, creator, tools, algorithms
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Load the dataset ('RiskLevel' is the target column)
df = pd.read_csv('/content/drive/MyDrive/new_data.csv')
y = df['RiskLevel']
X = df.drop(columns='RiskLevel')

# Encode the class labels as integers
le = LabelEncoder()
y = le.fit_transform(y)

# Split data into training and testing sets (20% held out)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define the fitness function
def evaluate(individual):
    """Fitness of an XGBoost configuration: mean 3-fold CV accuracy on the training split.

    `individual` holds, in order, max_depth, n_estimators, learning_rate and
    subsample; the first two are truncated to int. Returns a 1-tuple.
    """
    max_depth, n_estimators, learning_rate, subsample = (
        individual[0], individual[1], individual[2], individual[3]
    )
    model = xgb.XGBClassifier(
        max_depth=int(max_depth),
        n_estimators=int(n_estimators),
        learning_rate=learning_rate,
        subsample=subsample,
        use_label_encoder=False,
        eval_metric='logloss',
    )
    fold_scores = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy')
    return fold_scores.mean(),

# Fitness with a single maximised objective, attached to list-based individuals
creator.create('FitnessMax', base.Fitness, weights=(1.0,))
creator.create('Individual', list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Gene generators, one per hyperparameter
toolbox.register('attr_int', np.random.randint, 1, 15)  # for max_depth
toolbox.register('attr_n_estimators', np.random.randint, 10, 200)  # for n_estimators
toolbox.register('attr_float_lr', np.random.uniform, 0.01, 0.3)  # for learning_rate
toolbox.register('attr_float_subsample', np.random.uniform, 0.5, 1.0)  # for subsample

# An individual is one pass over the four generators, in this order
toolbox.register(
    'individual',
    tools.initCycle,
    creator.Individual,
    (
        toolbox.attr_int,
        toolbox.attr_n_estimators,
        toolbox.attr_float_lr,
        toolbox.attr_float_subsample,
    ),
    n=1,
)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)

# Genetic operators; the mutation bounds are listed per gene
toolbox.register('evaluate', evaluate)
toolbox.register('select', tools.selTournament, tournsize=3)
toolbox.register('mate', tools.cxTwoPoint)
toolbox.register(
    'mutate',
    tools.mutPolynomialBounded,
    low=[1, 10, 0.01, 0.5],
    up=[15, 200, 0.3, 1.0],
    indpb=0.2,
    eta=1.0,
)

# Run the genetic algorithm
def main():
    """Run the GA over XGBoost hyperparameters and report the tuned model.

    Evolves 20 individuals for 40 generations with DEAP's eaSimple, prints
    the best individual of the final population and its fitness, then refits
    a classifier with those settings and prints its test-split accuracy.
    """
    ngen = 40
    cxpb = 0.5
    mutpb = 0.2
    population = toolbox.population(n=20)

    algorithms.eaSimple(
        population,
        toolbox,
        cxpb=cxpb,
        mutpb=mutpb,
        ngen=ngen,
        stats=None,
        halloffame=None,
        verbose=True,
    )

    best_individual = tools.selBest(population, k=1)[0]
    print('Best Individual:', best_individual)
    print('Best Fitness:', best_individual.fitness.values[0])

    # Train the final model with the best parameters
    best_params = dict(
        max_depth=int(best_individual[0]),
        n_estimators=int(best_individual[1]),
        learning_rate=best_individual[2],
        subsample=best_individual[3],
    )
    final_model = xgb.XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss')
    final_model.fit(X_train, y_train)

    # Evaluate the final model on the test set
    print('Test Accuracy:', final_model.score(X_test, y_test))

if __name__ == '__main__':
    main()




gen	nevals
0  	20    
1  	17    
2  	9     
3  	13    
4  	8     
5  	11    
6  	14    
7  	14    
8  	14    
9  	9     
10 	12    
11 	10    
12 	7     
13 	9     
14 	16    
15 	13    
16 	12    
17 	15    
18 	11    
19 	13    
20 	13    
21 	14    
22 	11    
23 	9     
24 	14    
25 	12    
26 	18    
27 	16    
28 	10    
29 	10    
30 	9     
31 	10    
32 	9     
33 	10    
34 	8     
35 	14    
36 	8     
37 	12    
38 	14    
39 	11    
40 	12    
Best Individual: [11, 57, 0.03805623849708419, 0.6906903524559111]
Best Fitness: 0.9325396825396827
Test Accuracy: 0.9126984126984127


In [None]:
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.datasets import load_breast_cancer
from deap import base, creator, tools, algorithms
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Load the dataset and separate the 'RiskLevel' target from the features
df = pd.read_csv('/content/drive/MyDrive/new_data.csv')
X = df.drop(columns=['RiskLevel'])
y = df['RiskLevel']

# Turn the class labels into integer codes
le = LabelEncoder()
y = le.fit_transform(y)

# Split data into training and testing sets (20% held out)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

# Define the fitness function
def evaluate(individual):
    """Score one XGBoost configuration by mean 3-fold CV accuracy on the training split.

    Genes, in order: max_depth and n_estimators (both truncated to int),
    learning_rate, subsample. Returns a 1-tuple.
    """
    params = dict(
        max_depth=int(individual[0]),
        n_estimators=int(individual[1]),
        learning_rate=individual[2],
        subsample=individual[3],
    )
    model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', **params)
    mean_accuracy = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()
    return mean_accuracy,

# Setup the genetic algorithm: one maximised objective, list-based individuals
creator.create('FitnessMax', base.Fitness, weights=(1.0,))
creator.create('Individual', list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Random generators for the four genes
toolbox.register('attr_int', np.random.randint, 1, 15)  # for max_depth
toolbox.register('attr_n_estimators', np.random.randint, 10, 200)  # for n_estimators
toolbox.register('attr_float_lr', np.random.uniform, 0.01, 0.3)  # for learning_rate
toolbox.register('attr_float_subsample', np.random.uniform, 0.5, 1.0)  # for subsample

# Individuals cycle once through the generators above
toolbox.register(
    'individual', tools.initCycle, creator.Individual,
    (toolbox.attr_int, toolbox.attr_n_estimators,
     toolbox.attr_float_lr, toolbox.attr_float_subsample),
    n=1,
)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)

# Variation, selection and scoring operators
toolbox.register('mate', tools.cxTwoPoint)
toolbox.register(
    'mutate', tools.mutPolynomialBounded,
    low=[1, 10, 0.01, 0.5], up=[15, 200, 0.3, 1.0],
    indpb=0.2, eta=1.0,
)
toolbox.register('select', tools.selTournament, tournsize=3)
toolbox.register('evaluate', evaluate)
def genetic_algorithm(population_size, generations, mutation_rate, nevals):
    # NOTE(review): the only statement here is a call to this same function
    # with fixed arguments. The parameters are never read, nothing is
    # returned, and there is no base case, so any call would recurse until
    # Python raises RecursionError. No call to this function is visible in
    # this cell.
    # Presumably the line below was meant to be a module-level call to a real
    # implementation - confirm, then implement or remove.
    best_params = genetic_algorithm(population_size=50, generations=10, mutation_rate=0.1, nevals=1000)
# Run the genetic algorithm
def main():
    """Evolve XGBoost hyperparameters, then fit and score the best configuration.

    A population of 20 is evolved for 40 generations with DEAP's eaSimple.
    The best individual of the final population and its fitness are printed,
    followed by the test-split accuracy of a classifier refitted with those
    settings.
    """
    population = toolbox.population(n=20)
    ngen, cxpb, mutpb = 40, 0.5, 0.2

    algorithms.eaSimple(population, toolbox, cxpb, mutpb, ngen,
                        stats=None, halloffame=None, verbose=True)

    winner = tools.selBest(population, k=1)[0]
    print('Best Individual:', winner)
    print('Best Fitness:', winner.fitness.values[0])

    # Train the final model with the best parameters
    final_model = xgb.XGBClassifier(
        max_depth=int(winner[0]),
        n_estimators=int(winner[1]),
        learning_rate=winner[2],
        subsample=winner[3],
        use_label_encoder=False,
        eval_metric='logloss',
    )
    final_model.fit(X_train, y_train)

    # Evaluate the final model on the test set
    score = final_model.score(X_test, y_test)
    print('Test Accuracy:', score)

if __name__ == '__main__':
    main()



gen	nevals
0  	20    
1  	11    
2  	13    
3  	9     
4  	13    
5  	15    
6  	11    
7  	13    
8  	10    
9  	16    
10 	14    
11 	12    
12 	10    
13 	12    
14 	9     
15 	11    
16 	11    
17 	6     
18 	12    
19 	16    
20 	8     
21 	7     
22 	16    
23 	13    
24 	14    
25 	12    
26 	14    
27 	15    
28 	11    
29 	15    
30 	12    
31 	9     
32 	10    
33 	9     
34 	13    
35 	14    
36 	14    
37 	14    
38 	10    
39 	12    
40 	11    
Best Individual: [13.011285022123843, 37, 0.06864291522596976, 0.8406915013215863]
Best Fitness: 0.9365079365079364
Test Accuracy: 0.9126984126984127


# ***THANK YOU***