<a href="https://colab.research.google.com/github/Piyushnagdeote/Genetic-Algorithm-GA-Implementation/blob/main/Genetic_Algorithm_(GA)_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
#Paper 3: "A Hybrid Genetic Algorithm-Particle Swarm Optimization Framework to Enhance Network Intrusion Detection Systems"

#Setup
pip install numpy scikit-learn deap




In [14]:
#Import Libraries

import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from deap import base, creator, tools, algorithms
import random

In [7]:
#Data Preparation

# Load sample dataset (replace with actual NIDS dataset)
data = load_digits()
X = data.data
y = data.target

# Preprocess the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [8]:
#Genetic Algorithm (GA) Implementation

# Define the evaluation function
def evaluate(individual):
    # Select features based on the individual
    selected_features = [i for i, bit in enumerate(individual) if bit == 1]
    if len(selected_features) == 0:
        return 0,

    X_train_selected = X_train[:, selected_features]
    X_test_selected = X_test[:, selected_features]

    # Train and evaluate the classifier
    clf = RandomForestClassifier(n_estimators=50, random_state=42)
    clf.fit(X_train_selected, y_train)
    predictions = clf.predict(X_test_selected)
    accuracy = accuracy_score(y_test, predictions)
    return accuracy,

# GA Configuration
toolbox = base.Toolbox()
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X.shape[1])
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate)

def run_ga():
    population = toolbox.population(n=50)
    ngen = 20
    cxpb = 0.5
    mutpb = 0.2

    for gen in range(ngen):
        offspring = algorithms.varAnd(population, toolbox, cxpb, mutpb)
        fits = toolbox.map(toolbox.evaluate, offspring)

        for fit, ind in zip(fits, offspring):
            ind.fitness.values = fit

        population = toolbox.select(offspring, k=len(population))

    top_individual = tools.selBest(population, k=1)[0]
    return top_individual

# Run GA
best_individual_ga = run_ga()
print("Best individual from GA:", best_individual_ga)




Best individual from GA: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]


In [9]:
#Particle Swarm Optimization (PSO) Implementation

# PSO Configuration
def pso(n_particles, n_iterations):
    w = 0.5
    c1 = 0.8
    c2 = 0.9
    dimensions = X.shape[1]

    # Initialize particle positions and velocities
    particle_positions = np.random.randint(2, size=(n_particles, dimensions))
    particle_velocities = np.random.rand(n_particles, dimensions)
    personal_best_positions = np.copy(particle_positions)
    personal_best_scores = np.array([evaluate(ind)[0] for ind in particle_positions])

    global_best_position = personal_best_positions[np.argmax(personal_best_scores)]
    global_best_score = np.max(personal_best_scores)

    for _ in range(n_iterations):
        for i in range(n_particles):
            r1, r2 = np.random.rand(dimensions), np.random.rand(dimensions)
            particle_velocities[i] = (w * particle_velocities[i] +
                                      c1 * r1 * (personal_best_positions[i] - particle_positions[i]) +
                                      c2 * r2 * (global_best_position - particle_positions[i]))
            particle_positions[i] = np.where(particle_positions[i] + particle_velocities[i] > 0.5, 1, 0)

            current_score = evaluate(particle_positions[i])[0]
            if current_score > personal_best_scores[i]:
                personal_best_scores[i] = current_score
                personal_best_positions[i] = particle_positions[i]

            if current_score > global_best_score:
                global_best_score = current_score
                global_best_position = particle_positions[i]

    return global_best_position

# Run PSO
best_individual_pso = pso(n_particles=30, n_iterations=50)
print("Best individual from PSO:", best_individual_pso)


Best individual from PSO: [0 1 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 0 0 0 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 1 0 0 1 1]


In [10]:
#Hybrid GA-PSO Framework

# Hybrid GA-PSO Function
def hybrid_ga_pso():
    # Run GA to get initial population
    best_individual_ga = run_ga()

    # Use the best individual from GA as the initial swarm for PSO
    initial_swarm = np.tile(best_individual_ga, (30, 1))

    # Run PSO
    best_individual_pso = pso(n_particles=30, n_iterations=50)

    return best_individual_pso

# Run Hybrid GA-PSO
best_individual_hybrid = hybrid_ga_pso()
print("Best individual from Hybrid GA-PSO:", best_individual_hybrid)


Best individual from Hybrid GA-PSO: [1 1 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1
 1 1 0 1 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1]


In [11]:
#Evaluation

def evaluate_final(individual):
    selected_features = [i for i, bit in enumerate(individual) if bit == 1]
    if len(selected_features) == 0:
        return 0

    X_train_selected = X_train[:, selected_features]
    X_test_selected = X_test[:, selected_features]

    clf = RandomForestClassifier(n_estimators=50, random_state=42)
    clf.fit(X_train_selected, y_train)
    predictions = clf.predict(X_test_selected)
    accuracy = accuracy_score(y_test, predictions)
    return accuracy

final_accuracy = evaluate_final(best_individual_hybrid)
print("Final accuracy with Hybrid GA-PSO selected features:", final_accuracy)


Final accuracy with Hybrid GA-PSO selected features: 0.9851851851851852
