In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, scale
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

data_loc = "Data Set/UNSW_NB15.csv"
df = pd.read_csv(data_loc, nrows=300)  # read only the first 300 rows of the dataset
#df = pd.get_dummies(df,columns=["proto","state","service"]) # alternative: one-hot encode the categorical variables

# Integer-encode the categorical columns. The single encoder is re-fitted for every
# column, so after the loop `le` holds the mapping of 'attack_cat' (the last one fitted).
le = LabelEncoder()
for column in ['proto', 'state', 'service', 'attack_cat']:
    df[column] = le.fit_transform(df[column])


# Standardise the feature columns (mean 0, variance 1). The target columns are removed
# first so they are not scaled, and `df` itself is left untouched.
x = df.drop(columns=['attack_cat', 'label'])
# A constant column has a standard deviation of 0, which would turn the whole column
# into NaN (0/0); dividing by 1 instead keeps such a column at 0.
x = (x - x.mean()) / x.std().replace(0, 1)
#x = scale(df)
y = df['attack_cat']


# Split the dataset into train/test; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=.3, random_state=1)
print(X_train.head())



# Without hyperparameter optimisation the default accuracy is 0.66, i.e. about 2/3

In [None]:
# Baseline: an MLP with default hyperparameters apart from a fixed seed and iteration cap.
clf = MLPClassifier(max_iter=300, random_state=1)
clf.fit(X_train, y_train)
# Last expression of the cell: mean accuracy on the held-out split.
clf.score(X_test, y_test)


In [None]:
class DNA:
    """One candidate solution: a thin wrapper around a list of genes."""

    def __init__(self, value: list) -> None:
        """Store ``value`` as the gene list.

        The list is kept by reference (it is not copied), so later in-place
        changes to ``self.value`` are visible through the original list too.
        """
        self.value = value

class GeneticAlgo:
    activation = ['identity', 'logistic', 'tanh', 'relu']
    solver = ['lbfgs', 'sgd', 'adam']
    learning_rate = ['constant', 'invscaling', 'adaptive']
    boolean = [True, False]
    hidden_layer_sizes_range = [1, 3000]
    alpha_range = [0.00001, 0.9]
    batch_size_range = [5, 500]
    learning_rate_init_range = [0.0001, 0.1]
    max_iter_range = [100, 8000]
    random_state_range = [1, 100]
    tol_range = [0.00001, 0.1]
    power_t_range = [0.1, 0.9]
    momentum_range = [0.1, 0.9]
    epsilon_range = [0.0000001, 0.001]
    n_iter_no_change_range = [1, 100]
    max_fun_range = [1000, 20000]

    def __init__(self, n_pop: int, max_len: int) -> None:
        self.n_pop = n_pop
        self.max_len = max_len
        self.agents = []
    
    def create_population(self):
        self.agents = [DNA([self.geneCreateRandom(i) for i in range(self.max_len)]) for _ in range(self.n_pop)]

    def geneCreateRandom(self, i):
        match i:
            case 0:
                return np.random.randint(self.hidden_layer_sizes_range[0], self.hidden_layer_sizes_range[1])
            case 1:
                return self.activation[np.random.randint(0, 4)]
            case 2:
                return self.solver[np.random.randint(0, 3)]
            case 3:
                return np.random.uniform(self.alpha_range[0], self.alpha_range[1])
            case 4:
                return np.random.randint(self.batch_size_range[0], self.batch_size_range[1])
            case 5:
                return self.learning_rate[np.random.randint(0, 3)]
            case 6:
                return np.random.uniform(self.learning_rate_init_range[0], self.learning_rate_init_range[1])
            case 7:
                return np.random.uniform(self.power_t_range[0], self.power_t_range[1])
            case 8:
                return np.random.randint(self.max_iter_range[0], self.max_iter_range[1])
            case 9:
                return self.boolean[np.random.randint(0, 2)]
            case 10:
                return np.random.randint(self.random_state_range[0], self.random_state_range[1])
            case 11:
                return np.random.uniform(self.tol_range[0], self.tol_range[1])
            case 12:
                return np.random.uniform(self.momentum_range[0], self.momentum_range[1])
            case 13:
                return self.boolean[np.random.randint(0, 2)]
            case 14:
                return np.random.uniform(self.epsilon_range[0], self.epsilon_range[1])
            case 15:
                return np.random.randint(self.n_iter_no_change_range[0], self.n_iter_no_change_range[1])
            case 16:
                return np.random.randint(self.max_fun_range[0], self.max_fun_range[1])
            
    def evaluate(self):
        def compute_fitness(agent):
            clf = MLPClassifier(hidden_layer_sizes=agent[0], activation=agent[1], solver=agent[2], alpha=agent[3], batch_size=agent[4], learning_rate=agent[5], learning_rate_init=agent[6], power_t=agent[7], max_iter=agent[8], shuffle=agent[9], random_state=agent[10], tol=agent[11], momentum=agent[12], nesterovs_momentum=agent[13], epsilon=agent[14], n_iter_no_change=agent[15], max_fun=agent[16]).fit(X_train, y_train)
            return clf.score(X_test, y_test)
        
        fitness_scores = [compute_fitness(a.value) for a in self.agents]
        # sorteer agents op fitness score
        sorted_agents = sorted(zip(fitness_scores, self.agents), reverse=True, key=lambda x: x[0])
        return [a for _, a in sorted_agents], sorted_agents[0]
    
    def dna_crossover(self, dna, mutation_rate):
        new_generation = GeneticAlgo(self.n_pop, self.max_len)
        #elitism (beste 2 agents gaan door)
        new_generation.agents.append(dna[0]) 
        new_generation.agents.append(dna[1])

        for i in range(0, self.n_pop-2, 2): # 2 af door elitism
            crossover_point = np.random.randint(0, self.max_len)
            new_dna1 = DNA(dna[i].value[:crossover_point] + dna[i+1].value[crossover_point:])
            new_dna2 = DNA(dna[i+1].value[:crossover_point] + dna[i].value[crossover_point:])

            # kleine kan op mutatie
            if np.random.random() < mutation_rate:
                mutation_point = np.random.randint(0, self.max_len)
                new_dna1.value[mutation_point] = self.geneCreateRandom(mutation_point)
            if np.random.random() < mutation_rate:
                mutation_point = np.random.randint(0, self.max_len)
                new_dna2.value[mutation_point] = self.geneCreateRandom(mutation_point)
            new_generation.agents.append(new_dna1)
            new_generation.agents.append(new_dna2)
        
        return new_generation



In [12]:
mutation_rate = 0.1
num_parents = 2  # NOTE(review): not referenced anywhere in this cell
algo = GeneticAlgo(10, 17)  # 10 agents, 17 genes each
algo.create_population()
target = 0.8  # stop as soon as the best score reaches this value

for i in range(1000):
    # evaluate() returns the agents sorted best-first plus the (score, agent)
    # tuple of the best agent.
    sorted_dna, best_fitness = algo.evaluate()
    print(f'iter: {i}, best fitness: {best_fitness[0]}, value: "{sorted_dna[0].value}"')
    # Compare the score itself: the (score, agent) tuple never equals a float,
    # and a score above the target should stop the search as well.
    if best_fitness[0] >= target: break

    new_algo = algo.dna_crossover(sorted_dna, mutation_rate)
    algo = new_algo

    