In [15]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import json

In [16]:
with open(r"D:\dtc-dr\models\feature-selection\mlp\cv_scores_mlp.json") as f:
    cv_scores = json.load(f)

target_values = [
    "Stage1.Output.Measurement1.U.Actual",
    "Stage1.Output.Measurement7.U.Actual",
    "Stage1.Output.Measurement11.U.Actual",
    "FirstStage.CombinerOperation.Temperature1.U.Actual",
    "FirstStage.CombinerOperation.Temperature2.U.Actual",
]

In [17]:
df = pd.read_csv(
    r"D:\dtc-dr\data-analyse\continuous_factory_process.csv", delimiter=","
)

prefixes_to_match = ["Machine1", "Machine2", "Machine3"]

filtered_columns = [
    col
    for col in df.columns
    if any(col.startswith(prefix) for prefix in prefixes_to_match)
]

In [18]:
import random


# Binair genetisch algoritme (Binary Genetic Algorithm) uniforme crossover
def uniform_crossover(parent1, parent2):
    child = []
    for gene1, gene2 in zip(parent1, parent2):
        if random.random() < 0.5:
            child.append(gene1)
        else:
            child.append(gene2)
    return child


# Probleem met permutaties (Permutation Problem) crossover (Werkt niet)
def order_crossover(parent1, parent2):
    n = len(parent1)
    start, end = sorted(random.sample(range(n), 2))
    child = [-1] * n

    # Copy the segment from parent1 to the child
    child[start : end + 1] = parent1[start : end + 1]

    # Create a mapping of genes in the segment from parent2
    mapping = {gene: None for gene in parent2[start : end + 1]}

    # Initialize pointers for the remaining part of the child
    j = end + 1
    i = end + 1

    # Loop through the remaining part of parent2, preserving order
    while None in mapping.values():
        gene = parent2[i % n]
        if gene not in mapping.values():
            child[j % n] = gene
            j += 1
        i += 1
    return child


# one point crossover
def reproduceer(ouder1, ouder2):
    n = len(ouder1) - 1
    c = random.randint(1, n)
    ouder1 = ouder1[:c]
    ouder2 = ouder2[c : n + 1]
    resultaat = []
    for i in ouder1:
        resultaat.append(i)
    for j in ouder2:
        resultaat.append(j)
    return resultaat


# # Split the training set further into training and validation sets
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# np.random.seed(42)
# X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

from sklearn.model_selection import KFold
# Define the fitness function for regression
def evaluate_regression(individual, X, y):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    hidden_layer_sizes = tuple(individual[0:3])
    learning_rate_init = individual[3]
    activation = individual[4]
    solver = individual[5]
    alpha = individual[6]
    momentum = individual[7]
    max_iter = individual[8]

    # Ensure that the number of input neurons matches the number of features
    input_neurons = X_train.shape[1]  # Number of features in the processed dataset

    # Create and train the MLPRegressor model
    model = MLPRegressor(
        hidden_layer_sizes=(input_neurons,)
        + hidden_layer_sizes,  # Include input layer neurons
        learning_rate_init=learning_rate_init,
        activation=activation,
        solver=solver,
        random_state=42,
        alpha=alpha, # Initial
        momentum=momentum,
        max_iter=max_iter
    )

    model.fit(X_train, y_train)

    # Print results from the training set
    train_r2_score = model.score(X_train, y_train)
    train_mse = mean_squared_error(y_train, model.predict(X_train))
    train_rmse = np.sqrt(train_mse)

    # Make predictions on the test set
    y_test_pred = model.predict(X_test)

    # Evaluate the model's performance on the test set
    test_r2_score = model.score(X_test, y_test)
    test_mse = mean_squared_error(y_test, y_test_pred)
    test_rmse = np.sqrt(test_mse)
    abs_diff = abs(train_r2_score - test_r2_score)
    combined_fitness = test_r2_score - abs_diff
    # # # Combine validation and test mean squared errors into a single fitness value
    # combined_fitness = (train_r2_score + test_r2_score) / 2.0
    return [combined_fitness, train_r2_score, test_r2_score]
    

    # # Perform 5-fold cross-validation
    # kf = KFold(n_splits=5, shuffle=True, random_state=42)
    # train_r2_scores = []
    # test_r2_scores = []

    # for train_index, test_index in kf.split(X):
    #     X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    #     y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    #     model.fit(X_train, y_train)

    #     # Training set evaluation
    #     train_r2_scores.append(r2_score(y_train, model.predict(X_train)))

    #     # Test set evaluation
    #     test_r2_scores.append(r2_score(y_test, model.predict(X_test)))

    # # Calculate average scores across folds
    # avg_train_r2 = np.mean(train_r2_scores)
    # avg_test_r2 = np.mean(test_r2_scores)

    # abs_diff = abs(avg_train_r2 - avg_test_r2)
    # combined_fitness = avg_test_r2 - abs_diff

    # combined_fitness = avg_test_r2 - abs_diff
    # return [combined_fitness, avg_train_r2, avg_test_r2]

    


# Definieer de parameters van het genetisch algoritme
population_size = 15
aantal_generaties = 3
mutatie_kans = 0.3
mutatie_sigma = 0.01


# Creëer de initiële populatie met willekeurige individuen
def creëer_individu():
    return [
        random.randint(1, 200),  # Aantal neuronen in verborgen laag 1
        random.randint(1, 200),  # Aantal neuronen in verborgen laag 2
        random.randint(1, 200),  # Aantal neuronen in verborgen laag 3
        random.uniform(0.0001, 0.1),  # Leersnelheid
        random.choice(["identity", "logistic", "tanh", "relu"]),  # Activatiefunctie
        random.choice(["lbfgs", "adam"]),  # Solver
        random.uniform(0.0001, 0.0005),  # Alpha (regularisatieparameter)
        random.uniform(0.8, 0.99),  # Lereningsmomentum
        random.randint(400, 1000),  # max_iter
    ]


with open(r"D:\dtc-dr\models\feature-selection\mlp\cv_scores_mlp.json") as f:
    cv_scores = json.load(f)

target_values = [
    "Stage1.Output.Measurement1.U.Actual",
    "Stage1.Output.Measurement7.U.Actual",
    "Stage1.Output.Measurement11.U.Actual",
    "FirstStage.CombinerOperation.Temperature1.U.Actual",
    "FirstStage.CombinerOperation.Temperature2.U.Actual",
]

result = {}

for key, value in cv_scores.items():
    if key in target_values:
        print(key)
        indices = value["indices"]
        print(indices)
        X = []
        for index, value in enumerate(indices):
            X.append(filtered_columns[value])
        X = df[X]
        y = df[key]
        populatie = [creëer_individu() for _ in range(population_size)]

        # Main loop for the genetic algorithm
        for generation in range(aantal_generaties):
            print(generation)
            # Evaluate the fitness of each individual in the population
            fitness_scores = [
                evaluate_regression(individual, X, y)[0] for individual in populatie
            ]
            print(fitness_scores)

            # Select the best-performing individuals for reproduction
            number_of_parents = int(
                population_size * 0.3
            )  # You can adjust the selection ratio
            parents = [
                populatie[i]
                for i in sorted(range(population_size), key=lambda i: -fitness_scores[i])[
                    :number_of_parents
                ]
            ]

            # Create the next generation using crossover and mutation
            new_population = []

            while len(new_population) < population_size:
                parent1, parent2 = random.choice(parents), random.choice(parents)
                child = uniform_crossover(parent1, parent2)

                # Apply mutation to the child with a certain probability (Gaussian Mutation)
                for i in range(4, len(child) - 4):
                    if random.random() < mutatie_kans:
                        if type(child[i]) == int or type(child[i]) == float:
                            child[i] += random.gauss(0, mutatie_sigma)
                            child[i] = max(0.0001, min(0.1, child[i]))

                new_population.append(child)

            # Replace the old population with the new generation
            populatie = new_population

        # Find the best individual in the final population
        best_individual = max(populatie, key=lambda ind: evaluate_regression(ind, X, y)[0])
        evaluate = evaluate_regression(best_individual, X, y)
        print(evaluate)
        print("Best individual:", best_individual)
        print("Best fitness:", evaluate[0])
        print("Best train_r2_score error:", evaluate[1])
        print("Best test_r2_score error:", evaluate[2])

        result[key] = {
            "best_individual": best_individual,
            "best_fitness": evaluate[0],
            "best_train_r2_score": evaluate[1],
            "best_test_r2_score": evaluate[2],
            "indices": indices,
        }

# Save the results to a JSON file
with open('MLP_ga.json', 'w') as json_file:
    json.dump(result, json_file, indent=4)

Stage1.Output.Measurement1.U.Actual
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


KeyboardInterrupt: 

In [20]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neural_network import MLPRegressor
import numpy as np
import json


with open(r'D:\dtc-dr\models\feature-selection\mlp\hypertuning\MLP_ga.json') as json_file:
    result = json.load(json_file)
    
endresult = {}

for key, value in result.items():
    selected_columns = [filtered_columns[i] for i in value["indices"]]

    X = df[selected_columns]
    y = df[key]

    individual = value["best_individual"]
    
    hidden_layer_sizes = tuple(individual[0:3])
    learning_rate_init = individual[3]
    activation = individual[4]
    solver = individual[5]
    alpha = individual[6]
    momentum = individual[7]
    max_iter = individual[8]

    # Ensure that the number of input neurons matches the number of features
    input_neurons = X.shape[1]  # Number of features in the processed dataset

    # Create and train the MLPRegressor model
    model = MLPRegressor(
        hidden_layer_sizes=(input_neurons,)
        + hidden_layer_sizes,  # Include input layer neurons
        learning_rate_init=learning_rate_init,
        activation=activation,
        solver=solver,
        random_state=42,
        alpha=alpha,
        momentum=momentum,
        max_iter=max_iter
    )

    # Define the cross-validation strategy (5-fold cross-validation) on the training set
    cv_train = KFold(n_splits=5, shuffle=True, random_state=42)

    # Perform cross-validation on the training set
    r2_scores_train = cross_val_score(model, X, y, cv=cv_train, scoring='r2')
    mse_scores_train = -cross_val_score(model, X, y, cv=cv_train, scoring='neg_mean_squared_error')

    # Print the cross-validation results on the training set
    print(f'Training set - R-squared scores: {r2_scores_train}')
    print(f'Training set - Mean R-squared: {np.mean(r2_scores_train)}')
    print(f'Training set - Mean Squared Error scores: {mse_scores_train}')
    print(f'Training set - Mean MSE: {np.mean(mse_scores_train)}')

    # Define the cross-validation strategy (5-fold cross-validation) on the test set
    cv_test = KFold(n_splits=5, shuffle=True, random_state=42)

    # Perform cross-validation on the test set
    r2_scores_test = cross_val_score(model, X, y, cv=cv_test, scoring='r2')
    mse_scores_test = -cross_val_score(model, X, y, cv=cv_test, scoring='neg_mean_squared_error')

    # Print the cross-validation results on the test set
    print(f'Test set - R-squared scores: {r2_scores_test}')
    print(f'Test set - Mean R-squared: {np.mean(r2_scores_test)}')
    print(f'Test set - Mean Squared Error scores: {mse_scores_test}')
    print(f'Test set - Mean MSE: {np.mean(mse_scores_test)}')

    endresult[key] = {
        "mean_r2_scores_train": np.mean(r2_scores_train),
        "mean_mse_scores_train": np.mean(mse_scores_train),
        "mean_r2_scores_test": np.mean(r2_scores_test),
        "mean_mse_scores_test": np.mean(mse_scores_test),
    }

endresult



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

Training set - R-squared scores: [0.75617    0.53387899 0.51266668 0.15997616 0.67889388]
Training set - Mean R-squared: 0.5283171415289974
Training set - Mean Squared Error scores: [11.68809669 21.97958064 23.31687451 40.17794765 15.26630236]
Training set - Mean MSE: 22.485760369253985


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

Test set - R-squared scores: [0.75617    0.53387899 0.51266668 0.15997616 0.67889388]
Test set - Mean R-squared: 0.5283171415289974
Test set - Mean Squared Error scores: [11.68809669 21.97958064 23.31687451 40.17794765 15.26630236]
Test set - Mean MSE: 22.485760369253985


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

Training set - R-squared scores: [0.82246303 0.79942923 0.63817328 0.7819586  0.79487941]
Training set - Mean R-squared: 0.7673807083411944
Training set - Mean Squared Error scores: [0.35508623 0.40395414 0.7296836  0.43281795 0.40564434]
Training set - Mean MSE: 0.465437250907185


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

Test set - R-squared scores: [0.82246303 0.79942923 0.63817328 0.7819586  0.79487941]
Test set - Mean R-squared: 0.7673807083411944
Test set - Mean Squared Error scores: [0.35508623 0.40395414 0.7296836  0.43281795 0.40564434]
Test set - Mean MSE: 0.465437250907185


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

Training set - R-squared scores: [0.91907325 0.8899391  0.87020865 0.88698524 0.91699119]
Training set - Mean R-squared: 0.8966394880299025
Training set - Mean Squared Error scores: [0.52558067 0.71939534 0.82958111 0.73451495 0.53138442]
Training set - Mean MSE: 0.6680912984137387


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

Test set - R-squared scores: [0.91907325 0.8899391  0.87020865 0.88698524 0.91699119]
Test set - Mean R-squared: 0.8966394880299025
Test set - Mean Squared Error scores: [0.52558067 0.71939534 0.82958111 0.73451495 0.53138442]
Test set - Mean MSE: 0.6680912984137387


KeyboardInterrupt: 