In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import json

In [2]:
# Load the stored feature-selection results. JSON is UTF-8 by specification,
# so the encoding is given explicitly instead of relying on the platform
# default (which is a legacy code page on Windows).
with open(
    r"D:\dtc-dr\models\feature-selection\mlp\cv_scores_mlp.json",
    encoding="utf-8",
) as f:
    cv_scores = json.load(f)

# Only entries of cv_scores whose key appears in this list are modelled.
target_values = [
    "Stage1.Output.Measurement1.U.Actual",
    "Stage1.Output.Measurement7.U.Actual",
    "Stage1.Output.Measurement11.U.Actual",
    "FirstStage.CombinerOperation.Temperature1.U.Actual",
    "FirstStage.CombinerOperation.Temperature2.U.Actual",
]

In [3]:
# Hyperparameter grid handed to GridSearchCV.
# 'hidden_layer_sizes' is deliberately not searched, so the estimator keeps
# its default architecture. Candidates tried earlier:
#   (50,), (100,), (50, 50), (100, 100)
param_grid = dict(
    activation=['relu', 'tanh'],
    alpha=[0.0001, 0.001, 0.01],
)

In [4]:
# Read the full process data set into memory.
df = pd.read_csv(r"D:\dtc-dr\data-analyse\continuous_factory_process.csv", sep=",")

# Column-name prefixes that mark a column as a candidate input feature.
prefixes_to_match = ["Machine1", "Machine2", "Machine3"]

# Keep, in file order, every column whose name starts with one of the
# prefixes (str.startswith accepts a tuple of alternatives).
filtered_columns = [
    column_name
    for column_name in df.columns
    if column_name.startswith(tuple(prefixes_to_match))
]
def calculate_best_regression_model():
    """Tune an MLPRegressor per target and collect train/test metrics.

    Reads the module-level names ``cv_scores``, ``target_values``,
    ``filtered_columns``, ``df`` and ``param_grid``. For every entry of
    ``cv_scores`` whose key is listed in ``target_values``:

    1. the positions stored under ``"indices"`` are mapped to column names
       through ``filtered_columns``; those columns of ``df`` are the features
       and the column named by the key is the target;
    2. the rows are split 80/20 into train and test parts (seeded);
    3. a 5-fold ``GridSearchCV`` over ``param_grid`` (scored by negative MSE)
       is fitted on the training part;
    4. R2, RMSE and MSE are computed on both parts from the grid search's
       predictions.

    Returns:
        dict: ``{target_name: {"r2_train", "r2_test", "rmse_train",
        "rmse_test", "mse_train", "mse_test"}}`` with plain ``float`` values,
        so the result can be passed straight to ``json.dump``.
    """
    results_dict = {}

    for target_name, cv_entry in cv_scores.items():
        if target_name not in target_values:
            continue

        # "indices" are positions into filtered_columns, not column labels.
        feature_names = [filtered_columns[i] for i in cv_entry["indices"]]
        X = df[feature_names]
        y = df[target_name]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Seed the network as well as the split: weight initialisation and
        # batch shuffling are random, so an unseeded MLP yields different
        # metrics on every run.
        mlp = MLPRegressor(random_state=42)
        grid_search = GridSearchCV(
            mlp,
            param_grid,
            cv=5,
            scoring='neg_mean_squared_error',
            n_jobs=-1,
        )

        # Fit the model to the training data
        grid_search.fit(X_train, y_train)

        # Predict on both parts with the best estimator found by the search
        y_train_pred = grid_search.predict(X_train)
        y_test_pred = grid_search.predict(X_test)

        # MSE is computed once per part; RMSE is derived from it.
        mse_train = mean_squared_error(y_train, y_train_pred)
        mse_test = mean_squared_error(y_test, y_test_pred)

        results_dict[target_name] = {
            "r2_train": float(r2_score(y_train, y_train_pred)),
            "r2_test": float(r2_score(y_test, y_test_pred)),
            "rmse_train": float(np.sqrt(mse_train)),
            "rmse_test": float(np.sqrt(mse_test)),
            "mse_train": float(mse_train),
            "mse_test": float(mse_test),
        }

    return results_dict
     

# Run the grid search for every selected target.
results = calculate_best_regression_model()

# Serialise the metrics with 4-space indentation and write them to disk.
with open('MLP_hyperparametertuning_results.json', 'w') as json_file:
    json_file.write(json.dumps(results, indent=4))
