### Resultaten zonder optimalisatie

In [32]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import json

with open(r"D:\dtc-dr\combined_results_23012024.json", "r") as json_file:
    combined_results = json.load(json_file)

# Dit zijn de geselecteerde kolommen waarbij modellen met redelijke accuraraatheid geproduceerd kunnen worden
selected_y = [
    "Stage1.Output.Measurement1.U.Actual",
    "Stage1.Output.Measurement7.U.Actual",
    "Stage1.Output.Measurement11.U.Actual",
    "FirstStage.CombinerOperation.Temperature1.U.Actual",
    "FirstStage.CombinerOperation.Temperature2.U.Actual",
]

selected_y_data = {}

for key, value in combined_results.items():
    if key in selected_y: 
        selected_y_data[key] = value
        
selected_y_data


{'Stage1.Output.Measurement1.U.Actual': {'regression': [0.7842584834160684,
   0.7513302764551197,
   [0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12,
    13,
    14,
    15,
    16,
    17,
    18,
    19,
    20,
    21,
    22,
    23,
    24,
    25,
    26,
    27,
    28,
    29,
    30,
    31,
    32,
    33,
    34,
    35]],
  'decision_tree': [1.0,
   0.8088659050418163,
   [0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 31, 32]],
  'mlp': [0.7283264361961774,
   0.7045586513596629,
   [0,
    1,
    2,
    3,
    5,
    7,
    8,
    9,
    10,
    11,
    13,
    14,
    15,
    24,
    25,
    26,
    27,
    31,
    32,
    33,
    34]]},
 'Stage1.Output.Measurement7.U.Actual': {'regression': [0.792018941981842,
   0.7912272589184344,
   [0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12,
    13,
    14,
    15,
    16,
    17,
    18,
    19,
    20,
    21,
    22,
    23,
    24,
    25,
    26,
 

In [33]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LinearRegression
import numpy as np

df = pd.read_csv(r"D:\dtc-dr\data-analyse\continuous_factory_process.csv", delimiter=",")
prefixes_to_match = ["Machine1", "Machine2", "Machine3"]

filtered_columns = [
    col
    for col in df.columns
    if any(col.startswith(prefix) for prefix in prefixes_to_match)
]


X = df[filtered_columns]

# X = df[['room_num', 'teachers', 'poor_prop']]
Y = df['Stage1.Output.Measurement1.U.Actual']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


In [34]:
# Dictionary to store results
results_dict = {}


for key, value in selected_y_data.items():
    features_regression_x = [X.columns.tolist()[index] for index in value["regression"][2]]
    features_decisiontree_x = [X.columns.tolist()[index] for index in value["decision_tree"][2]]
    features_mlp_x = [X.columns.tolist()[index] for index in value["mlp"][2]]

    # Linear Regression
    X_regression = X[features_regression_x]
    X_train, X_test, y_train, y_test = train_test_split(X_regression, Y, test_size=0.2, random_state=42)

    regr = LinearRegression()

    cv_train = KFold(n_splits=5, shuffle=True, random_state=42)
    r2_scores_train = cross_val_score(regr, X_train, y_train, cv=cv_train, scoring='r2')

    cv_test = KFold(n_splits=5, shuffle=True, random_state=42)
    r2_scores_test = cross_val_score(regr, X_test, y_test, cv=cv_test, scoring='r2')

    results_dict[f"{key}_linear_regression"] = {
        "r2_train": np.mean(r2_scores_train),
        "r2_test": np.mean(r2_scores_test)
    }

    # Decision Tree Regression
    X_decisiontree = X[features_decisiontree_x]
    X_train, X_test, y_train, y_test = train_test_split(X_decisiontree, Y, test_size=0.2, random_state=42)

    dt_regr = DecisionTreeRegressor()

    r2_scores_train_dt = cross_val_score(dt_regr, X_train, y_train, cv=cv_train, scoring='r2')

    r2_scores_test_dt = cross_val_score(dt_regr, X_test, y_test, cv=cv_test, scoring='r2')

    results_dict[f"{key}_decision_tree"] = {
        "r2_train": np.mean(r2_scores_train_dt),
        "r2_test": np.mean(r2_scores_test_dt)
    }

    # MLP Regression
    X_mlp = X[features_mlp_x]
    X_train, X_test, y_train, y_test = train_test_split(X_mlp, Y, test_size=0.2, random_state=42)

    mlp_regr = MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)

    r2_scores_train_mlp = cross_val_score(mlp_regr, X_train, y_train, cv=cv_train, scoring='r2')

    r2_scores_test_mlp = cross_val_score(mlp_regr, X_test, y_test, cv=cv_test, scoring='r2')

    results_dict[f"{key}_mlp"] = {
        "r2_train": np.mean(r2_scores_train_mlp),
        "r2_test": np.mean(r2_scores_test_mlp)
    }

print(results_dict)

{'Stage1.Output.Measurement1.U.Actual_linear_regression': {'r2_train': 0.7826197037111307, 'r2_test': 0.7502482064955783}, 'Stage1.Output.Measurement1.U.Actual_decision_tree': {'r2_train': 0.6455326077414651, 'r2_test': 0.5798171412045889}, 'Stage1.Output.Measurement1.U.Actual_mlp': {'r2_train': 0.5033301489032848, 'r2_test': 0.4608964918657302}, 'Stage1.Output.Measurement7.U.Actual_linear_regression': {'r2_train': 0.7826197037111307, 'r2_test': 0.7502482064955783}, 'Stage1.Output.Measurement7.U.Actual_decision_tree': {'r2_train': 0.7960169246467175, 'r2_test': 0.703220929285005}, 'Stage1.Output.Measurement7.U.Actual_mlp': {'r2_train': 0.3701890588190064, 'r2_test': 0.4093979251530772}, 'Stage1.Output.Measurement11.U.Actual_linear_regression': {'r2_train': 0.7826197037111307, 'r2_test': 0.7502482064955783}, 'Stage1.Output.Measurement11.U.Actual_decision_tree': {'r2_train': 0.8173208349594594, 'r2_test': 0.7213319458687956}, 'Stage1.Output.Measurement11.U.Actual_mlp': {'r2_train': 0.474

In [35]:
import matplotlib.pyplot as plt

results = {
    "Stage1.Output.Measurement1.U.Actual_linear_regression": {
        "r2_train": 0.7826197037111307,
        "r2_test": 0.7502482064955783,
    },
    "Stage1.Output.Measurement1.U.Actual_decision_tree": {
        "r2_train": 0.6405549731889563,
        "r2_test": 0.5993197587211844,
    },
    "Stage1.Output.Measurement1.U.Actual_mlp": {
        "r2_train": 0.45124787344032535,
        "r2_test": 0.41114367501636,
    },
    "Stage1.Output.Measurement7.U.Actual_linear_regression": {
        "r2_train": 0.7826197037111307,
        "r2_test": 0.7502482064955783,
    },
    "Stage1.Output.Measurement7.U.Actual_decision_tree": {
        "r2_train": 0.7967275365963725,
        "r2_test": 0.7077919233640326,
    },
    "Stage1.Output.Measurement7.U.Actual_mlp": {
        "r2_train": 0.5182349247008973,
        "r2_test": 0.33199961890606106,
    },
    "Stage1.Output.Measurement11.U.Actual_linear_regression": {
        "r2_train": 0.7826197037111307,
        "r2_test": 0.7502482064955783,
    },
    "Stage1.Output.Measurement11.U.Actual_decision_tree": {
        "r2_train": 0.8146529537662728,
        "r2_test": 0.7236602744532182,
    },
    "Stage1.Output.Measurement11.U.Actual_mlp": {
        "r2_train": 0.6260072036241983,
        "r2_test": 0.3327121201849311,
    },
    "FirstStage.CombinerOperation.Temperature1.U.Actual_linear_regression": {
        "r2_train": 0.7826197037111307,
        "r2_test": 0.7502482064955783,
    },
    "FirstStage.CombinerOperation.Temperature1.U.Actual_decision_tree": {
        "r2_train": 0.6467065045158072,
        "r2_test": 0.5989259472680658,
    },
    "FirstStage.CombinerOperation.Temperature1.U.Actual_mlp": {
        "r2_train": 0.601277434622552,
        "r2_test": 0.23860878446912967,
    },
    "FirstStage.CombinerOperation.Temperature2.U.Actual_linear_regression": {
        "r2_train": 0.7826197037111307,
        "r2_test": 0.7502482064955783,
    },
    "FirstStage.CombinerOperation.Temperature2.U.Actual_decision_tree": {
        "r2_train": 0.7352150492483074,
        "r2_test": 0.6597280723870119,
    },
    "FirstStage.CombinerOperation.Temperature2.U.Actual_mlp": {
        "r2_train": 0.5768264342809564,
        "r2_test": 0.2837101300244965,
    },
}

In [None]:
# 