In [None]:
# Import several libraries that we will use below
import numpy as np
import pandas as pd
import sklearn
import keras

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression , LogisticRegression , Lasso
from sklearn.metrics import mean_absolute_error, r2_score
from keras import layers, Sequential
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.layers import Dense

In [None]:
import pandas as pd

# Load the dataset from the Excel file
# file_path = r"C:\Users\frm19\OneDrive - Wageningen University & Research\2. Thesis - Information Technology\7. Datasets\drl-minigreenhouse-datasets\compiled-real-and-measurements-datasets-cleaned.xlsx"
file_path = r"training.xlsx"
mgh_data = pd.read_excel(file_path)

# Display the first few rows of the dataframe
mgh_data.head()

# Drop rows with missing values
# mgh_data = mgh_data.dropna()

# Length of dataset
num_rows = len(mgh_data)
print(f'Number of rows: {num_rows}')



In [None]:
# Inspect the dataset
mgh_data.head(5)

In [None]:
# Select features and target variable
X = mgh_data[['time', 'co2 in - sim', 'rh in - sim', 'temperature in - sim', 'par in - sim']]
y = mgh_data['rh in - real']

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.0, random_state=42)


In [None]:
# Scale the dataset 
# Normalize the features
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Build the ANN model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))  # Output layer for regression

In [None]:
# Check the structure
model.summary()

In [None]:
# Compile the model
# Set things up
# set things up
model.compile(loss='mse', metrics=['mae', 'R2Score'], optimizer='rmsprop')

In [None]:
# Train the model
history = model.fit(X_train_scaled, y_train, epochs=200, batch_size=10, validation_split=0.2, verbose=1)


#### Monitoring the training process

We will visualize the model training process, and see how these metrics evolve during

In [None]:
# Plot loss (MSE) for both training and validation split

sns.lineplot(x=history.epoch, y=history.history['loss'], color='g', label='training')
sns.lineplot(x=history.epoch, y=history.history['val_loss'], color='b', label='validation')
plt.title('Model loss during training')
plt.ylabel('loss')
plt.xlabel('training epoch')
plt.legend(loc='upper right')

In [None]:
# Plot R2 for both training and validation split

sns.lineplot(x=history.epoch, y=history.history['R2Score'], color='g', label='training')
sns.lineplot(x=history.epoch, y=history.history['val_R2Score'], color='b', label='validation')

plt.title('R2 during training')
plt.ylabel('R2')
plt.xlabel('epoch')
plt.legend(loc='lower right')

#### Evaluate with the independent test set

In [None]:
# Make predictions on the test set
y_hat_test = model.predict(X_test_scaled)

# Calculate MAE and R² score
mae = mean_absolute_error(y_test, y_hat_test)
r2 = r2_score(y_test, y_hat_test)

print(f"MAE: {mae:.2f}")
print(f"R²: {r2:.2f}")

# Plotting the test vs predicted values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_hat_test, label='Predicted vs Actual')
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2, label='Perfect Prediction')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.show()


# COMPLETED AUTOMATE THE PROCESS 

In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler

import seaborn as sns

In [26]:
import pandas as pd

# Load the datasets from separate files
file_path = r"completed-datasets.xlsx"

# Load and clean the training dataset
mgh_data = pd.read_excel(file_path)

# Display the first few rows of the dataframe
mgh_data.head()



Unnamed: 0,time,global out,global in,temp in,temp out,rh in,rh out,co2 in,co2 out,toplights,ventilation,heater,Unnamed: 12,Unnamed: 13
0,0,0.032943,0.0,19.806459,22.685,62.243682,52.775,409.0,1530.5,1,0.0,0,dataset3,May
1,300,0.032943,0.0,19.806459,22.685,62.243682,52.775,409.0,1530.5,1,0.0,0,,
2,600,0.032943,0.0,19.889053,22.745,62.040377,52.51,409.0,1506.5,1,0.0,0,,
3,900,0.032943,0.0,20.017409,22.823333,61.694059,52.213333,408.333333,1455.0,1,0.0,0,,
4,1200,0.032943,0.0,20.145762,22.865,61.322003,52.015,408.0,1413.0,1,0.0,0,,


In [27]:
def r2_score_metric(y_true, y_pred):
    """Custom R2 score metric"""
    SS_res =  tf.reduce_sum(tf.square(y_true - y_pred)) 
    SS_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true))) 
    return (1 - SS_res/(SS_tot + tf.keras.backend.epsilon()))

def train_and_evaluate_model(target_variable, flag_train, _epochs=100, _batch_size=10):
    features = ['time', 'global out', 'temp out', 'temp out', 'rh out', 'co2 out', 'ventilation', 'toplights', 'heater']
    
    X = mgh_data[features]
    y = mgh_data[target_variable]
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.0001, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    if flag_train:
        model = Sequential()
        model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(1))

        model.compile(optimizer='rmsprop', 
                      loss='mean_squared_error', 
                      metrics=['mae', r2_score_metric])

        history = model.fit(X_train_scaled, y_train, epochs=_epochs, batch_size=_batch_size, validation_split=0.2, verbose=1)
        
        # Save the model in the recommended Keras format
        model.save(f'model/{target_variable}_model.keras')
        
        y_hat_test = model.predict(X_test_scaled)
    else:
        # Load the model using the native Keras format
        loaded_model = load_model(f'model/{target_variable}_model.keras', custom_objects={'r2_score_metric': r2_score_metric})
        
        y_hat_test = loaded_model.predict(X_test_scaled)

    mae = mean_absolute_error(y_test, y_hat_test)
    r2 = r2_score(y_test, y_hat_test)
    
    if flag_train:
        plt.figure(figsize=(6, 4))
        sns.lineplot(x=history.epoch, y=history.history['loss'], color='g', label='training')
        sns.lineplot(x=history.epoch, y=history.history['val_loss'], color='b', label='validation')
        plt.title('Model loss during training')
        plt.ylabel('loss')
        plt.xlabel('training epoch')
        plt.legend(loc='upper right')

        plt.figure(figsize=(6, 4))
        sns.lineplot(x=history.epoch, y=history.history['r2_score_metric'], color='g', label='training')
        sns.lineplot(x=history.epoch, y=history.history['val_r2_score_metric'], color='b', label='validation')

        plt.title('R2 during training')
        plt.ylabel('R2')
        plt.xlabel('epoch')
        plt.legend(loc='lower right')

    print(f"\nTarget Variable: {target_variable}")
    print(f"MAE: {mae:.2f}")
    print(f"R²: {r2:.2f}")

    plt.figure(figsize=(6, 4))
    plt.scatter(y_test, y_hat_test, label='Predicted vs Actual')
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2, label='Perfect Prediction')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title(f'Actual vs Predicted Values for {target_variable}\nMAE: {mae:.2f}, R²: {r2:.2f}')
    plt.legend()
    
    plt.figure(figsize=(10, 6))
    plt.plot(range(len(y_test)), y_test, marker='o', linestyle='-', color='blue', label='Actual')
    plt.plot(range(len(y_hat_test)), y_hat_test, marker='x', linestyle='--', color='red', label='Predicted')
    plt.title(f'Actual vs Predicted Values for {target_variable}')
    plt.xlabel('Index')
    plt.ylabel(target_variable)
    plt.legend()
    plt.show()

In [28]:
# List of target variables
target_variables = ['global in', 'temp in', 'rh in', 'co2 in']

# Iterate through each target variable and call the function
for target in target_variables:
    train_and_evaluate_model(target, flag_train=True, _epochs = 5, _batch_size= 4)

Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3992/3992[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 976us/step - loss: 1.4148 - mae: 0.5106 - r2_score_metric: -1.3393 - val_loss: 0.6364 - val_mae: 0.3221 - val_r2_score_metric: 0.3659
Epoch 2/5
[1m3992/3992[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 865us/step - loss: 0.6460 - mae: 0.2870 - r2_score_metric: 0.4814 - val_loss: 0.5891 - val_mae: 0.2743 - val_r2_score_metric: 0.6860
Epoch 3/5
[1m3992/3992[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 891us/step - loss: 0.5128 - mae: 0.2653 - r2_score_metric: 0.5671 - val_loss: 0.5509 - val_mae: 0.2821 - val_r2_score_metric: 0.4625
Epoch 4/5
[1m1250/3992[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m2s[0m 768us/step - loss: 0.6130 - mae: 0.2800 - r2_score_metric: 0.5348

KeyboardInterrupt: 

# AUTOMATE THE PROCESS 

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler

import seaborn as sns

In [None]:
import pandas as pd

# Load the datasets from separate files
file_path_training = r"train-iot-datasets.xlsx"
file_path_test = r"test-iot-datasets-compiled-2-converted.xlsx"

# Load and clean the training dataset
mgh_data_training = pd.read_excel(file_path_training)
#mgh_data_training = mgh_data_training.dropna()  # Drop rows with missing values

# Load and clean the test dataset
mgh_data_test = pd.read_excel(file_path_test)
#mgh_data_test = mgh_data_test.dropna()  # Drop rows with missing values

# Display the first few rows of the cleaned training and test dataframes
print("Training Data Sample:")
print(mgh_data_training.head())

print("\nTest Data Sample:")
print(mgh_data_test.head())

# Length of dataset
num_rows_training = len(mgh_data_training)
print(f'\nNumber of rows training: {num_rows_training}')

num_rows_test = len(mgh_data_test)
print(f'Number of rows test: {num_rows_test}')

# Show all rows and columns of the test dataframe
# pd.set_option('display.max_rows', None)  # Show all rows
# pd.set_option('display.max_columns', None)  # Show all columns

# print("\nFull Test Data:")
# print(mgh_data_test)

# # Reset options to default
# pd.reset_option('display.max_rows')
# pd.reset_option('display.max_columns')


In [None]:
def train_and_evaluate_model(target_variable, _epochs=100, _batch_size=10):
    # Select features and target variable for training and test data
    features = ['time', 'global out', 'temp out', 'temp out', 'rh out', 'co2 out', 'ventilation', 'toplights', 'heater']
    
    # Training data
    X_train = mgh_data_training[features]
    y_train = mgh_data_training[target_variable]
    
    # Test data
    X_test = mgh_data_test[features]
    y_test = mgh_data_test[target_variable]

    # Normalize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Build the ANN model
    model = Sequential()
    model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))  # Output layer for regression

    # Compile the model
    # model.compile(optimizer='adam', loss='mean_squared_error')
    model.compile(loss='mse', metrics=['mae', 'R2Score'], optimizer='rmsprop')

    # Train the model
    history = model.fit(X_train_scaled, y_train, epochs=_epochs, batch_size=_batch_size, validation_split=0.2, verbose=1)

    # Make predictions on the test set
    y_hat_test = model.predict(X_test_scaled)

    # Calculate MAE and R² score
    mae = mean_absolute_error(y_test, y_hat_test)
    r2 = r2_score(y_test, y_hat_test)
    
    # Plot loss (MSE) for both training and validation split
    plt.figure(figsize=(6, 4))
    sns.lineplot(x=history.epoch, y=history.history['loss'], color='g', label='training')
    sns.lineplot(x=history.epoch, y=history.history['val_loss'], color='b', label='validation')
    plt.title('Model loss during training')
    plt.ylabel('loss')
    plt.xlabel('training epoch')
    plt.legend(loc='upper right')
    
    # Plot R2 for both training and validation split
    plt.figure(figsize=(6, 4))
    sns.lineplot(x=history.epoch, y=history.history['R2Score'], color='g', label='training')
    sns.lineplot(x=history.epoch, y=history.history['val_R2Score'], color='b', label='validation')

    plt.title('R2 during training')
    plt.ylabel('R2')
    plt.xlabel('epoch')
    plt.legend(loc='lower right')

    # Print MAE and R²
    print(f"\nTarget Variable: {target_variable}")
    print(f"MAE: {mae:.2f}")
    print(f"R²: {r2:.2f}")

    # Plotting the test vs predicted values
    plt.figure(figsize=(6, 4))
    plt.scatter(y_test, y_hat_test, label='Predicted vs Actual')
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2, label='Perfect Prediction')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title(f'Actual vs Predicted Values for {target_variable}\nMAE: {mae:.2f}, R²: {r2:.2f}')
    plt.legend()
    
    # Plot the test vs predicted values in order
    plt.figure(figsize=(10, 6))
    plt.plot(range(len(y_test)), y_test, marker='o', linestyle='-', color='blue', label='Actual')
    plt.plot(range(len(y_hat_test)), y_hat_test, marker='x', linestyle='--', color='red', label='Predicted')
    plt.title(f'Actual vs Predicted Values for {target_variable}')
    plt.xlabel('Index')
    plt.ylabel(target_variable)
    plt.legend()
    plt.show()

In [None]:
# List of target variables
target_variables = ['global in', 'temp in', 'rh in', 'co2 in']

# Iterate through each target variable and call the function
for target in target_variables:
    train_and_evaluate_model(target, 200, 4)