In [4]:
import pandas as pd
import numpy as np
import autokeras as ak
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error

import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning)
# and library deprecation notices that could point at real problems in the
# cells below; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [5]:
# Load the AHU log and parse the timestamp strings into datetimes.
df = pd.read_csv('TurinAHU.csv')
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

# Remember the measurement columns by name (file order, timestamp excluded)
# instead of slicing by position, so the selection below cannot silently
# pick the wrong columns if a derived feature is added or removed.
measurement_columns = [col for col in df.columns if col != "Timestamp"]

# Extracting day, month, and time of day into separate columns.
# "time" is the number of seconds elapsed since midnight.
# The year is intentionally not extracted: it was never kept in the final frame.
df["day"] = df["Timestamp"].dt.day
df["month"] = df["Timestamp"].dt.month
df["time"] = df["Timestamp"].dt.hour * 3600 + df["Timestamp"].dt.minute * 60 + df["Timestamp"].dt.second

# Reordering the columns: time features first, then the measurements.
# Selecting by name also drops the raw "Timestamp" column.
cols = ["time", "day", "month"] + measurement_columns
df = df[cols]
df

Unnamed: 0,time,day,month,T_Supply,T_Return,SP_Return,T_Saturation,T_Outdoor,RH_Supply,RH_Return,RH_Outdoor,Energy,Power
0,79200,14,10,19.859999,20.469999,18.5,19.020000,20.299999,71.110001,58.919998,79.5,0.0,0.0
1,80100,14,10,19.855000,20.430000,18.5,19.020000,20.299999,71.320000,59.000000,82.0,0.0,0.0
2,81000,14,10,19.850000,20.410000,18.5,19.020000,20.299999,71.470001,59.109997,79.5,0.0,0.0
3,81900,14,10,19.840000,20.379999,18.5,19.080000,20.299999,71.439995,59.309998,77.0,0.0,0.0
4,82800,14,10,19.830000,20.350000,18.5,19.080000,20.299999,71.580002,59.559998,79.5,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
33883,74700,14,4,19.539999,20.004999,20.5,19.619999,14.700000,39.020000,27.930000,57.0,0.0,0.0
33884,75600,14,4,19.520000,19.949999,20.5,19.539999,13.700000,39.020000,28.090000,57.0,0.0,0.0
33885,76500,14,4,19.430000,19.955000,20.5,19.420000,13.700000,39.399998,27.930000,57.0,0.0,0.0
33886,77400,14,4,19.420000,19.920000,20.5,19.400000,13.700000,39.599998,28.039999,57.0,0.0,0.0


In [6]:
# Create lag features and change the positioning of the columns

lags = 1  # Number of rows each lag feature looks back

# Select the columns for which you want to add lag features
columns_to_lag = ["T_Supply", "T_Return", "SP_Return", "T_Saturation", "T_Outdoor", "RH_Supply", "RH_Return", "RH_Outdoor", "Energy", "Power"]

# Add every "<col>_lag" column in one step. assign() returns a new frame, so
# nothing is written into the frame produced by the earlier column selection
# (which may otherwise trigger a chained-assignment warning), and re-running
# the cell simply overwrites existing lag columns.
# NOTE(review): shift() moves values by row position, not by elapsed time; if
# the log has gaps, a "lag" can span more than one sampling interval — confirm.
df = df.assign(**{col + '_lag': df[col].shift(lags) for col in columns_to_lag})

# Drop rows with NaN values. This removes the first `lags` rows created by the
# shift and also any row that already contained a missing value.
df = df.dropna()

# Add the time-related columns first
time_columns = ["time", "day", "month"]
reordered_columns = list(time_columns)

# Then place each feature directly next to its lag feature
for col in columns_to_lag:
    lag_col = col + '_lag'
    reordered_columns.extend([col, lag_col])

# Add any remaining columns not included above, keeping their current order.
# (A set difference here would make the column order vary between runs.)
remaining_columns = [name for name in df.columns if name not in reordered_columns]
reordered_columns.extend(remaining_columns)

# Reorder the columns in the dataframe
df = df[reordered_columns]
df

Unnamed: 0,time,day,month,T_Supply,T_Supply_lag,T_Return,T_Return_lag,SP_Return,SP_Return_lag,T_Saturation,...,RH_Supply,RH_Supply_lag,RH_Return,RH_Return_lag,RH_Outdoor,RH_Outdoor_lag,Energy,Energy_lag,Power,Power_lag
1,80100,14,10,19.855000,19.859999,20.430000,20.469999,18.5,18.5,19.020000,...,71.320000,71.110001,59.000000,58.919998,82.0,79.5,0.0,0.0,0.0,0.0
2,81000,14,10,19.850000,19.855000,20.410000,20.430000,18.5,18.5,19.020000,...,71.470001,71.320000,59.109997,59.000000,79.5,82.0,0.0,0.0,0.0,0.0
3,81900,14,10,19.840000,19.850000,20.379999,20.410000,18.5,18.5,19.080000,...,71.439995,71.470001,59.309998,59.109997,77.0,79.5,0.0,0.0,0.0,0.0
4,82800,14,10,19.830000,19.840000,20.350000,20.379999,18.5,18.5,19.080000,...,71.580002,71.439995,59.559998,59.309998,79.5,77.0,0.0,0.0,0.0,0.0
5,83700,14,10,19.830000,19.830000,20.320000,20.350000,18.5,18.5,19.100000,...,71.599998,71.580002,59.660000,59.559998,82.0,79.5,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33883,74700,14,4,19.539999,19.539999,20.004999,20.059999,20.5,20.5,19.619999,...,39.020000,39.020000,27.930000,27.769999,57.0,57.0,0.0,0.0,0.0,0.0
33884,75600,14,4,19.520000,19.539999,19.949999,20.004999,20.5,20.5,19.539999,...,39.020000,39.020000,28.090000,27.930000,57.0,57.0,0.0,0.0,0.0,0.0
33885,76500,14,4,19.430000,19.520000,19.955000,19.949999,20.5,20.5,19.420000,...,39.399998,39.020000,27.930000,28.090000,57.0,57.0,0.0,0.0,0.0,0.0
33886,77400,14,4,19.420000,19.430000,19.920000,19.955000,20.5,20.5,19.400000,...,39.599998,39.399998,28.039999,27.930000,57.0,57.0,0.0,0.0,0.0,0.0


In [7]:
# Fit a min-max scaler on the whole frame.
# NOTE(review): `data_scaled` is not used by the split below or by the visible
# training cell, and the scaler is fitted before the train/test split (it sees
# the test rows). Confirm whether it is needed at all.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df)

# Model inputs: the time features plus the lagged value of every measurement.
feature_columns = [
    'time', 'day', 'month',
    'T_Supply_lag', 'T_Return_lag', 'SP_Return_lag', 'T_Saturation_lag',
    'T_Outdoor_lag', 'RH_Supply_lag', 'RH_Return_lag', 'RH_Outdoor_lag',
    'Energy_lag', 'Power_lag',
]

# Inputs are the feature columns; targets are every other column, i.e. the
# current (un-lagged) measurements.
X = df[feature_columns]
y = df.drop(columns=feature_columns)

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
# NOTE(review): presumably the rows are shuffled by default, which mixes
# neighbouring timesteps between train and test; consider a chronological
# split for this lagged time-series setup.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# Initialize and train the AutoKeras model
# (up to 10 search trials; overwrite=True discards results of a previous search)
regressor = ak.StructuredDataRegressor(max_trials=10, overwrite=True)
regressor.fit(X_train, y_train, verbose=2)

# Make predictions once and reuse them for every metric instead of running
# inference on the same test inputs four separate times.
predictions = regressor.predict(X_test)

# Evaluate the model on the testing set
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
# NOTE(review): the displayed data shows Energy and Power values of 0.0, so a
# percentage error is likely dominated by near-zero targets; read with care.
mape = mean_absolute_percentage_error(y_test, predictions)
rmse = np.sqrt(mse)

print('Mean Squared Error:', mse)
print('Mean Absolute Error:', mae)
print('Mean Absolute Percentage Error:', mape)
print('Root Mean Squared Error:', rmse)

# Print the predictions. These cover every row of the test set (one prediction
# per held-out row), not a fixed number of future timesteps.
print('Predicted values for the test set:')
print(predictions)

Trial 7 Complete [00h 03m 06s]
val_loss: 8.474771499633789

Best val_loss So Far: 6.857917308807373
Total elapsed time: 00h 31m 22s

Search: Running Trial #8

Value             |Best Value So Far |Hyperparameter
True              |True              |structured_data_block_1/normalize
False             |False             |structured_data_block_1/dense_block_1/use_batchnorm
1                 |2                 |structured_data_block_1/dense_block_1/num_layers
32                |32                |structured_data_block_1/dense_block_1/units_0
0                 |0                 |structured_data_block_1/dense_block_1/dropout
16                |16                |structured_data_block_1/dense_block_1/units_1
0                 |0                 |regression_head_1/dropout
adam              |adam              |optimizer
0.001             |0.001             |learning_rate

Epoch 1/1000
679/679 - 2s - loss: 648.4546 - mean_squared_error: 648.4546 - val_loss: 190.1166 - val_mean_squared_error: 1