In [None]:
# import pandas as pd
# import xgboost as xgb
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import mean_squared_error
# from sklearn.preprocessing import OneHotEncoder

# # Prepare the data as before
# df = pd.read_csv("light_data_with_hour_only.csv")

# # Shift the light level to get the next light level
# df['next_light_level'] = df.groupby('room')['light_level'].shift(-1)
# df = df.dropna(subset=['next_light_level'])

# # One-hot encode 'room' column
# encoder = OneHotEncoder(sparse_output=False)
# encoded_rooms = encoder.fit_transform(df[['room']])
# encoded_rooms_df = pd.DataFrame(encoded_rooms, columns=encoder.categories_[0])

# # Merge everything into a final DataFrame
# df = pd.concat([df, encoded_rooms_df], axis=1)
# X = df[['timestamp'] + list(encoded_rooms_df.columns) + ['light_level']]
# y = df['next_light_level']

# # Split data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Train XGBoost model
# dtrain = xgb.DMatrix(X_train, label=y_train)
# dtest = xgb.DMatrix(X_test, label=y_test)

# params = {'objective': 'reg:squarederror', 'eval_metric': 'rmse'}
# model = xgb.train(params, dtrain, num_boost_round=100)

# # Save the trained model
# model.save_model('xgb_model.json')  # Save model to a file (JSON format)

# # Predict on test data
# y_pred = model.predict(dtest)

# # Evaluate model
# mse = mean_squared_error(y_test, y_pred)
# print(f'Mean Squared Error: {mse}')

Mean Squared Error: 39.76382254857484


This first version retrains the XGBoost model on the new dataset, which is formatted for InfluxDB.

In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Load the InfluxDB annotated CSV; lines starting with '#' are treated as comments and skipped
df = pd.read_csv("annotated_light_tracking.csv", comment="#")
# Skip the first two parsed rows.
# NOTE(review): an earlier comment described these as header lines — confirm they are not real samples.
df = df.iloc[2:]
# Strip any extra whitespace from column names
df.columns = df.columns.str.strip()

# Convert the '_time' column to datetime if it's not already
df['_time'] = pd.to_datetime(df['_time'])

# Check the first few rows to ensure data is loaded correctly
print(df.head())

# Feature engineering: hour of day taken from the timestamp
df['hour'] = df['_time'].dt.hour

# Check that the transformation works
print(df[['hour', '_time']].head())

# Rename and select required columns; .copy() so the column assignments below write to an
# independent frame instead of a selection of the raw one
df = df.rename(columns={"position": "room", "_value": "light_level"})
df = df[['hour', 'room', 'light_level']].copy()

# Target: the light level of the next row of the same room
df['next_light_level'] = df.groupby('room')['light_level'].shift(-1)
# The last row of each room has no successor (NaN target): drop it, then store the target as int
df = df.dropna(subset=['next_light_level']).astype({'next_light_level': int})
print()
print("Let's print the shifted dataset")
print(df.iloc[1:20])

# One-hot encode the 'room' column
encoder = OneHotEncoder(sparse_output=False)
encoded_rooms = encoder.fit_transform(df[['room']])
room_columns = encoder.categories_[0]  # Room categories found in the data
encoded_rooms_df = pd.DataFrame(encoded_rooms, columns=room_columns)

# Merge encoded rooms with the original DataFrame (both indexes reset so rows align positionally)
df = pd.concat([df.reset_index(drop=True), encoded_rooms_df.reset_index(drop=True)], axis=1)

# Prepare features and target
X = df[['hour'] + list(room_columns) + ['light_level']]
print("I now print the training values")
print(X.head())
y = df['next_light_level']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train XGBoost model
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

params = {'objective': 'reg:squarederror', 'eval_metric': 'rmse'}
model = xgb.train(params, dtrain, num_boost_round=20)

# Save the trained model and encoder
model.save_model("xgb_model.json")
encoder_path = "room_encoder.npy"
# np.save pickles the encoder object into the .npy file; reading it back requires
# np.load(encoder_path, allow_pickle=True).item()
np.save(encoder_path, encoder)  # Save encoder for future predictions

# Predict on test data; round to the nearest integer (plain .astype(int) truncates toward zero,
# which biases every prediction downwards before the error is measured)
y_pred = np.rint(model.predict(dtest)).astype(int)

# Print some prediction examples
print(f'Room       | Current Light Level | Real Next Light | Predicted Next Light')
n_examples = 21  # number of example rows shown
for i, (real, pred) in enumerate(zip(y_test, y_pred)):
    if i >= n_examples:
        break
    row = X_test.iloc[i]
    current_light = row['light_level']
    # Use the categories learned by the encoder rather than a hard-coded room list
    room = row[list(room_columns)].idxmax()
    print(f'{room:<10} | {current_light}              | {real}            | {pred}')

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

   Unnamed: 0  result  table                       _start  \
2         NaN     NaN      1  2025-01-08T10:46:44.129733Z   
3         NaN     NaN      1  2025-01-08T10:46:44.129733Z   
4         NaN     NaN      1  2025-01-08T10:46:44.129733Z   
5         NaN     NaN      1  2025-01-08T10:46:44.129733Z   
6         NaN     NaN      1  2025-01-08T10:46:44.129733Z   

                         _stop                     _time  _value  \
2  2025-01-08T16:46:44.129889Z 2024-11-01 01:00:00+00:00       0   
3  2025-01-08T16:46:44.129889Z 2024-11-01 01:00:00+00:00       0   
4  2025-01-08T16:46:44.129889Z 2024-11-01 02:00:00+00:00       0   
5  2025-01-08T16:46:44.129889Z 2024-11-01 02:00:00+00:00       0   
6  2025-01-08T16:46:44.129889Z 2024-11-01 03:00:00+00:00       0   

         _field    _measurement device_id position  sampling_rate  
2  sensors_mean  light_tracking   ESP32_1  balcony           5000  
3  sensors_mean  light_tracking   ESP32_1  bedroom           5000  
4  sensors_mean  lig

The following code trains a neural network (NN) on the formatted dataset.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load the InfluxDB annotated CSV; lines starting with '#' are treated as comments and skipped
df = pd.read_csv("annotated_light_tracking.csv", comment="#")
# Skip the first two parsed rows.
# NOTE(review): an earlier comment described these as header lines — confirm they are not real samples.
df = df.iloc[2:]
# Strip any extra whitespace from column names
df.columns = df.columns.str.strip()

# Convert the '_time' column to datetime if it's not already
df['_time'] = pd.to_datetime(df['_time'])

# Feature engineering: Extract hour from the '_time' column
df['hour'] = df['_time'].dt.hour

# Rename and select required columns; .copy() so later column assignments write to an
# independent frame instead of a selection of the raw one
df = df.rename(columns={"position": "room", "_value": "light_level"})
df = df[['hour', 'room', 'light_level']].copy()

# Target: the light level of the next row of the same room
df['next_light_level'] = df.groupby('room')['light_level'].shift(-1)
# dropna already removes the last row of each room (the only one without a successor), so no
# further per-room trimming is done: trimming again would discard a valid sample per room
df = df.dropna(subset=['next_light_level']).reset_index(drop=True)
# Show rows 1..29 (a slice; `df[1_30]` would be a lookup of a column named 130)
print(df.iloc[1:30])

# One-hot encode the 'room' column
encoder = OneHotEncoder(sparse_output=False)
encoded_rooms = encoder.fit_transform(df[['room']])
room_columns = encoder.categories_[0]  # Room categories found in the data
encoded_rooms_df = pd.DataFrame(encoded_rooms, columns=room_columns)

# Merge encoded rooms with the original DataFrame
df = pd.concat([df.reset_index(drop=True), encoded_rooms_df.reset_index(drop=True)], axis=1)

# Prepare features and target
X = df[['hour'] + list(room_columns) + ['light_level']]
y = df['next_light_level']

# Split first, then fit the scaler on the training rows only, so that no statistics of the
# test rows leak into training
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features (important for neural networks)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)  # full feature matrix in the same scale, kept for later use
# NOTE(review): the fitted scaler is not written to disk here; anything that reloads the saved
# model needs the same scaling applied to its inputs — confirm how inference obtains it.

# Build the Neural Network model using Keras
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),  # One input per feature column
    layers.Dense(128, activation='relu'),  # First hidden layer
    layers.Dropout(0.3),  # Dropout for regularization
    layers.Dense(64, activation='relu'),  # Second hidden layer
    layers.Dropout(0.3),  # Dropout for regularization
    layers.Dense(32, activation='relu'),  # Third hidden layer
    layers.Dense(1)  # Output layer (single regression value)
])

# Compile the model with the Adam optimizer and a stepwise exponential learning-rate schedule.
# With staircase=True the rate is multiplied by decay_rate once every decay_steps optimizer steps.
# NOTE(review): the logged run shows 73 steps per epoch, i.e. fewer than 100000 steps over
# 520 epochs, so the rate would stay at initial_lr for the whole run — confirm this is intended.
initial_lr = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
              loss='mean_squared_error',
              metrics=['mean_squared_error'])

# Train the model (20% of the training rows are held out for validation)
history = model.fit(X_train, y_train, epochs=520, batch_size=32, validation_split=0.2)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Save the trained model and encoder
model.save("neural_network_model.h5")
encoder_path = "room_encoder.npy"
# np.save pickles the encoder object into the .npy file; reading it back requires
# np.load(encoder_path, allow_pickle=True).item()
np.save(encoder_path, encoder)  # Save encoder for future predictions

Epoch 1/520




[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 3235.2041 - mean_squared_error: 3235.2041 - val_loss: 1207.3223 - val_mean_squared_error: 1207.3223
Epoch 2/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 850.8309 - mean_squared_error: 850.8309 - val_loss: 569.1741 - val_mean_squared_error: 569.1741
Epoch 3/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 527.9602 - mean_squared_error: 527.9602 - val_loss: 529.7346 - val_mean_squared_error: 529.7346
Epoch 4/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 485.5536 - mean_squared_error: 485.5536 - val_loss: 501.5987 - val_mean_squared_error: 501.5987
Epoch 5/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 512.2957 - mean_squared_error: 512.2957 - val_loss: 482.8434 - val_mean_squared_error: 482.8434
Epoch 6/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0



Mean Squared Error: 81.88303113445454


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load the InfluxDB annotated CSV; lines starting with '#' are treated as comments and skipped
df = pd.read_csv("annotated_light_tracking.csv", comment="#")
# Skip the first two parsed rows.
# NOTE(review): an earlier comment described these as header lines — confirm they are not real samples.
df = df.iloc[2:]
# Strip any extra whitespace from column names
df.columns = df.columns.str.strip()

# Convert the '_time' column to datetime if it's not already
df['_time'] = pd.to_datetime(df['_time'])

# Feature engineering: Extract hour from the '_time' column
df['hour'] = df['_time'].dt.hour

# Rename and select required columns; .copy() so the column assignments below write to an
# independent frame instead of a selection of the raw one
df = df.rename(columns={"position": "room", "_value": "light_level"})
df = df[['hour', 'room', 'light_level']].copy()

# Target: the light level of the next row of the same room
light_by_room = df.groupby('room')['light_level']
df['next_light_level'] = light_by_room.shift(-1)

# Lagged light features: the value 1, 2 and 3 rows earlier in the same room
lag_columns = []
for lag in (1, 2, 3):
    lag_name = f'light_level_lag_{lag}'
    df[lag_name] = light_by_room.shift(lag)
    lag_columns.append(lag_name)

# Drop rows with missing values due to shifting (first three and last row of each room)
df = df.dropna(subset=['next_light_level'] + lag_columns)
print(df.head())

# One-hot encode the 'room' column
encoder = OneHotEncoder(sparse_output=False)
encoded_rooms = encoder.fit_transform(df[['room']])
room_columns = encoder.categories_[0]  # Room categories found in the data
encoded_rooms_df = pd.DataFrame(encoded_rooms, columns=room_columns)

# Merge encoded rooms with the original DataFrame (both indexes reset so rows align positionally)
df = pd.concat([df.reset_index(drop=True), encoded_rooms_df.reset_index(drop=True)], axis=1)

# Prepare features and target
X = df[['hour'] + list(room_columns) + ['light_level'] + lag_columns]
y = df['next_light_level']

# Split first, then fit the scaler on the training rows only, so that no statistics of the
# test rows leak into training
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features (important for neural networks)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)  # full feature matrix in the same scale, kept for later use
# NOTE(review): the fitted scaler is not written to disk here; anything that reloads the saved
# model needs the same scaling applied to its inputs — confirm how inference obtains it.

# Build the Neural Network model using Keras
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),  # One input per feature column
    layers.Dense(128, activation='relu'),  # First hidden layer
    layers.Dropout(0.3),  # Dropout for regularization
    layers.Dense(64, activation='relu'),  # Second hidden layer
    layers.Dropout(0.3),  # Dropout for regularization
    layers.Dense(32, activation='relu'),  # Third hidden layer
    layers.Dense(1)  # Output layer (single regression value)
])

# Compile the model with the Adam optimizer and a stepwise exponential learning-rate schedule.
# With staircase=True the rate is multiplied by decay_rate once every decay_steps optimizer steps.
# NOTE(review): the logged run shows 73 steps per epoch, i.e. fewer than 100000 steps over
# 520 epochs, so the rate would stay at initial_lr for the whole run — confirm this is intended.
initial_lr = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
              loss='mean_squared_error',
              metrics=['mean_squared_error'])

# Train the model (20% of the training rows are held out for validation)
history = model.fit(X_train, y_train, epochs=520, batch_size=32, validation_split=0.2)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Save the trained model and encoder
model.save("neural_network_model.h5")
encoder_path = "room_encoder.npy"
# np.save pickles the encoder object into the .npy file; reading it back requires
# np.load(encoder_path, allow_pickle=True).item()
np.save(encoder_path, encoder)  # Save encoder for future predictions

    hour     room  light_level  next_light_level  light_level_lag_1  \
8      4  balcony            0               0.0                0.0   
9      4  bedroom            0               0.0                0.0   
10     5  balcony            0               0.0                0.0   
11     5  bedroom            0               0.0                0.0   
12     6  balcony            0              97.0                0.0   

    light_level_lag_2  light_level_lag_3  
8                 0.0                0.0  
9                 0.0                0.0  
10                0.0                0.0  
11                0.0                0.0  
12                0.0                0.0  
Epoch 1/520




[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 2929.2017 - mean_squared_error: 2929.2017 - val_loss: 1093.5249 - val_mean_squared_error: 1093.5249
Epoch 2/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 934.0981 - mean_squared_error: 934.0981 - val_loss: 602.2454 - val_mean_squared_error: 602.2454
Epoch 3/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 617.1867 - mean_squared_error: 617.1867 - val_loss: 520.5230 - val_mean_squared_error: 520.5230
Epoch 4/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 541.7854 - mean_squared_error: 541.7854 - val_loss: 483.2127 - val_mean_squared_error: 483.2127
Epoch 5/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 579.8109 - mean_squared_error: 579.8109 - val_loss: 442.8338 - val_mean_squared_error: 442.8338
Epoch 6/520
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0



Mean Squared Error: 64.05738886774597


In [None]:
# Show the first 30 rows whose one-hot 'bedroom' column equals 1
print(df.loc[df['bedroom'] == 1].head(30))

    hour     room  light_level  next_light_level  light_level_lag_1  \
1      4  bedroom            0               0.0                0.0   
3      5  bedroom            0               0.0                0.0   
5      6  bedroom            0               0.0                0.0   
7      7  bedroom            0               0.0                0.0   
9      8  bedroom            0              93.0                0.0   
11     9  bedroom           93              92.0                0.0   
13    10  bedroom           92              92.0               93.0   
15    11  bedroom           92              91.0               92.0   
17    12  bedroom           91              89.0               92.0   
19    13  bedroom           89              91.0               91.0   
21    14  bedroom           91              93.0               89.0   
23    15  bedroom           93              93.0               91.0   
25    16  bedroom           93               4.0               93.0   
27    

In [None]:
def predict_and_print(model, X_data, y_data, room_columns=('balcony', 'bedroom', 'living_room'), max_rows=None):
    """
    Print a table comparing real next light levels with the model's predictions.

    For every sample the table shows the room, the current light level, the real target and
    the predicted target.

    Parameters:
    - model: Trained XGBoost model; it is called as ``model.predict(xgb.DMatrix(X_data))``
    - X_data: pandas DataFrame of input features; must contain a 'light_level' column and
      the one-hot room columns named in ``room_columns``
    - y_data: Actual light levels (real values), in the same order as the rows of X_data
    - room_columns: Names of the one-hot room columns in X_data. Defaults to the three rooms
      that were previously hard-coded; pass the encoder's categories for other datasets
    - max_rows: Optional cap on the number of rows printed; None prints every row

    Returns:
    - None; the table is written to standard output
    """
    # Make predictions on the input data
    y_pred = model.predict(xgb.DMatrix(X_data))

    # Resolve both displayed columns once, column-wise, instead of building a row per iteration
    current_lights = X_data['light_level']
    # For each row, the room is the one-hot column holding the largest value
    rooms = X_data[list(room_columns)].idxmax(axis=1)

    print(f'Room       | Current Light Level | Real Next Light | Predicted Next Light')

    table_rows = zip(rooms, current_lights, y_data, y_pred)
    for i, (room, current_light, real, pred) in enumerate(table_rows):
        if max_rows is not None and i >= max_rows:
            break
        print(f'{room:<10} | {current_light:.2f}              | {real:.2f}            | {pred:.2f}')

# Demo: tabulate the predictions for the held-out test split.
# NOTE(review): predict_and_print wraps X_data in xgb.DMatrix, so `model` has to be the XGBoost
# booster and `X_test` a DataFrame with the room columns. The NN training cells above rebind
# `model` to a Keras model and `X_test` to a scaled array — confirm the intended run order.
predict_and_print(model=model, X_data=X_test, y_data=y_test)

Room       | Current Light Level | Real Next Light | Predicted Next Light
living_room | 75.73              | 73.57            | 77.03
bedroom    | 94.07              | 73.35            | 71.16
living_room | 74.45              | 63.63            | 77.20
balcony    | 95.92              | 61.15            | 75.59
bedroom    | 65.77              | 84.99            | 74.36
living_room | 99.43              | 69.35            | 79.59
balcony    | 76.47              | 80.85            | 81.44
living_room | 98.60              | 61.18            | 72.31
living_room | 90.85              | 62.39            | 82.77
living_room | 69.94              | 75.34            | 81.83
living_room | 67.89              | 88.79            | 71.26
living_room | 63.18              | 82.80            | 76.29
balcony    | 98.57              | 97.10            | 77.11
living_room | 61.33              | 83.53            | 74.99
bedroom    | 63.86              | 93.35            | 83.27
bedroom    | 86.32              

In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error

# Load the dataset
df = pd.read_csv("light_data_with_hour_only.csv")

# Target: the light level of the next row of the same room
df['next_light_level'] = df.groupby('room')['light_level'].shift(-1)
# Rows without a successor have a NaN target; drop them and renumber the rows so the index
# has no gaps (the one-hot frame built below is joined to df by index)
df = df.dropna(subset=['next_light_level']).reset_index(drop=True)

# One-hot encode 'room' column
encoder = OneHotEncoder(sparse_output=False)
encoded_rooms = encoder.fit_transform(df[['room']])
# Reuse df's index so that pd.concat(axis=1) pairs each sample with its own encoding
encoded_rooms_df = pd.DataFrame(encoded_rooms, columns=encoder.categories_[0], index=df.index)

# Merge everything into a final DataFrame
df = pd.concat([df, encoded_rooms_df], axis=1)
X = df[['timestamp'] + list(encoded_rooms_df.columns) + ['light_level']]
y = df['next_light_level']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the neural network model using TensorFlow (Keras)
model = tf.keras.Sequential()

# Input: one value per feature column of X
model.add(tf.keras.Input(shape=(X_train.shape[1],)))

# Hidden layer 1: A dense layer with 64 neurons and ReLU activation function
model.add(tf.keras.layers.Dense(64, activation='relu'))

# Hidden layer 2: A dense layer with 32 neurons and ReLU activation function
model.add(tf.keras.layers.Dense(32, activation='relu'))

# Output layer: A single neuron to predict the next light level
model.add(tf.keras.layers.Dense(1))

# Compile the model: Mean Squared Error loss and Adam optimizer
model.compile(optimizer='adam', loss='mse')

# ModelCheckpoint callback: keeps on disk the model from the epoch with the lowest validation
# loss. save_weights_only is left at its default, so the whole model is written, not only weights.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'NN_model.weights.keras',  # File path of the saved best model
    monitor='val_loss',          # Monitor validation loss
    save_best_only=True,         # Only overwrite the file when val_loss improves
    mode='min',                  # Lower validation loss is better
    verbose=1                    # Display a message when saving the model
)

# Train the model with the ModelCheckpoint callback
model.fit(X_train, y_train, epochs=500, batch_size=32, validation_split=0.2, callbacks=[checkpoint])

# Evaluate the model on the test data.
# NOTE(review): `model` holds the weights of the last epoch here, which is not necessarily the
# checkpointed best epoch — confirm which one should be reported.
y_pred = model.predict(X_test)

# Evaluate and print the Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Function to print real and predicted light levels
def predict_and_print_nn(model, X_data, y_data, room_columns=('balcony', 'bedroom', 'living_room'), max_rows=None):
    """
    Print a table comparing real light levels with the model's predictions.

    For every sample the table shows the room, the current light level, the real target and
    the predicted target.

    Parameters:
    - model: Trained model; ``model.predict(X_data)`` must return one row per sample, and the
      first element of each row is printed as the prediction
    - X_data: pandas DataFrame of input features; must contain a 'light_level' column and
      the one-hot room columns named in ``room_columns``
    - y_data: Actual light levels (real values), in the same order as the rows of X_data
    - room_columns: Names of the one-hot room columns in X_data. Defaults to the three rooms
      that were previously hard-coded; pass the encoder's categories for other datasets
    - max_rows: Optional cap on the number of rows printed; None prints every row

    Returns:
    - None; the table is written to standard output
    """
    # Make predictions on the input data
    y_pred = model.predict(X_data)

    # Resolve both displayed columns once, column-wise, instead of building a row per iteration
    current_lights = X_data['light_level']
    # For each row, the room is the one-hot column holding the largest value
    rooms = X_data[list(room_columns)].idxmax(axis=1)

    print(f'Room       | Current Light Level | Real Light Level | Predicted Light Level')

    table_rows = zip(rooms, current_lights, y_data, y_pred)
    for i, (room, current_light, real, pred) in enumerate(table_rows):
        if max_rows is not None and i >= max_rows:
            break
        print(f'{room:<10} | {current_light:.2f}              | {real:.2f}            | {pred[0]:.2f}')

# Demo: tabulate the network's predictions for the held-out test split
predict_and_print_nn(model=model, X_data=X_test, y_data=y_test)

KeyboardInterrupt: 

In [None]:
import pandas as pd
import tensorflow as tf
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the dataset
df = pd.read_csv("light_data_with_hour_only.csv")

# Target: the light level of the next row of the same room
df['next_light_level'] = df.groupby('room')['light_level'].shift(-1)
# Rows without a successor have a NaN target; drop them and renumber the rows so the index
# has no gaps (the one-hot frame built below is joined to df by index)
df = df.dropna(subset=['next_light_level']).reset_index(drop=True)

# One-hot encode 'room' column
encoder = OneHotEncoder(sparse_output=False)
encoded_rooms = encoder.fit_transform(df[['room']])
# Reuse df's index so that pd.concat(axis=1) pairs each sample with its own encoding
encoded_rooms_df = pd.DataFrame(encoded_rooms, columns=encoder.categories_[0], index=df.index)

# Merge everything into a final DataFrame
df = pd.concat([df, encoded_rooms_df], axis=1)
X = df[['timestamp'] + list(encoded_rooms_df.columns) + ['light_level']]
y = df['next_light_level']

# Split data into training and testing sets (same seed as the training cells)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ------------------------
# Load the XGBoost model
# ------------------------
# NOTE(review): 'xgb_model.json' is also written by the cell that trains on
# annotated_light_tracking.csv with an 'hour' feature; make sure the file loaded here was
# trained on the 'timestamp' feature layout built above — confirm.
xgb_model = xgb.Booster()
xgb_model.load_model('xgb_model.json')  # Load the XGBoost model

# ------------------------
# Load the Neural Network model
# ------------------------
# NOTE(review): the training cells in this notebook checkpoint to 'NN_model.weights.keras' and
# 'NN_model_deeper.weights.keras'; nothing visible here writes 'best_NN_model.weights.keras' —
# confirm where this file comes from.
nn_model = tf.keras.models.load_model('best_NN_model.weights.keras')  # Load the NN model

# ------------------------
# Make predictions using both models
# ------------------------

# XGBoost predictions
dtest = xgb.DMatrix(X_test)
xgb_predictions = xgb_model.predict(dtest)

# Neural Network predictions
nn_predictions = nn_model.predict(X_test)

# ------------------------
# Evaluate the performance using Mean Squared Error (MSE)
# ------------------------

# Calculate MSE for XGBoost
xgb_mse = mean_squared_error(y_test, xgb_predictions)

# Calculate MSE for Neural Network
nn_mse = mean_squared_error(y_test, nn_predictions)

# ------------------------
# Compare the performances
# ------------------------

print(f'XGBoost Model Mean Squared Error: {xgb_mse:.4f}')
print(f'Neural Network Model Mean Squared Error: {nn_mse:.4f}')

# Additional information: which model performs better (lower MSE wins)
if xgb_mse < nn_mse:
    print("XGBoost performs better.")
else:
    print("Neural Network performs better.")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
XGBoost Model Mean Squared Error: 39.7638
Neural Network Model Mean Squared Error: 51.0926
XGBoost performs better.


In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error

# Load the dataset
df = pd.read_csv("light_data_with_hour_only.csv")

# Target: the light level of the next row of the same room
df['next_light_level'] = df.groupby('room')['light_level'].shift(-1)
# Rows without a successor have a NaN target; drop them and renumber the rows so the index
# has no gaps (the one-hot frame built below is joined to df by index)
df = df.dropna(subset=['next_light_level']).reset_index(drop=True)

# One-hot encode 'room' column
encoder = OneHotEncoder(sparse_output=False)
encoded_rooms = encoder.fit_transform(df[['room']])
# Reuse df's index so that pd.concat(axis=1) pairs each sample with its own encoding
encoded_rooms_df = pd.DataFrame(encoded_rooms, columns=encoder.categories_[0], index=df.index)

# Merge everything into a final DataFrame
df = pd.concat([df, encoded_rooms_df], axis=1)
X = df[['timestamp'] + list(encoded_rooms_df.columns) + ['light_level']]
y = df['next_light_level']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the neural network model using TensorFlow (Keras)
model = tf.keras.Sequential()

# Input: one value per feature column of X
model.add(tf.keras.Input(shape=(X_train.shape[1],)))

#CHANGED STUFF ---------------------
# Hidden layer 1: A dense layer with 64 neurons and ReLU activation function
# NOTE(review): an earlier comment announced 128 neurons here — confirm the intended width.
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))  # Dropout layer for regularization
model.add(tf.keras.layers.BatchNormalization())  # Batch normalization for stability

# Hidden layer 2: A dense layer with 32 neurons and ReLU activation function
model.add(tf.keras.layers.Dense(32, activation='relu'))

# Output layer: A single neuron to predict the next light level
model.add(tf.keras.layers.Dense(1))
#CHANGED STUFF ---------------------

# Compile the model: Mean Squared Error loss and Adam optimizer
model.compile(optimizer='adam', loss='mse')

# ModelCheckpoint callback: keeps on disk the model from the epoch with the lowest validation
# loss. save_weights_only is left at its default, so the whole model is written, not only weights.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'NN_model_deeper.weights.keras',  # File path of the saved best model
    monitor='val_loss',          # Monitor validation loss
    save_best_only=True,         # Only overwrite the file when val_loss improves
    mode='min',                  # Lower validation loss is better
    verbose=1                    # Display a message when saving the model
)

# Train the model with the ModelCheckpoint callback
model.fit(X_train, y_train, epochs=500, batch_size=32, validation_split=0.2, callbacks=[checkpoint])

# Evaluate the model on the test data.
# NOTE(review): `model` holds the weights of the last epoch here, which is not necessarily the
# checkpointed best epoch — confirm which one should be reported.
y_pred = model.predict(X_test)

# Evaluate and print the Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Function to print real and predicted light levels
def predict_and_print_nn(model, X_data, y_data, room_columns=('balcony', 'bedroom', 'living_room'), max_rows=None):
    """
    Print a table comparing real light levels with the model's predictions.

    For every sample the table shows the room, the current light level, the real target and
    the predicted target.

    Parameters:
    - model: Trained model; ``model.predict(X_data)`` must return one row per sample, and the
      first element of each row is printed as the prediction
    - X_data: pandas DataFrame of input features; must contain a 'light_level' column and
      the one-hot room columns named in ``room_columns``
    - y_data: Actual light levels (real values), in the same order as the rows of X_data
    - room_columns: Names of the one-hot room columns in X_data. Defaults to the three rooms
      that were previously hard-coded; pass the encoder's categories for other datasets
    - max_rows: Optional cap on the number of rows printed; None prints every row

    Returns:
    - None; the table is written to standard output
    """
    # Make predictions on the input data
    y_pred = model.predict(X_data)

    # Resolve both displayed columns once, column-wise, instead of building a row per iteration
    current_lights = X_data['light_level']
    # For each row, the room is the one-hot column holding the largest value
    rooms = X_data[list(room_columns)].idxmax(axis=1)

    print(f'Room       | Current Light Level | Real Light Level | Predicted Light Level')

    table_rows = zip(rooms, current_lights, y_data, y_pred)
    for i, (room, current_light, real, pred) in enumerate(table_rows):
        if max_rows is not None and i >= max_rows:
            break
        print(f'{room:<10} | {current_light:.2f}              | {real:.2f}            | {pred[0]:.2f}')

# Demo: tabulate the network's predictions for the held-out test split
predict_and_print_nn(model=model, X_data=X_test, y_data=y_test)

Epoch 1/500




[1m38/44[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 4ms/step - loss: 1574.5667
Epoch 1: val_loss improved from inf to 965.01324, saving model to NN_model_deeper.weights.keras
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 1551.5646 - val_loss: 965.0132
Epoch 2/500
[1m35/44[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 5ms/step - loss: 1085.7258
Epoch 2: val_loss improved from 965.01324 to 637.13733, saving model to NN_model_deeper.weights.keras
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1095.5575 - val_loss: 637.1373
Epoch 3/500
[1m37/44[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 4ms/step - loss: 920.9799
Epoch 3: val_loss improved from 637.13733 to 308.48425, saving model to NN_model_deeper.weights.keras
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 882.6878 - val_loss: 308.4843
Epoch 4/500
[1m43/44[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1