Step 1: Load and preprocess the data

In [None]:
import os
import datetime

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

# Load the raw dataset from the Excel workbook.
file_path = '/mnt/data/dataSet LoraWAN.xlsx'
df = pd.read_excel(file_path)

# Parse the 'time' column into pandas datetimes; the resampling step needs
# real timestamps rather than strings/objects.
df['time'] = pd.to_datetime(df['time'])

# Display the first few rows of the dataset
df.head()

# Report how many missing values each column contains.
print(df.isna().sum())

# Fill missing 'power_consumption' values with the column mean.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the column selection: the chained in-place form is
# deprecated in pandas 2.x and does not modify `df` under copy-on-write.
df['power_consumption'] = df['power_consumption'].fillna(df['power_consumption'].mean())

Step 2: Resample the data

In [None]:
# Index the frame by timestamp so rows can be bucketed by time.
df.set_index('time', inplace=True)

# Average all readings that fall into the same hour.
# 'h' is the current spelling of the hourly alias; the upper-case 'H' is
# deprecated in recent pandas releases.
df = df.resample('h').mean()

# Hours that contain no readings come out of the resample as all-NaN rows.
# Bridge those gaps by linear interpolation so NaNs do not flow into the
# normalisation and training steps (a no-op when there are no gaps).
df = df.interpolate()

# Turn the timestamp index back into a regular 'time' column.
df.reset_index(inplace=True)

# Display the first few rows of the resampled dataset
df.head()

Step 3: Split the data

In [None]:
# Fractions of the (chronologically ordered) rows given to each split.
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

# The test split is "everything after validation", so the three ratios only
# describe the real split when they add up to one.
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError('train_ratio, val_ratio and test_ratio must sum to 1')

# Split the data
n = len(df)
train_end = int(n * train_ratio)
# 0.7 + 0.2 evaluates to 0.8999999999999999 in floating point, so truncating
# n * (train_ratio + val_ratio) directly lands one row short whenever the
# exact product is an integer (e.g. 89 instead of 90 for n = 100). Rounding
# the cumulative ratio first removes that representation error.
val_end = int(n * round(train_ratio + val_ratio, 10))

train_df = df[:train_end]
val_df = df[train_end:val_end]
test_df = df[val_end:]

# Display the sizes of the splits
print(len(train_df), len(val_df), len(test_df))

Step 4: Normalize the data

In [None]:
# Standardise every column using statistics computed on the training split.
train_mean = train_df.mean()
train_std = train_df.std()

# A column that is constant over the training split has a standard deviation
# of zero; dividing by it would turn the whole column into NaN/inf. Use 1
# instead so such a column is simply centred.
train_std = train_std.where(train_std != 0, 1)

# The validation and test splits are scaled with the training mean/std as
# well, so all three splits share the same transformation.
train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std

Step 5: Create data windows

In [None]:
import numpy as np
import tensorflow as tf

class WindowGenerator():
    """Cut train/val/test frames into fixed-size (inputs, labels) windows.

    A window spans ``input_width + shift`` consecutive rows. The first
    ``input_width`` rows are the inputs; the last ``label_width`` rows are the
    labels, optionally restricted to ``label_columns``.
    """

    def __init__(self, input_width, label_width, shift, train_df, val_df, test_df, label_columns=None):
        """Store the data splits and pre-compute the window layout.

        Args:
            input_width: Number of time steps fed to the model.
            label_width: Number of time steps (at the end of the window) used
                as labels.
            shift: Number of time steps between the last input step and the
                last label step.
            train_df: Training frame; its column order defines
                ``column_indices``.
            val_df: Validation frame.
            test_df: Test frame.
            label_columns: Names of the columns to predict, or ``None`` to
                keep every column in the labels.
        """
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Work out the label column indices.
        # `label_columns_indices` maps a label name to its position in the
        # label tensor; `column_indices` maps a name to its position in the
        # full feature tensor.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}
        self.column_indices = {name: i for i, name in enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        # Inputs occupy the first `input_width` positions of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels occupy the last `label_width` positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Return a multi-line summary of the window layout."""
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'
        ])

    def split_window(self, features):
        """Split a batch of full windows into ``(inputs, labels)``.

        Args:
            features: Rank-3 tensor indexed as ``[batch, time, column]`` whose
                time axis covers the whole window.

        Returns:
            ``inputs`` with ``input_width`` time steps and every column, and
            ``labels`` with ``label_width`` time steps and either every column
            or only ``label_columns`` (in that order).
        """
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            # Keep only the requested columns, stacked on the last axis.
            labels = tf.stack(
                [labels[:, :, self.column_indices[name]] for name in self.label_columns],
                axis=-1)

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels

    def plot(self, model=None, plot_col='power_consumption', max_subplots=3):
        """Plot inputs, labels and (optionally) predictions for the example batch.

        Args:
            model: Optional callable applied to the example inputs; its output
                is indexed as ``[window, time, label]``.
            plot_col: Name of the column to draw; must be a key of
                ``column_indices``.
            max_subplots: Maximum number of windows to draw, one per subplot.
        """
        inputs, labels = self.example
        plt.figure(figsize=(12, 8))
        plot_col_index = self.column_indices[plot_col]
        max_n = min(max_subplots, len(inputs))

        # Position of `plot_col` inside the label tensor. It does not depend on
        # the subplot, so resolve it once. None means `plot_col` is not one of
        # the label columns and only the inputs can be drawn.
        if self.label_columns:
            label_col_index = self.label_columns_indices.get(plot_col, None)
        else:
            label_col_index = plot_col_index

        # Run the model once for the whole batch rather than once per subplot;
        # skip the call entirely when its output would never be drawn.
        predictions = None
        if model is not None and label_col_index is not None and max_n > 0:
            predictions = model(inputs)

        for n in range(max_n):
            plt.subplot(max_n, 1, n + 1)
            plt.ylabel(f'{plot_col} [normed]')
            plt.plot(self.input_indices, inputs[n, :, plot_col_index],
                     label='Inputs', marker='.', zorder=-10)

            if label_col_index is None:
                continue

            plt.scatter(self.label_indices, labels[n, :, label_col_index],
                        edgecolors='k', label='Labels', c='#2ca02c', s=64)
            if predictions is not None:
                plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                            marker='X', edgecolors='k', label='Predictions',
                            c='#ff7f0e', s=64)

            # One legend on the first subplot is enough.
            if n == 0:
                plt.legend()

        plt.xlabel('Time [h]')

    def make_dataset(self, data):
        """Build a shuffled, batched dataset of ``(inputs, labels)`` windows.

        Args:
            data: Tabular data convertible to a float32 array with one row per
                time step.

        Returns:
            A dataset of batches of 32 windows (stride 1), each already split
            by ``split_window``.
        """
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.utils.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=True,
            batch_size=32)

        ds = ds.map(self.split_window)

        return ds

    @property
    def train(self):
        """Windowed dataset built from ``train_df`` (rebuilt on every access)."""
        return self.make_dataset(self.train_df)

    @property
    def val(self):
        """Windowed dataset built from ``val_df`` (rebuilt on every access)."""
        return self.make_dataset(self.val_df)

    @property
    def test(self):
        """Windowed dataset built from ``test_df`` (rebuilt on every access)."""
        return self.make_dataset(self.test_df)

    @property
    def example(self):
        """Get and cache an example batch of `inputs, labels` for plotting."""
        result = getattr(self, '_example', None)
        if result is None:
            # No example batch was found, so get one from the `.train` dataset
            result = next(iter(self.train))
            # And cache it for next time
            self._example = result
        return result

# Window layout: 24 hourly steps of history in, a single step out,
# with the label sitting 24 steps after the last input step.
input_width, label_width, shift = 24, 1, 24
label_columns = ['power_consumption']

# Build the generator over the three splits.
w1 = WindowGenerator(
    input_width,
    label_width,
    shift,
    train_df,
    val_df,
    test_df,
    label_columns,
)

# Show the resulting window configuration.
print(w1)


Step 6: Build and train the model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Reshape

# Number of columns in the label tensor produced by the window generator:
# the selected label columns, or every column when none were selected.
num_labels = len(w1.label_columns) if w1.label_columns is not None else len(w1.column_indices)

# Define the model.
# The labels produced by `w1` are rank 3 ([batch, label_width, num_labels]) and
# `WindowGenerator.plot` indexes the predictions the same way, so the flat
# Dense output is reshaped to that layout. Without the Reshape the model
# emits [batch, 1] and plotting the predictions fails on the third index.
model = Sequential([
    # [batch, time, features] -> [batch, 32]
    LSTM(32, return_sequences=False),
    # [batch, 32] -> [batch, label_width * num_labels]
    Dense(units=w1.label_width * num_labels),
    # -> [batch, label_width, num_labels]
    Reshape([w1.label_width, num_labels]),
])

model.compile(optimizer='adam', loss='mse')

# Train the model, monitoring the loss on the validation windows.
history = model.fit(w1.train, epochs=10, validation_data=w1.val)


Step 7: Evaluate the model

In [None]:
# Compute the loss on the validation windows first, then on the test windows.
val_performance, test_performance = model.evaluate(w1.val), model.evaluate(w1.test)

# Report both losses, one per line.
print(f'Validation MSE: {val_performance}', f'Test MSE: {test_performance}', sep='\n')


Step 8: Plot the results

In [None]:
# Draw the example windows with the trained model's predictions overlaid
# on the inputs and labels.
w1.plot(model)
