In [None]:
# Raw captures per appliance. Each key is an appliance name; each value is the ordered
# list of (timestamp 'MM/DD/YYYY HH:MM:SS', zero-padded meter reading string) samples
# recorded while that appliance was running.
appliance_dict = {
    "dishwasher": [
        ("05/04/2024 23:19:28", "01410593"),
        ("05/04/2024 23:19:38", "01410594"),
        ("05/04/2024 23:19:49", "01410599"),
        ("05/04/2024 23:19:59", "01410604"),
        ("05/04/2024 23:20:10", "01410606"),
        ("05/04/2024 23:20:20", "01410610"),
        ("05/04/2024 23:20:31", "01410611"),
        ("05/04/2024 23:20:52", "01410612"),
        ("05/04/2024 23:21:54", "01410612"),
        ("05/04/2024 23:22:26", "01410613"),
        ("05/04/2024 23:23:08", "01410614"),
        ("05/04/2024 23:23:18", "01410616"),
        ("05/04/2024 23:23:28", "01410618"),
        ("05/04/2024 23:23:39", "01410620"),
    ],
    "toilet_tm": [
        ("05/05/2024 11:52:26", "01410818"),
        ("05/05/2024 11:52:36", "01410819"),
        ("05/05/2024 11:52:47", "01410826"),
        ("05/05/2024 11:52:57", "01410833"),
    ],
    "sink_tm": [
        ("05/05/2024 11:56:48", "01410833"),
        ("05/05/2024 11:56:58", "01410834"),
    ],
}

In [None]:
from datetime import datetime, timedelta
from itertools import combinations
import random
import pandas as pd
import numpy as np

def change_in_points(appliance_dict, verbose=True):
    """
    Convert raw (timestamp, meter reading) samples into per-step deltas.

    Parameters:
    appliance_dict (dict): Appliance name -> ordered list of
        (timestamp 'MM/DD/YYYY HH:MM:SS', meter reading string) tuples.
    verbose (bool): When True (default), print the timestamps and the scaled
        readings of every appliance, as the original implementation did.

    Returns:
    dict: Appliance name -> list of (elapsed_seconds, reading_change) tuples, one per
        consecutive pair of samples. elapsed_seconds is rounded to the nearest
        multiple of 10; reading_change is the difference of the raw readings
        divided by 10. A list with fewer than two samples yields an empty list.

    Raises:
    ValueError: If a timestamp does not match the expected format or a reading
        is not an integer string.
    """
    timestamp_format = '%m/%d/%Y %H:%M:%S'
    combined_data_dict = {}

    for appliance, data in appliance_dict.items():
        timestamps = [entry[0] for entry in data]
        raw_counts = [int(entry[1]) for entry in data]
        readings = [count / 10.0 for count in raw_counts]

        if verbose:
            print(timestamps, readings)

        # Subtract the integer counts first and scale afterwards: subtracting the
        # already-scaled floats (e.g. 141059.4 - 141059.3) leaves cancellation
        # noise such as 0.0999999... instead of 0.1.
        changes_readings = [
            (later - earlier) / 10.0
            for earlier, later in zip(raw_counts, raw_counts[1:])
        ]

        # Parse every timestamp once instead of twice per consecutive pair.
        parsed_timestamps = [datetime.strptime(stamp, timestamp_format) for stamp in timestamps]
        changes_timestamps = [
            (later - earlier).total_seconds()
            for earlier, later in zip(parsed_timestamps, parsed_timestamps[1:])
        ]

        # Snap elapsed time to the nearest multiple of 10 seconds (round(), not floor).
        rounded_changes_timestamps = [round(change / 10.0) * 10 for change in changes_timestamps]

        combined_data_dict[appliance] = list(zip(rounded_changes_timestamps, changes_readings))

    return combined_data_dict

# Appliance name -> list of (elapsed seconds rounded to the nearest 10, change in meter reading)
# tuples, one per consecutive pair of raw samples.
data = change_in_points(appliance_dict)

['05/04/2024 23:19:28', '05/04/2024 23:19:38', '05/04/2024 23:19:49', '05/04/2024 23:19:59', '05/04/2024 23:20:10', '05/04/2024 23:20:20', '05/04/2024 23:20:31', '05/04/2024 23:20:52', '05/04/2024 23:21:54', '05/04/2024 23:22:26', '05/04/2024 23:23:08', '05/04/2024 23:23:18', '05/04/2024 23:23:28', '05/04/2024 23:23:39'] [141059.3, 141059.4, 141059.9, 141060.4, 141060.6, 141061.0, 141061.1, 141061.2, 141061.2, 141061.3, 141061.4, 141061.6, 141061.8, 141062.0]
['05/05/2024 11:52:26', '05/05/2024 11:52:36', '05/05/2024 11:52:47', '05/05/2024 11:52:57'] [141081.8, 141081.9, 141082.6, 141083.3]
['05/05/2024 11:56:48', '05/05/2024 11:56:58'] [141083.3, 141083.4]


In [None]:
import random

def generate_combined_lists(large_item, small_item, num_combinations, item_dict, num_of_iter):
    """
    Builds synthetic samples by overlaying the small item's sequence onto the large item's sequence,
    together with a label vector for every sample.

    Parameters:
    large_item (dict): The key is the item name and the value is the larger list of tuples
        (change in seconds, water meter reading change). Only the first entry of the dict is read.
    small_item (dict): The key is the item name and the value is the smaller list of tuples to be
        combined into the larger list. Only the first entry of the dict is read.
    num_combinations (int): The number of generation passes to run.
    item_dict (dict): Item names mapped to their baseline counts. It is copied (never mutated) to
        build each label vector and must contain both item names as keys.
    num_of_iter (int): The number of successive overlays of the small list per pass.

    Returns:
    tuple: (combined_lists, combined_lists_outputs). For every pass, combined_lists receives the
        num_of_iter overlaid sequences (NumPy arrays) followed by the unmodified large list and
        small list. combined_lists_outputs receives, in the same order, one NumPy array of the
        label dict's values per sample.

    Raises:
    ValueError: If no tuple in the large list has the same seconds value as the first tuple of
        the small list.
    """
    # Unpack the single (name, sequence) entry from each input dict.
    large_list_item = list(large_item.keys())[0]
    large_list = list(large_item.values())[0]

    small_list_item = list(small_item.keys())[0]
    small_list = list(small_item.values())[0]

    def combine_lists(large_list, small_list, num_of_iter=5):
        """
        Overlays the small list onto the large list num_of_iter times, each time at a random index
        whose seconds value matches the first tuple of the small list. Every overlay starts from
        the result of the previous one, so the overlays accumulate.

        Parameters:
        large_list (list): The larger list of tuples (change in seconds, water meter reading change).
        small_list (list): The smaller list of tuples to be combined into the larger list.
        num_of_iter (int): The number of times to add the smaller list to the larger list
            (one combined list is produced per iteration).

        Returns:
        list: One NumPy array per iteration, each a snapshot of the combined data at that point.

        Raises:
        ValueError: If the current list has no tuple whose seconds value matches small_list[0][0].
        """
        combined_list_s = []

        for _ in range(num_of_iter):
          # Find all valid insertion points where the first element matches
          valid_indices = [i for i, (sec, _) in enumerate(large_list) if sec == small_list[0][0]]

          if not valid_indices:
              raise ValueError("No valid insertion point found where the first elements match.")

          # Choose a random valid insertion index
          insert_index = random.choice(valid_indices)

          # Create a copy of the large list to avoid modifying the original list
          combined_list = large_list[:]

          # Walk the small list and merge each tuple into the copy, starting at the chosen index
          for i, (sec, reading) in enumerate(small_list):
              try:
                  combined_sec, combined_reading = combined_list[insert_index + i]

                  if sec < combined_sec:
                      # The small step is shorter: insert it before the existing tuple and
                      # shorten the existing tuple's seconds by the inserted amount.
                      combined_list.insert(insert_index + i, (sec, reading))
                      combined_sec, combined_reading = combined_list[insert_index + i + 1]
                      combined_list[insert_index + i + 1] = (combined_sec - sec, combined_reading)
                  elif sec > combined_sec:
                      # The small step is longer: shift the insertion index, then overwrite that
                      # position with the small step's seconds and the summed readings.
                      # NOTE(review): sec % 10 is 0 whenever sec is a multiple of 10, which makes
                      # this shift a no-op in that case - confirm the intended offset.
                      insert_index = insert_index + sec % 10
                      _, combined_reading = combined_list[insert_index + i]
                      combined_list[insert_index + i] = (sec, combined_reading + reading)
                  else:
                      # Same duration: keep the seconds and add the readings together.
                      combined_list[insert_index + i] = (sec, combined_reading + reading)

              except IndexError:
                  # The target position is past the end of the list: append the small tuple as-is.
                  combined_list.append((sec, reading))
          # Store a snapshot of this overlay as an array
          combined_list_s.append(np.array(combined_list))
          # The next iteration overlays onto this result rather than onto the original list
          large_list = combined_list


        return combined_list_s

    combined_lists = []
    combined_lists_outputs = []

    for _ in range(num_combinations):
        combined_list1 = combine_lists(large_list, small_list, num_of_iter)

        for combined_list in combined_list1:
          combined_lists.append(combined_list)

        # Labels for the overlaid samples: the large item counted once, and the small item
        # counted i times for the i-th overlay (i = 1..num_of_iter).
        a = item_dict.copy()
        a[large_list_item] += 1
        for i in range(1, num_of_iter+1):
            a[small_list_item] = item_dict[small_list_item] + i
            combined_lists_outputs.append(np.array(list(a.values())))

        # Also add each unmodified sequence as its own sample, labelled with a single
        # occurrence of the corresponding item.
        combined_lists.append(large_list)
        combined_lists.append(small_list)
        a = item_dict.copy()
        a[large_list_item] += 1
        combined_lists_outputs.append(np.array(list(a.values())))
        a = item_dict.copy()
        a[small_list_item] += 1
        combined_lists_outputs.append(np.array(list(a.values())))

    # Disabled debug output (kept as an inert string): prints the original and combined lists
    """print("Large List:", large_list)
    print("Small List:", small_list)
    print(f"Generated {num_combinations} Combined Lists:")
    for i, cl in enumerate(combined_lists):
        print(f"Combined List {i+1}:", cl)
        print(f"Combined List {i+1}:", combined_lists_outputs[i])"""

    return combined_lists, combined_lists_outputs


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.metrics import MeanAbsoluteError, MeanSquaredError
from tensorflow.keras.utils import Sequence
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.callbacks import ModelCheckpoint




def get_new_x_y(data, num_combinations, item_dict, p=10, num_of_iter=20):
    """
    Generate a padded batch of synthetic sequences and their label vectors.

    For each of `p` rounds, two distinct appliances are drawn at random from `data`,
    the shorter sequence is overlaid onto the longer one via generate_combined_lists,
    and the resulting samples and labels are collected.

    Parameters:
    data (dict): Appliance name -> list of (seconds, reading change) tuples.
        Must contain at least two appliances.
    num_combinations (int): Forwarded to generate_combined_lists.
    item_dict (dict): Label template forwarded to generate_combined_lists.
    p (int): Number of random appliance pairs to draw.
    num_of_iter (int): Forwarded to generate_combined_lists (default 20, the value
        that was previously hard-coded).

    Returns:
    tuple: (x_data, y_data) as float32 arrays produced by pad_sequences with
        padding='post'.

    Raises:
    ValueError: If `data` holds fewer than two appliances (raised by random.sample).
    """
    x_data = []
    y_data = []

    for _ in range(p):
        # random.sample() requires a sequence: passing a dict view was deprecated in
        # Python 3.9 and raises TypeError from Python 3.11.
        sampled_keys = random.sample(list(data), 2)

        # Order the pair by sequence length. Unlike separate max()/min() calls, this
        # never assigns the same appliance to both roles when the lengths are equal.
        smaller_pair_key, larger_pair_key = sorted(sampled_keys, key=lambda key: len(data[key]))
        large_item = {larger_pair_key: data[larger_pair_key]}
        small_item = {smaller_pair_key: data[smaller_pair_key]}

        x, y = generate_combined_lists(large_item, small_item, num_combinations, item_dict, num_of_iter=num_of_iter)
        x_data.extend(x)
        y_data.extend(y)

    x_data = pad_sequences(x_data, padding='post', dtype='float32')
    y_data = pad_sequences(y_data, padding='post', dtype='float32')
    return x_data, y_data

class DataGenerator(Sequence):
    """
    Keras Sequence that serves batches of synthetic samples built by get_new_x_y.

    A fresh dataset is generated once at construction and again at the end of every
    epoch. Batches are plain slices of the current dataset, so fetching a batch is
    cheap and all batches of an epoch share the same padded length.

    Parameters:
    data (dict): Appliance name -> list of (seconds, reading change) tuples.
    item_dict (dict): Label template passed through to get_new_x_y.
    p (int): Number of random appliance pairs drawn per dataset generation.
    batch_size (int): Number of samples per batch.
    num_combinations (int): Passed through to get_new_x_y.
    shuffle (bool): Whether to shuffle the sample order for every epoch.
    """

    def __init__(self, data, item_dict, p=10, batch_size=32, num_combinations=10, shuffle=True):
        # Newer Keras versions expect Sequence subclasses to initialise the base class.
        super().__init__()
        self.data = data
        self.batch_size = batch_size
        self.num_combinations = num_combinations
        self.shuffle = shuffle
        self.item_dict = item_dict
        self.p = p

        self._regenerate()

        print(self.x_data.shape)
        print(self.y_data.shape)

        self._reset_indices()

    def _regenerate(self):
        """Draw a new random dataset from the source sequences."""
        self.x_data, self.y_data = get_new_x_y(self.data, self.num_combinations, self.item_dict, self.p)

    def _reset_indices(self):
        """Rebuild the sample order for the current dataset, shuffled when requested."""
        self.indices = np.arange(len(self.x_data))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.x_data) / self.batch_size))

    def __getitem__(self, index):
        """Return batch `index` as an (x_batch, y_batch) pair."""
        # The dataset is no longer regenerated here: rebuilding every sample just to
        # slice out one batch made each training step as expensive as a full
        # dataset generation.
        batch_indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        x_batch = self.x_data[batch_indices]
        y_batch = self.y_data[batch_indices]
        return x_batch, y_batch

    def on_epoch_end(self):
        """Generate new random samples for the next epoch and reshuffle their order."""
        self._regenerate()
        self._reset_indices()

# Create data generator

# Label template: one zero-initialised count per appliance the model can predict.
output_dic = {'dishwasher': 0, 'toilet_tm': 0, 'sink_tm': 0, 'toilet_km': 0, 'shower_tm': 0, 'shower_km': 0, 'hose': 0, 'laundry': 0}

# Both generators share this value; keeping it in one place stops the variable and
# the constructor arguments from drifting apart (32 is the size that was in effect).
batch_size = 32

train_generator = DataGenerator(data, output_dic, p=50, batch_size=batch_size)
val_generator = DataGenerator(data, output_dic, p=10, batch_size=batch_size)


# Model definition: zero-padded time steps are masked before the LSTM, and the dense
# head emits one regression output per appliance in output_dic.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(None, 2)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(len(output_dic)))


model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae', 'mse'])

# Define the checkpoint callback.
# The deprecated `period` argument is replaced by an integer `save_freq`, which is
# counted in batches: 10 epochs' worth of batches saves once every 10 epochs.
checkpoint_every_n_epochs = 10
checkpoint_callback = ModelCheckpoint(
    filepath='model_checkpoint_epoch_{epoch:02d}.h5',  # Save model with epoch number
    save_freq=max(1, checkpoint_every_n_epochs * len(train_generator))
)

# Train the model
model.fit(train_generator, epochs=1, validation_data=val_generator,
    callbacks=[checkpoint_callback])

# Build a fresh random evaluation set and make predictions
x_test,y_test = get_new_x_y(data, 5, output_dic)
predictions = model.predict(x_test)

print(np.round(predictions))
print(y_test)

# Calculate MAE and MSE
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")


since Python 3.9 and will be removed in a subsequent version.
  keys = random.sample(data.keys(), 2)


(11000, 27, 2)
(11000, 8)
(2200, 28, 2)
(2200, 8)




KeyboardInterrupt: 

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.metrics import MeanAbsoluteError, MeanSquaredError
from tensorflow.keras.utils import Sequence
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.callbacks import ModelCheckpoint


class VariableDataGenerator(Sequence):
    """
    Keras Sequence that serves batches of synthetic samples built by get_new_x_y.

    A fresh dataset is generated at construction and again at the end of every epoch.

    Parameters:
    d (dict): Appliance name -> list of (seconds, reading change) tuples.
    item_dict (dict): Label template passed through to get_new_x_y.
    p (int): Number of random appliance pairs drawn per dataset generation.
    batch_size (int): Number of samples per batch.
    num_combinations (int): Passed through to get_new_x_y.
    shuffle (bool): Whether to shuffle the sample order for every epoch.
    """

    def __init__(self, d, item_dict, p=10, batch_size=32, num_combinations=10, shuffle=True):
        # Newer Keras versions expect Sequence subclasses to initialise the base class.
        super().__init__()
        self.d = d
        self.batch_size = batch_size

        self.num_combinations = num_combinations
        self.item_dict = item_dict
        self.p = p
        # Previously accepted but silently ignored; now honoured.
        self.shuffle = shuffle

        self._regenerate()

    def _regenerate(self):
        """Draw a new random dataset and rebuild (optionally shuffle) the sample order."""
        self.data, self.labels = get_new_x_y(self.d, self.num_combinations, self.item_dict, self.p)
        self.indexes = np.arange(len(self.data))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Number of batches per epoch, including a trailing partial batch."""
        return int(np.ceil(len(self.data) / self.batch_size))

    def __getitem__(self, idx):
        """Return batch `idx` as an (x_data, y_data) pair of float32 arrays."""
        # The dataset is no longer regenerated per batch: rebuilding every sample just
        # to slice out one batch made each step as costly as a full generation.
        batch_indexes = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]

        # get_new_x_y already returns arrays padded to a common length, so indexing
        # yields the same values the former per-batch pad_sequences call produced.
        x_data = self.data[batch_indexes]
        y_data = self.labels[batch_indexes]

        return np.array(x_data), np.array(y_data)

    def on_epoch_end(self):
        """Generate new random samples for the next epoch."""
        self._regenerate()

# Create data generator

# Label template: one zero-initialised count per appliance the model can predict.
output_dic = {'dishwasher': 0, 'toilet_tm': 0, 'sink_tm': 0, 'toilet_km': 0, 'shower_tm': 0, 'shower_km': 0, 'hose': 0, 'laundry': 0}

batch_size = 64

train_generator = VariableDataGenerator(data, output_dic, p=50, batch_size=batch_size)
val_generator = VariableDataGenerator(data, output_dic, p=10, batch_size=batch_size)


# Model definition: an LSTM over (time step, 2 features) sequences followed by a dense
# head with one regression output per appliance in output_dic.
model = Sequential()
model.add(LSTM(units=64, input_shape=(None, 2)))
model.add(Dense(units=len(output_dic)))


model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae', 'mse'])

# Define the checkpoint callback.
# The deprecated `period` argument is replaced by an integer `save_freq`, which is
# counted in batches: 10 epochs' worth of batches saves once every 10 epochs.
checkpoint_every_n_epochs = 10
checkpoint_callback = ModelCheckpoint(
    filepath='model_checkpoint_epoch_{epoch:02d}.h5',  # Save model with epoch number
    save_freq=max(1, checkpoint_every_n_epochs * len(train_generator))
)

# Train the model
model.fit(train_generator, epochs=100, validation_data=val_generator,
    callbacks=[checkpoint_callback])


since Python 3.9 and will be removed in a subsequent version.
  keys = random.sample(data.keys(), 2)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


  saving_api.save_model(


Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100

In [None]:
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Load a specific saved checkpoint (here, the one written after epoch 40)
saved_model = load_model('/content/drive/MyDrive/model_checkpoint_epoch_40.h5')

# New data for prediction
new_data = {'toilet_tm': [
        ('05/05/2024 11:52:26', '01410818'),
        ('05/05/2024 11:52:36', '01410819'),
        ('05/05/2024 11:52:47', '01410830'),
        ('05/05/2024 11:52:57', '01410837'),
    ]}

# Convert the raw samples to (seconds, reading change) steps and take the only appliance's sequence
new_changes = change_in_points(new_data)
x_test = np.array(list(new_changes.values())[0])
# Add the leading batch axis without hard-coding the sequence length, so inputs with
# any number of readings work (the previous reshape only accepted exactly 3 steps).
x_test = np.expand_dims(x_test, axis=0)
print(x_test.shape)

# Make predictions
predictions = saved_model.predict(x_test)

print("Predictions:", np.round(predictions))


# Expected per-appliance counts for the new data, in the key order of the training label dict
true_labels = [0,1,0,0,0,0,0,0]



['05/05/2024 11:52:26', '05/05/2024 11:52:36', '05/05/2024 11:52:47', '05/05/2024 11:52:57'] [141081.8, 141081.9, 141083.0, 141083.7]
(1, 3, 2)




Predictions: [[-0.  1.  3.  0.  0. -0. -0. -0.]]
