In [None]:
# Raw water-meter traces, keyed by appliance name.
# Each value is a list of (timestamp, meter_reading) string pairs in ascending time order:
#   - timestamp is parsed downstream with '%m/%d/%Y %H:%M:%S' (month/day/year).
#   - meter_reading is a zero-padded digit string; downstream code applies int(...) / 10
#     (presumably the meter counts tenths of a unit -- TODO confirm the unit).
appliance_dict = {
    "dishwasher": [
        ('05/04/2024 23:19:28', '01410593'),
        ('05/04/2024 23:19:38', '01410594'),
        ('05/04/2024 23:19:49', '01410599'),
        ('05/04/2024 23:19:59', '01410604'),
        ('05/04/2024 23:20:10', '01410606'),
        ('05/04/2024 23:20:20', '01410610'),
        ('05/04/2024 23:20:31', '01410611'),
        ('05/04/2024 23:20:52', '01410612'),
        ('05/04/2024 23:21:54', '01410612'),
        ('05/04/2024 23:22:26', '01410613'),
        ('05/04/2024 23:23:08', '01410614'),
        ('05/04/2024 23:23:18', '01410616'),
        ('05/04/2024 23:23:28', '01410618'),
        ('05/04/2024 23:23:39', '01410620')
    ],

    "toilet_tm": [
        ('05/05/2024 11:52:26', '01410818'),
        ('05/05/2024 11:52:36', '01410819'),
        ('05/05/2024 11:52:47', '01410826'),
        ('05/05/2024 11:52:57', '01410833'),
    ],

    "sink_tm": [
        ('05/05/2024 11:56:48', '01410833'),
        ('05/05/2024 11:56:58', '01410834'),
    ]
}

In [None]:
from datetime import datetime, timedelta
from itertools import combinations
import random
import pandas as pd
import numpy as np

# Convert each appliance's absolute (timestamp, reading) samples into per-sample (seconds elapsed, change in reading) pairs
def change_in_points(appliance_dict):
    """Turn absolute meter samples into step-to-step deltas.

    Args:
        appliance_dict: mapping of appliance name -> list of entries whose
            first item is a '%m/%d/%Y %H:%M:%S' timestamp string and whose
            second item is a reading accepted by ``int()``.

    Returns:
        dict with the same keys; each value is a list of
        ``(seconds_since_previous_sample, reading_change)`` tuples. The first
        tuple of every list is ``(0, 0)``. Readings are divided by 10 before
        differencing. An interval of exactly 11 seconds is reported as 10;
        every other interval is passed through unchanged.
    """
    time_format = '%m/%d/%Y %H:%M:%S'
    deltas_by_appliance = {}

    for name, samples in appliance_dict.items():
        stamps = [sample[0] for sample in samples]
        levels = [int(sample[1])/10.0 for sample in samples]

        # Difference between each reading and the one before it.
        level_steps = [0] + [later - earlier for earlier, later in zip(levels, levels[1:])]

        # A lone sample has no interval, so its timestamp is never parsed.
        moments = [datetime.strptime(stamp, time_format) for stamp in stamps] if len(stamps) > 1 else []
        time_steps = [0] + [(later - earlier).total_seconds() for earlier, later in zip(moments, moments[1:])]

        # Snap 11-second intervals onto the 10-second grid; other values are kept as-is.
        snapped_steps = [10 if step == 11 else step for step in time_steps]

        deltas_by_appliance[name] = list(zip(snapped_steps, level_steps))

    return deltas_by_appliance

def undo_change_in_points(combined_data):
    """Accumulate per-sample deltas back into a running (timestamp, reading) series.

    Args:
        combined_data: mapping of appliance name -> list of
            ``(seconds_since_previous_sample, reading_change)`` tuples.

    Returns:
        dict with the same keys; each value is a list of
        ``(timestamp_string, cumulative_reading)`` tuples. Timestamps are the
        cumulative seconds rendered as '%m/%d/%Y %H:%M:%S' counted from
        01/01/1970 00:00:00, so the original wall-clock time and the original
        absolute meter value are not restored; only the shape of the series is.
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; adding a
    # timedelta to the epoch produces the same naive UTC datetime.
    epoch = datetime(1970, 1, 1)
    original_data = {}

    for appliance, data in combined_data.items():
        timestamps = []
        readings = []

        for i, (step_seconds, change) in enumerate(data):
            if i == 0:
                # The first pair seeds the running totals.
                actual_time = step_seconds
                actual_reading = change
            else:
                actual_time += step_seconds
                actual_reading += change

            moment = epoch + timedelta(seconds=actual_time)
            timestamps.append(moment.strftime('%m/%d/%Y %H:%M:%S'))
            readings.append(actual_reading)

        original_data[appliance] = list(zip(timestamps, readings))

    return original_data

def extract_features(d, spike_threshold=10, high_flow_threshold=5):
    """Summarise each (timestamp, reading) series as a fixed-length feature row.

    Args:
        d: iterable of series; each series is a list of
            ``(timestamp, reading)`` pairs with timestamps formatted
            '%m/%d/%Y %H:%M:%S'.
        spike_threshold: a sample counts as a spike when the flow rate differs
            from the previous sample's flow rate by more than this amount.
        high_flow_threshold: a sample counts as high flow when its flow rate
            exceeds this amount.

    Returns:
        list with one row per series, in input order. Each row is
        ``[duration, total_water_consumption, mean_flow_rate, max_flow_rate,
        min_flow_rate, flow_rate_changes, number_of_spikes,
        duration_of_high_flow]``. ``duration`` is in minutes, flow rates are
        reading units per minute, and ``duration_of_high_flow`` is the number
        of high-flow samples times the mean sampling interval in seconds.

    Raises:
        IndexError: if a series has no rows.
    """
    final = []
    for data in d:
        df = pd.DataFrame(data, columns=['timestamp', 'reading'])
        df['timestamp'] = pd.to_datetime(df['timestamp'], format='%m/%d/%Y %H:%M:%S')
        # NOTE(review): astype(int) drops any fractional part before the /10
        # scaling. That suits raw integer-string meter readings, but float
        # readings (e.g. the output of undo_change_in_points) are truncated --
        # confirm which input format is intended.
        df['reading'] = df['reading'].astype(int)/10

        # Total span of the series, in minutes.
        duration = (df['timestamp'].iloc[-1] - df['timestamp'].iloc[0]).total_seconds()/60

        total_water_consumption = df['reading'].iloc[-1] - df['reading'].iloc[0]

        # Seconds between consecutive samples (NaN for the first row); reused below.
        step_seconds = df['timestamp'].diff().dt.total_seconds()

        # Flow rate per minute between consecutive samples.
        df['flow_rate'] = df['reading'].diff() / (step_seconds/60)
        flow_rate = df['flow_rate']

        mean_flow_rate = flow_rate.mean()
        max_flow_rate = flow_rate.max()
        min_flow_rate = flow_rate.min()

        # Count samples whose flow rate differs from the previous sample's.
        # The leading NaN compares unequal to everything, so it is counted too.
        flow_rate_changes = flow_rate.ne(flow_rate.shift()).sum()

        number_of_spikes = (flow_rate.diff().abs() > spike_threshold).sum()

        high_flow_periods = flow_rate > high_flow_threshold
        duration_of_high_flow = high_flow_periods.sum() * step_seconds.mean()

        final.append([duration, total_water_consumption, mean_flow_rate, max_flow_rate, min_flow_rate, flow_rate_changes, number_of_spikes, duration_of_high_flow])

    return final

def one_hot(l):
    """Encode each label as occurrence counts of the known appliance names.

    Despite the name this is a count vector, not a strict one-hot encoding:
    each row is ``[count('dishwasher'), count('toilet_tm'), count('sink_tm')]``
    as returned by ``entry.count(...)`` for the corresponding entry of ``l``.

    Args:
        l: iterable of labels (anything with a ``count`` method, e.g. strings).

    Returns:
        numpy array with one row per label and one column per appliance name.
    """
    appliance_names = ('dishwasher', 'toilet_tm', 'sink_tm')
    return np.array([[label.count(name) for name in appliance_names] for label in l])

def appliance_2(appliance_dict, keys, index=-1):

  gallon_y = {}
  for key in keys:
    gallon_y[key] = 0

  # create new appliance key
  new_appliance = ''
  if len(appliance_dict.keys()) != 1:
    for key in appliance_dict.keys():
      new_appliance += key + " + "
  else:
    new_appliance = list(appliance_dict.keys())[0] + " +  + " + list(appliance_dict.keys())[0] + " + "

  new_dic = {}

  # Find the longer list
  longest_list_key = max(appliance_dict, key=lambda k: len(appliance_dict[k]))
  longest_list = appliance_dict[longest_list_key]
  s = 0
  for i in longest_list:
    s += i[1]
  gallon_y[longest_list_key] = s

  # Find the shorter list
  shorter_list_key = min(appliance_dict, key=lambda k: len(appliance_dict[k]))
  shorter_list = appliance_dict[shorter_list_key][1:]

  new_dic[new_appliance] = longest_list[:]

  c = True
  if index == -1:
    while c:
      index_to_insert = random.randint(0, len(longest_list) - len(shorter_list))

      # Get the difference in the first timestamp between the two lists

      time_diff = longest_list[index_to_insert][0] - shorter_list[0][0]

      if time_diff == 0:
        c = False
        for i in range(len(shorter_list)):
          new_dic[new_appliance][index_to_insert + i] = (
                  shorter_list[i][0],
                  shorter_list[i][1] + longest_list[index_to_insert + i][1],
              )

    return new_appliance, new_dic[new_appliance]

  else:
    # Get the difference in the first timestamp between the two lists

    time_diff = longest_list[index][0] - shorter_list[0][0]
    #print(index)

    if time_diff == 0:

      for i in range(len(shorter_list)):
          new_dic[new_appliance][index + i] = (
                  shorter_list[i][0],
                  shorter_list[i][1] + longest_list[index + i][1],
              )

    return new_appliance, new_dic[new_appliance]



In [None]:
# Build the labelled dataset: every single-appliance trace plus synthetic
# overlaps produced by repeatedly merging a second appliance into it.

# Number of successive merge rounds per (key1, key2) pair. Each round merges
# key2 once more into the result of the previous round.
MERGE_ROUNDS = 5

a = change_in_points(appliance_dict)

data = {}
data_test = {}

for key1, series1 in a.items():

  # to add into
  for key2, series2 in a.items():

    data[key1] = undo_change_in_points({key1: series1})[key1]

    # Round 1 merges the two raw series; later rounds merge key2 into the
    # last result (qw, er) of the round before.
    d = {key1: series1, key2: series2}

    for _ in range(MERGE_ROUNDS):
      longest_list_key = max(d, key=lambda k: len(d[k]))
      shortest_list_key = min(d, key=lambda k: len(d[k]))
      longest_list = d[longest_list_key]
      shorter_list = d[shortest_list_key][1:]

      # Try every start position at which the shorter series still fits.
      # NOTE(review): appliance_2 returns the longest series unchanged when
      # the intervals do not line up, yet the combined label is stored
      # anyway, so some labels may list more appliances than the trace
      # contains -- confirm this is acceptable for training.
      for i in range(len(longest_list) - len(shorter_list)):

        qw, er = appliance_2(d, appliance_dict.keys(), index=i)

        data[qw+str(i)] = undo_change_in_points({qw:er})[qw]

      d = {qw: er, key2: series2}


xs = list(data.values())
ys = list(data.keys())


print(xs[30])
print(ys[30])

xs = extract_features(xs)

inputs = np.array(xs)
outputs = one_hot(ys)

print(inputs.shape)
print(outputs.shape)

[('01/01/1970 00:00:00', 0), ('01/01/1970 00:00:10', 0.10000000000582077), ('01/01/1970 00:00:20', 0.7000000000116415), ('01/01/1970 00:00:30', 1.900000000023283), ('01/01/1970 00:00:40', 2.8000000000174623), ('01/01/1970 00:00:50', 3.2000000000116415), ('01/01/1970 00:01:00', 3.3000000000174623), ('01/01/1970 00:01:21', 3.400000000023283), ('01/01/1970 00:02:23', 3.400000000023283), ('01/01/1970 00:02:55', 3.5), ('01/01/1970 00:03:37', 3.6000000000058208), ('01/01/1970 00:03:47', 3.8000000000174623), ('01/01/1970 00:03:57', 4.0), ('01/01/1970 00:04:07', 4.2000000000116415)]
dishwasher + toilet_tm +  + toilet_tm +  + toilet_tm + 2
(192, 8)
(192, 3)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# Rebuild the feature matrix and label matrix from the labelled traces in `data`.
ys = list(data.keys())
xs = extract_features(list(data.values()))

inputs = np.array(xs)
outputs = one_hot(ys)

print(inputs.shape)
print(outputs.shape)

# float32 tensors for the network
input_array = torch.tensor(inputs, dtype=torch.float32)
output_array = torch.tensor(outputs, dtype=torch.float32)

# Sanity-check the shapes before splitting
assert input_array.shape[0] == output_array.shape[0], "Number of input samples must match number of output samples"
assert input_array.shape[1] == 8, "Each input sample must have 8 features"
assert output_array.shape[1] == 3, "Each output sample must have 3 features"

# Hold out 20% of the samples for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    input_array,
    output_array,
    test_size=0.2,
    random_state=42,
)

class SimpleNN(nn.Module):
    """Two-layer fully connected network with ReLU on both the hidden and output layer.

    The final ReLU clamps predictions to be non-negative.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.output_relu = nn.ReLU()  # keeps outputs non-negative

    def forward(self, x):
        """Return ``output_relu(fc2(relu(fc1(x))))``."""
        hidden = self.relu(self.fc1(x))
        return self.output_relu(self.fc2(hidden))

# Network dimensions: 8 extracted features in, 3 appliance counts out.
input_size = 8
hidden_size = 10
output_size = 3

# Seed torch so weight initialisation (and therefore the run) is reproducible.
torch.manual_seed(42)

model = SimpleNN(input_size, hidden_size, output_size)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 100000

# Training loop: full-batch gradient descent on the training split.
model.train()  # Set the model to training mode (loop-invariant, so set once)
for epoch in range(num_epochs):
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training complete")

# Testing the model on the held-out split. This previously re-used
# X_train / y_train, so the reported "Test Loss" was really the training loss.
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    test_outputs = model(X_test)
    test_loss = criterion(test_outputs, y_test)
    print(f'Test Loss: {test_loss.item():.4f}')

    # Print test predictions next to the actual values for comparison
    print("Test Predictions:\n", test_outputs)
    print("Actual Values:\n", y_test)

(192, 8)
(192, 3)
Epoch [100/100000], Loss: 2.5968
Epoch [200/100000], Loss: 2.5505
Epoch [300/100000], Loss: 2.5260
Epoch [400/100000], Loss: 2.5188
Epoch [500/100000], Loss: 2.5120
Epoch [600/100000], Loss: 2.5052
Epoch [700/100000], Loss: 2.4979
Epoch [800/100000], Loss: 2.4920
Epoch [900/100000], Loss: 2.4833
Epoch [1000/100000], Loss: 2.4861
Epoch [1100/100000], Loss: 2.4720
Epoch [1200/100000], Loss: 2.4678
Epoch [1300/100000], Loss: 2.4646
Epoch [1400/100000], Loss: 2.4615
Epoch [1500/100000], Loss: 2.4589
Epoch [1600/100000], Loss: 2.4564
Epoch [1700/100000], Loss: 2.4541
Epoch [1800/100000], Loss: 2.4520
Epoch [1900/100000], Loss: 2.4499
Epoch [2000/100000], Loss: 2.4480
Epoch [2100/100000], Loss: 2.4471
Epoch [2200/100000], Loss: 2.4444
Epoch [2300/100000], Loss: 2.4426
Epoch [2400/100000], Loss: 2.4428
Epoch [2500/100000], Loss: 2.4377
Epoch [2600/100000], Loss: 2.2161
Epoch [2700/100000], Loss: 1.3144
Epoch [2800/100000], Loss: 1.1192
Epoch [2900/100000], Loss: 1.0626
Epoch