In [None]:
# Calibration recordings. This is a tuple with one (appliance_names, samples) pair per
# appliance: appliance_names is a one-element list holding the appliance name, and
# samples is a list of (timestamp 'MM/DD/YYYY HH:MM:SS', water-meter reading) string pairs.
appliance_tuple = (
    (
        ["dishwasher"],
        [
            ('05/04/2024 23:19:28', '01410593'),
            ('05/04/2024 23:19:38', '01410594'),
            ('05/04/2024 23:19:49', '01410599'),
            ('05/04/2024 23:19:59', '01410604'),
            ('05/04/2024 23:20:10', '01410606'),
            ('05/04/2024 23:20:20', '01410610'),
            ('05/04/2024 23:20:31', '01410611'),
            ('05/04/2024 23:20:52', '01410612'),
            ('05/04/2024 23:21:54', '01410612'),
            ('05/04/2024 23:22:26', '01410613'),
            ('05/04/2024 23:23:08', '01410614'),
            ('05/04/2024 23:23:18', '01410616'),
            ('05/04/2024 23:23:28', '01410618'),
            ('05/04/2024 23:23:39', '01410620'),
        ],
    ),
    (
        ["toilet_tm"],
        [
            ('05/05/2024 11:52:26', '01410818'),
            ('05/05/2024 11:52:36', '01410819'),
            ('05/05/2024 11:52:47', '01410826'),
            ('05/05/2024 11:52:57', '01410833'),
        ],
    ),
    (
        ["sink_tm"],
        [
            ('05/05/2024 11:56:48', '01410833'),
            ('05/05/2024 11:56:58', '01410834'),
        ],
    ),
    (
        ["shower_tm"],
        [
            ('05/05/2024 12:24:12', '01411062'),
            ('05/05/2024 12:24:22', '01411063'),
            ('05/05/2024 12:24:33', '01411064'),
            ('05/05/2024 12:24:43', '01411065'),
            ('05/05/2024 12:24:54', '01411066'),
            ('05/05/2024 12:25:04', '01411067'),
            ('05/05/2024 12:25:15', '01411068'),
        ],
    ),
)

# Per-appliance settings keyed by appliance name: [gallons, continuous-flow flag].
appliance_info_dict = {
    'dishwasher': [0, False],
    "toilet_tm": [0, False],
    "sink_tm": [0, True],
    "shower_tm": [0, True],
}

In [None]:
import random
from datetime import datetime, timedelta

class CalibrationDataGenerator():
    """Build a synthetic (x, y) dataset from per-appliance water-meter recordings.

    Each recording is first converted to a list of (seconds, reading change)
    pairs. Continuous-flow appliances are then randomly lengthened or shortened,
    and finally usage lists of distinct appliances are overlaid on one another.
    Every resulting usage list goes into ``x``; the matching per-appliance
    totals (one value per key of ``appliance_info_dict``, in key order) go
    into ``y``.
    """

    # Format of the timestamp strings found in the raw recordings.
    _TIMESTAMP_FORMAT = '%m/%d/%Y %H:%M:%S'

    def __init__(self, appliance_info_dict, appliance_tuple1):
        """
        Generate the full dataset and print the lengths of ``x`` and ``y``.

        Parameters:
        appliance_info_dict (dict): {appliance name: [gallons, continuous-flow flag (bool)]}
        appliance_tuple1 (tuple): ((appliance names (list), [(timestamp, water meter reading), ...]), ...)
        """
        self.appliance_gallon_dict = {name: info[0] for name, info in appliance_info_dict.items()}
        self.appliance_boolean_continous_dict = {name: info[1] for name, info in appliance_info_dict.items()}

        self.updated_calibrated_data = self.change_in_points(appliance_tuple1)

        self.x = []
        self.y = []

        # Two rounds of random extension/truncation for every continuous-flow
        # appliance. Iterate over a snapshot so newly appended entries are not
        # themselves extended again.
        base_entries = list(self.updated_calibrated_data)
        for _ in range(2):
            for names, data, _labels in base_entries:
                if self.appliance_boolean_continous_dict[names[0]]:
                    self.updated_calibrated_data.append(self.extend_usage_data((names, data)))

        # Single-appliance samples (original and extended).
        for _names, data, labels in self.updated_calibrated_data:
            self.x.append(data)
            self.y.append(labels)

        # Combined samples: repeatedly overlay one more appliance onto the
        # combinations built in the previous pass. A pair is skipped when it
        # would repeat an appliance name.
        for _ in range(2):
            left_entries = list(self.updated_calibrated_data)
            right_entries = list(self.updated_calibrated_data)
            for _depth in range(len(self.updated_calibrated_data)):
                next_left = []
                for names1, data1, labels1 in left_entries:
                    for names2, data2, _labels2 in right_entries:
                        merged_names = names1 + names2
                        if len(merged_names) > len(set(merged_names)):
                            continue
                        names3, data3, labels3 = self.combine_lists(data1, data2, names1, names2, labels1)
                        self.x.append(data3)
                        self.y.append(labels3)
                        next_left.append((names3, data3, labels3))
                if not next_left:
                    # Nothing new was produced, so deeper passes cannot produce anything either.
                    break
                left_entries = next_left

        print(len(self.x))
        print(len(self.y))

    def get_data(self):
        """
        :return: x and y full dataset
        """
        return self.x, self.y

    def change_in_points(self, appliance_tuple):
        """
        Convert raw (timestamp, reading) recordings into per-step changes.

        :param appliance_tuple: iterable of (appliance names, [(timestamp, reading string), ...])
        :return: list of (appliance names, [(seconds, reading change), ...], totals list), where
                 the totals list holds the values of ``appliance_gallon_dict`` with this
                 appliance's entry replaced by the sum of its reading changes
        """
        modified_tuple = []

        for appliance, data in appliance_tuple:
            output = self.appliance_gallon_dict.copy()

            timestamps = [datetime.strptime(entry[0], self._TIMESTAMP_FORMAT) for entry in data]
            # Readings arrive as digit strings and are scaled by 1/10.
            readings = [int(entry[1]) / 10.0 for entry in data]

            # Differences between consecutive samples.
            changes_readings = [later - earlier for earlier, later in zip(readings, readings[1:])]
            changes_timestamps = [
                (later - earlier).total_seconds()
                for earlier, later in zip(timestamps, timestamps[1:])
            ]

            # Snap each time delta to the nearest multiple of 10 seconds (round(), not floor).
            rounded_changes_timestamps = [round(change / 10.0) * 10 for change in changes_timestamps]

            combined_data = list(zip(rounded_changes_timestamps, changes_readings))

            output[appliance[0]] = sum(pair[1] for pair in combined_data)

            modified_tuple.append((appliance, combined_data, list(output.values())))

        return modified_tuple

    def combine_lists(self, large_list, small_list, large_item, small_item, item_dict):
        """
        Overlay ``small_list`` onto a copy of ``large_list``; both hold (seconds, reading) tuples.

        The starting position is chosen at random among the entries of ``large_list`` whose
        seconds value equals the seconds value of the first entry of ``small_list``. From there
        each small entry is compared with the entry at the matching offset:

        - smaller seconds: the small entry is inserted before it and the following entry's
          seconds are reduced by the inserted amount;
        - equal or larger seconds: the readings are added and the seconds set to the small
          entry's value;
        - past the end of the list: the small entry is appended.

        None of the arguments is modified.

        Parameters:
        - large_list: list of (seconds, reading) tuples to insert into.
        - small_list: list of (seconds, reading) tuples to insert.
        - large_item: list of appliance names belonging to large_list.
        - small_item: list of appliance names belonging to small_list; its first name selects
          which total is updated.
        - item_dict: per-appliance totals for large_list, ordered like the keys of
          ``appliance_gallon_dict`` (despite the name, callers in this class pass a list).

        Returns:
        - Tuple of (large_item + small_item, the combined list, a new totals sequence with the
          sum of small_list's readings added to small_item's position).

        Raises:
        - ValueError: if no entry of large_list matches the first seconds value of small_list.
        - IndexError: if small_list is empty.
        """
        first_small_sec = small_list[0][0]
        valid_indices = [i for i, (sec, _) in enumerate(large_list) if sec == first_small_sec]

        if not valid_indices:
            raise ValueError("No valid insertion point found where the first elements match.")

        insert_index = random.choice(valid_indices)
        combined_list = large_list[:]

        for i, (sec, reading) in enumerate(small_list):
            try:
                combined_sec, combined_reading = combined_list[insert_index + i]

                if sec < combined_sec:
                    combined_list.insert(insert_index + i, (sec, reading))
                    combined_sec, combined_reading = combined_list[insert_index + i + 1]
                    combined_list[insert_index + i + 1] = (combined_sec - sec, combined_reading)
                elif sec > combined_sec:
                    # NOTE(review): seconds are multiples of 10 when produced by
                    # change_in_points, so `sec % 10` is 0 and the index does not move.
                    # Possibly a different shift was intended; confirm before changing.
                    insert_index = insert_index + sec % 10
                    _, combined_reading = combined_list[insert_index + i]
                    combined_list[insert_index + i] = (sec, combined_reading + reading)
                else:
                    combined_list[insert_index + i] = (sec, combined_reading + reading)
            except IndexError:
                # Ran past the end of the combined list: the remaining usage is appended.
                combined_list.append((sec, reading))

        key = list(self.appliance_gallon_dict).index(small_item[0])
        # Work on a copy: the caller's totals are also stored in the dataset labels and in
        # other pending combinations, so updating them in place would corrupt those entries.
        updated_totals = item_dict.copy()
        updated_totals[key] += sum(pair[1] for pair in small_list)

        return large_item + small_item, combined_list, updated_totals

    def extend_usage_data(self, appliance_tuple):
        """
        Randomly lengthen or shorten the usage data of one appliance.

        A random count between ``-(len - 1)`` and ``100 * len`` is drawn. A positive count
        appends that many randomly chosen existing points, a negative count drops that many
        points from the end (at least one point always remains), and zero leaves the data
        unchanged.

        :param appliance_tuple: tuple, (appliance names, [(seconds, gallons), ...])
        :return: tuple, (appliance names, modified usage data, totals list)
        :raises ValueError: if the usage data is empty (the random range is then empty)
        """
        appliance_name, usage_data = appliance_tuple

        min_modifications = -len(usage_data) + 1
        max_modifications = len(usage_data) * 100

        output = self.appliance_gallon_dict.copy()

        num_modifications = random.randint(min_modifications, max_modifications)

        if num_modifications > 0:
            additional_data = [random.choice(usage_data) for _ in range(num_modifications)]
            modified_data = usage_data + additional_data
        elif num_modifications < 0:
            # The lower bound of the draw guarantees at least one point survives the slice.
            modified_data = usage_data[:num_modifications]
        else:
            # Zero modifications: keep everything (a `[:0]` slice would yield an empty list).
            modified_data = usage_data[:]

        output[appliance_name[0]] = sum(pair[1] for pair in modified_data)
        return appliance_name, modified_data, list(output.values())



In [None]:
# Build the dataset from the calibration data defined above; the constructor prints len(x) and len(y).
CalibrationDataGenerator(appliance_info_dict, appliance_tuple)

816
816


<__main__.CalibrationDataGenerator at 0x780bfb481420>

In [None]:
# Scratch check: print each value yielded by range(2).
for _ in range(2):
  print(_)

0
1
