In [None]:
#@test {"skip": true}

!pip install --quiet --upgrade tensorflow-federated

In [None]:
import collections

import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

np.random.seed(0)

tff.federated_computation(lambda: 'Hello, World!')()

b'Hello, World!'

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

data = pd.read_csv('/content/drive/MyDrive/LAB3-20221109T160610Z-001/LCL-FullData/Cluster18data.csv')

In [None]:
data['DateTime'] = pd.to_datetime(data['DateTime'])

In [None]:
# Filter the DataFrame to the training period: readings from 2011-10-01 00:00 up to and including 2013-02-28 00:00
start_date1 = pd.to_datetime('2011-10-01')
end_date1 = pd.to_datetime('2013-02-28')
filtered_data1 = data[(data['DateTime'] >= start_date1) & (data['DateTime'] <= end_date1)]


In [None]:
# Select 20 unique LCLids
lclid_list1 = filtered_data1['LCLid'].unique()
selected_lclids1 = lclid_list1[:20]

# Filter data for the selected LCLids
f_data1 = filtered_data1[filtered_data1['LCLid'].isin(selected_lclids1)]



In [None]:
datan1 = f_data1.copy()
datan1['KWH/hh'] = datan1['KWH/hh'].astype(np.float32)


datan1 = datan1.drop('cluster', axis=1)
datan1 = datan1.drop('stdorToU', axis=1)


In [None]:
datan1.reset_index(drop=True, inplace=True)


In [None]:
# Convert 'DateTime' to POSIX seconds (float64) in a single vectorized step.
# The earlier per-row chained assignment (datan1['DateTime'][i] = ...) raised
# SettingWithCopyWarning, looped in Python over every reading, and does not
# write back at all under pandas copy-on-write, which would leave datetimes in
# the column. Time zone info is dropped first; the naive timestamps are then
# measured from the Unix epoch, i.e. interpreted as UTC, which is what
# pd.Timestamp.timestamp() does for naive values.
datan1['DateTime'] = (
    pd.to_datetime(datan1['DateTime']).dt.tz_localize(None) - pd.Timestamp('1970-01-01')
) / pd.Timedelta(seconds=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  datan1['DateTime'][i]=datan1['DateTime'][i].timestamp()


In [None]:
# Cast the epoch-seconds column to float32.
# NOTE(review): float32 has a 24-bit significand, so epoch values around 1.3e9
# are rounded to multiples of 128 s. Ordering of readings spaced further apart
# than that is preserved, but the exact timestamps are not — confirm the column
# is only used for sorting, otherwise keep it as float64.
datan1['DateTime'] = datan1['DateTime'].astype(np.float32)


In [None]:
# Sort the data by 'LCLid' and 'DateTime'
datan1.sort_values(['LCLid', 'DateTime'], inplace=True)


In [None]:
# Define the client window dataset function for a specific LCLid
def create_client_dataset_for_LCLid(client_data, window_size, step_size):
    """Build sliding-window inputs and look-ahead targets for one client.

    Each example is a run of ``window_size`` consecutive 'KWH/hh' readings; its
    target is the reading ``step_size`` positions after the last element of the
    window. Consecutive windows start ``step_size`` readings apart.

    Args:
        client_data: DataFrame holding a 'KWH/hh' column, already in the order
            the windows should follow (rows are addressed by position).
        window_size: Number of readings per input window (>= 1).
        step_size: Stride between window starts and distance from the end of a
            window to its target (>= 1).

    Returns:
        collections.OrderedDict with
            'x': tensor of shape (num_windows, window_size)
            'y': tensor of shape (num_windows, 1)
        Both take the dtype of the 'KWH/hh' column. When the data is too short
        for even one window plus target, both tensors have zero rows.

    Raises:
        ValueError: if ``window_size`` or ``step_size`` is smaller than 1.
    """
    if window_size < 1 or step_size < 1:
        raise ValueError("window_size and step_size must be positive integers")

    # Extract the column once; slicing the DataFrame with .iloc on every
    # iteration was the dominant cost of the previous implementation.
    readings = client_data['KWH/hh'].to_numpy()
    num_readings = len(readings)

    # A window starting at `s` needs its target at s + window_size - 1 + step_size
    # to exist, so the last valid start is num_readings - window_size - step_size.
    # (The old bound ignored step_size and overran the data for step_size > 1.)
    stop = max(num_readings - window_size - step_size + 1, 0)
    window_starts = np.arange(0, stop, step_size)

    # Row r of window_index lists the positions of the r-th window.
    window_index = window_starts[:, None] + np.arange(window_size)[None, :]
    target_index = window_starts + (window_size - 1 + step_size)

    # Create an ordered dictionary with 'x' and 'y' keys
    ordered_dict = collections.OrderedDict()
    ordered_dict['x'] = tf.convert_to_tensor(readings[window_index])
    ordered_dict['y'] = tf.expand_dims(
        tf.convert_to_tensor(readings[target_index]), axis=-1)

    return ordered_dict

In [None]:
window_size = 336
step_size = 1

# Filter the dataframe for the specific LCLid
example_LCLid = datan1['LCLid'].unique()[3]
clientyy_data = datan1[datan1['LCLid'] == example_LCLid]

# Create the client dataset for the specific LCLid
example_client_dataset = create_client_dataset_for_LCLid(clientyy_data, window_size, step_size)

print("Client dataset for LCLid", example_LCLid)
print(example_client_dataset)

Client dataset for LCLid MAC001739
OrderedDict([('x', <tf.Tensor: shape=(12510, 336), dtype=float32, numpy=
array([[0.84 , 0.913, 0.751, ..., 0.342, 0.465, 0.359],
       [0.913, 0.751, 0.87 , ..., 0.465, 0.359, 0.417],
       [0.751, 0.87 , 0.765, ..., 0.359, 0.417, 0.409],
       ...,
       [1.125, 1.066, 1.066, ..., 1.681, 1.993, 1.611],
       [1.066, 1.066, 1.349, ..., 1.993, 1.611, 1.626],
       [1.066, 1.349, 1.679, ..., 1.611, 1.626, 2.65 ]], dtype=float32)>), ('y', <tf.Tensor: shape=(12510, 1), dtype=float32, numpy=
array([[0.417],
       [0.409],
       [1.011],
       ...,
       [1.626],
       [2.65 ],
       [1.972]], dtype=float32)>)])


In [None]:
NUM_EPOCHS = 5
BATCH_SIZE = 12
SHUFFLE_BUFFER = 60
PREFETCH_BUFFER = 6

def preprocess_client_dataset(dataset):
    """Repeat, shuffle, batch and prefetch one client's example dataset.

    Args:
        dataset: tf.data.Dataset whose elements are mappings with 'x' and 'y'
            entries (one example per element).

    Returns:
        A dataset of OrderedDict batches with 'x' reshaped to
        (batch, features) and 'y' reshaped to (batch, 1). The pipeline repeats
        the data NUM_EPOCHS times, shuffles with a SHUFFLE_BUFFER-sized buffer,
        batches by BATCH_SIZE and prefetches PREFETCH_BUFFER batches.
    """
    def batch_format_fn(element):
        # Flatten each example to one row. The row width comes from the batch's
        # static shape rather than a literal 336, so a different window size no
        # longer errors or silently regroups values across examples.
        feature_dim = element['x'].shape[1:].num_elements()
        if feature_dim is None:
            # Static shape unknown: fall back to the width used historically.
            feature_dim = 336
        return collections.OrderedDict(
            x=tf.reshape(element['x'], [-1, feature_dim]),
            y=tf.reshape(element['y'], [-1, 1]))
    return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER).batch(
        BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)


preprocessed_example_client_dataset = preprocess_client_dataset(tf.data.Dataset.from_tensor_slices(example_client_dataset))


sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_client_dataset)))

sample_batch

OrderedDict([('x',
              array([[0.847, 0.724, 0.913, ..., 0.797, 0.654, 0.623],
                     [1.52 , 0.655, 0.495, ..., 0.699, 0.449, 0.426],
                     [0.751, 0.87 , 0.765, ..., 0.359, 0.417, 0.409],
                     ...,
                     [0.574, 0.567, 0.525, ..., 0.466, 0.406, 0.686],
                     [1.095, 1.249, 0.574, ..., 0.454, 0.425, 0.466],
                     [0.559, 0.498, 0.408, ..., 0.809, 0.834, 0.717]], dtype=float32)),
             ('y',
              array([[0.603],
                     [0.454],
                     [1.011],
                     [0.524],
                     [0.421],
                     [0.422],
                     [0.699],
                     [0.717],
                     [0.414],
                     [0.774],
                     [0.406],
                     [0.404]], dtype=float32))])

In [None]:
import random

NUM_CLIENTS = 12  # Replace with desired number of clients
all_clients = datan1['LCLid'].unique()
# Sample with a dedicated, seeded generator. np.random.seed(0) at the top of the
# notebook does not seed Python's `random` module, so a bare random.sample()
# chose a different set of clients after every kernel restart and the training
# results could not be reproduced.
sample_clients = random.Random(0).sample(all_clients.tolist(), NUM_CLIENTS)

sample_clients_list = sample_clients
sample_clients_list

['MAC004593',
 'MAC004993',
 'MAC001739',
 'MAC002213',
 'MAC002849',
 'MAC001735',
 'MAC000105',
 'MAC002627',
 'MAC001315',
 'MAC003257',
 'MAC002872',
 'MAC004319']

In [None]:
# Build one preprocessed tf.data pipeline per sampled client, keyed by LCLid
client_datasets_12 = {}
for LCLid in sample_clients_list:
    # Filter the dataframe for the current LCLid
    client_data = datan1[datan1['LCLid'] == LCLid]

    # Turn this client's readings into sliding-window tensors ('x' windows, 'y' targets)
    clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

    # Slice the tensors into per-example elements and apply repeat/shuffle/batch/prefetch
    preprocessed_client_dataset = preprocess_client_dataset(tf.data.Dataset.from_tensor_slices(clientxx_dataset))

    # Pull one batch (as numpy arrays) purely for inspection in the printout below
    sam_batch = tf.nest.map_structure(lambda x: x.numpy(), next(iter(preprocessed_client_dataset)))

    # Store the preprocessed dataset in the dictionary with LCLid as the key
    client_datasets_12[LCLid] = preprocessed_client_dataset

    print("Client dataset for LCLid", LCLid)
    print(sam_batch)

Client dataset for LCLid MAC004593
OrderedDict([('x', array([[2.623, 1.518, 1.383, ..., 1.7  , 1.634, 1.438],
       [0.688, 0.624, 0.674, ..., 0.721, 0.721, 0.888],
       [0.624, 0.674, 0.695, ..., 0.721, 0.888, 0.843],
       ...,
       [0.669, 0.624, 0.68 , ..., 0.706, 0.652, 0.725],
       [0.607, 0.669, 0.624, ..., 0.69 , 0.706, 0.652],
       [0.962, 1.232, 1.222, ..., 0.954, 0.817, 0.831]], dtype=float32)), ('y', array([[1.439],
       [0.843],
       [0.856],
       [0.721],
       [0.732],
       [1.728],
       [1.169],
       [1.438],
       [2.099],
       [0.601],
       [0.725],
       [0.949]], dtype=float32))])
Client dataset for LCLid MAC004993
OrderedDict([('x', array([[1.314, 1.843, 5.223, ..., 2.91 , 2.987, 2.579],
       [3.727, 3.539, 2.925, ..., 2.224, 2.252, 2.374],
       [1.641, 1.611, 1.561, ..., 1.717, 1.5  , 1.466],
       ...,
       [2.457, 2.723, 3.065, ..., 1.989, 1.719, 1.961],
       [1.767, 1.805, 1.66 , ..., 2.362, 2.682, 2.201],
       [1.631, 1.

In [None]:
def make_federated_data(client_datasets, sample_clients_list):
    """Collect the datasets of the requested clients, preserving their order.

    Args:
        client_datasets: Mapping from client id to that client's dataset.
        sample_clients_list: Iterable of client ids to look up.

    Returns:
        A list with one entry per id in ``sample_clients_list``, in the same
        order. A missing id raises KeyError from the mapping lookup.
    """
    federated_data = []
    for client_id in sample_clients_list:
        federated_data.append(client_datasets[client_id])
    return federated_data

In [None]:
federated_train_data_12 = make_federated_data(client_datasets_12, sample_clients)


In [None]:
print(f'Number of client datasets: {len(federated_train_data_12)}')
print(f'First dataset: {federated_train_data_12[0]}')
print(f'Second dataset: {federated_train_data_12[1]}')

Number of client datasets: 12
First dataset: <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 336), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))])>
Second dataset: <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 336), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))])>


In [None]:
preprocessed_example_client_dataset.element_spec

OrderedDict([('x', TensorSpec(shape=(None, 336), dtype=tf.float32, name=None)),
             ('y', TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))])

In [None]:
def create_model(input_dim=336):
    """Create the fully connected Keras regressor used for load forecasting.

    Args:
        input_dim: Number of input features per example. Defaults to 336, the
            width that was previously hard-coded; pass the window size when it
            differs so the model matches the dataset.

    Returns:
        An uncompiled tf.keras.Sequential model: three ReLU Dense layers with
        16, 8 and 4 units, followed by a single linear output unit.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu', input_shape=(input_dim,)),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    return model

In [None]:
def model_fn():
    """Wrap a freshly created Keras model as a TFF learning model.

    Every call builds a new Keras model via create_model() together with new
    loss and metric objects, and hands them to
    tff.learning.models.from_keras_model. The input spec is the element spec of
    preprocessed_example_client_dataset.

    Returns:
        The model object returned by tff.learning.models.from_keras_model,
        using mean absolute error as loss and RMSE as reported metric.
    """
    return tff.learning.models.from_keras_model(
        create_model(),
        input_spec=preprocessed_example_client_dataset.element_spec,
        loss=tf.keras.losses.MeanAbsoluteError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

In [None]:
model = model_fn()
print(model)

<tensorflow_federated.python.learning.models.keras_utils._KerasModel object at 0x7f770e01bdf0>


In [None]:
# training starts
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.01),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

In [None]:
print(training_process.initialize.type_signature.formatted_representation())

( -> <
  global_model_weights=<
    trainable=<
      float32[336,64],
      float32[64],
      float32[64,32],
      float32[32],
      float32[32,8],
      float32[8],
      float32[8,1],
      float32[1]
    >,
    non_trainable=<>
  >,
  distributor=<>,
  client_work=<>,
  aggregator=<
    value_sum_process=<>,
    weight_sum_process=<>
  >,
  finalizer=<
    int64,
    float32[336,64],
    float32[64],
    float32[64,32],
    float32[32],
    float32[32,8],
    float32[8],
    float32[8,1],
    float32[1]
  >
>@SERVER)


In [None]:
train_state = training_process.initialize()

In [None]:
result = training_process.next(train_state, federated_train_data_12)
train_state = result.state
train_metrics = result.metrics
print('round  1, metrics={}'.format(train_metrics))

round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('root_mean_squared_error', 0.4182494), ('loss', 0.26426378), ('num_examples', 812030), ('num_batches', 67675)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])


In [None]:
# Round 1 was already executed in the previous cell, so this loop continues
# with rounds 2..20. NUM_ROUNDS is the exclusive upper bound of the range, i.e.
# 20 federated rounds are run in total, each on the same 12 client datasets.
NUM_ROUNDS = 21
for round_num in range(2, NUM_ROUNDS):
  # One federated round: carry the server state forward and report the metrics.
  result = training_process.next(train_state, federated_train_data_12)
  train_state = result.state
  train_metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

round  2, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('root_mean_squared_error', 0.37692037), ('loss', 0.23434481), ('num_examples', 812030), ('num_batches', 67675)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  3, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('root_mean_squared_error', 0.36519673), ('loss', 0.22515282), ('num_examples', 812030), ('num_batches', 67675)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  4, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('root_mean_squared_error', 0.35881412), ('loss', 0.21980815), ('num_examples', 812030), ('num_batches', 67675)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), (

In [None]:
import plotly.graph_objects as go

# Data for Cluster 08
rounds_1 = list(range(1, 21))
mae_1 = [
    0.06737575, 0.06293772, 0.060796063, 0.05940758, 0.058408998,
    0.057648093, 0.05703, 0.056526024, 0.05610915, 0.055751048,
    0.05542996, 0.055151854, 0.054903667, 0.05467346, 0.05447607,
    0.054296006, 0.054134406, 0.053993754, 0.05387066, 0.053758096
]

# Data for Cluster 09
rounds_2 = list(range(1, 21))
mae_2 = [
    0.07034567, 0.06416719, 0.061836798, 0.060558915, 0.059689056,
    0.059010174, 0.058474753, 0.058048673, 0.057682555, 0.057362556,
    0.057089813, 0.056855284, 0.056644276, 0.056458063, 0.05630099,
    0.056166247, 0.05603734, 0.055925187, 0.05582829, 0.05573636
]

# Creating trace for Cluster 08
trace1 = go.Scatter(
    x=rounds_1,
    y=mae_1,
    mode='lines',
    name='Cluster 08'
)

# Creating trace for Cluster 09
trace2 = go.Scatter(
    x=rounds_2,
    y=mae_2,
    mode='lines',
    name='Cluster 09'
)

# Creating trace list
# Deliberately not called `data`: that name holds the readings DataFrame loaded
# from the CSV, which the federated-evaluation cells further down index again.
# Rebinding it to a list of traces here broke those cells on a top-to-bottom run.
traces = [trace1, trace2]

# Creating layout
layout = go.Layout(
    title='Mean Absolute Error (MAE) of Load Forecasting vs. Federated Rounds',
    xaxis=dict(title='Federated Rounds'),
    yaxis=dict(title='Training data - MAE')
)

# Creating figure
fig = go.Figure(data=traces, layout=layout)

# Displaying the graph
fig.show()


In [None]:
import plotly.graph_objects as go

# Data for Cluster 04
rounds_1 = list(range(1, 21))
mae_1 = [
    0.11573566, 0.10361333, 0.09922377, 0.09694171, 0.09549518,
    0.094502784, 0.09377624, 0.09319854, 0.09278589, 0.09244496,
    0.0921646, 0.09189611, 0.09168941, 0.09149853, 0.09130679,
    0.09120082, 0.09106272, 0.09095054, 0.09086295, 0.090747595
]

# Data for Cluster 14
rounds_2 = list(range(1, 21))
mae_2 = [
    0.1708811, 0.1584751, 0.15332645, 0.15120173, 0.14984801,
    0.14892414, 0.14833829, 0.14782089, 0.14744286, 0.14694408,
    0.14646958, 0.14602049, 0.14571527, 0.14540415, 0.14500421,
    0.1446545, 0.14432915, 0.14402962, 0.14367345, 0.14337786
]

# Creating trace for Cluster 04
trace1 = go.Scatter(
    x=rounds_1,
    y=mae_1,
    mode='lines',
    name='Cluster 04'
)

# Creating trace for Cluster 14
trace2 = go.Scatter(
    x=rounds_2,
    y=mae_2,
    mode='lines',
    name='Cluster 14'
)

# Creating trace list
# Deliberately not called `data`: that name holds the readings DataFrame loaded
# from the CSV, which the federated-evaluation cells further down index again.
# Rebinding it to a list of traces here broke those cells on a top-to-bottom run.
traces = [trace1, trace2]

# Creating layout
layout = go.Layout(
    title='Mean Absolute Error (MAE) of Load Forecasting vs. Federated Rounds',
    xaxis=dict(title='Federated Rounds'),
    yaxis=dict(title='Training data - MAE')
)

# Creating figure
fig = go.Figure(data=traces, layout=layout)

# Displaying the graph
fig.show()


In [None]:
import plotly.graph_objects as go

# Data for Cluster 07
rounds_1 = list(range(1, 21))
mae_1 = [
    0.24215707, 0.22306563, 0.21767606, 0.21497612, 0.21307865,
    0.21194346, 0.21102037, 0.2100187, 0.20892245, 0.2077742,
    0.20704211, 0.20606637, 0.20509331, 0.20452215, 0.20378909,
    0.20307666, 0.2025013, 0.20195374, 0.20134038, 0.20096558
]

# Data for Cluster 18
rounds_2 = list(range(1, 21))
mae_2 = [
    0.25958988, 0.2282181, 0.21869427, 0.21352097, 0.21052141,
    0.20815776, 0.20585926, 0.2046754, 0.20356816, 0.20232281,
    0.20153011, 0.2007333, 0.20026353, 0.19920154, 0.19846562,
    0.19802547, 0.19739366, 0.19712806, 0.19636787, 0.19576551
]

# Creating trace for Cluster 07
trace1 = go.Scatter(
    x=rounds_1,
    y=mae_1,
    mode='lines',
    name='Cluster 07'
)

# Creating trace for Cluster 18
trace2 = go.Scatter(
    x=rounds_2,
    y=mae_2,
    mode='lines',
    name='Cluster 18'
)

# Creating trace list
# Deliberately not called `data`: that name holds the readings DataFrame loaded
# from the CSV, which the federated-evaluation cells further down index again.
# Rebinding it to a list of traces here broke those cells on a top-to-bottom run.
traces = [trace1, trace2]

# Creating layout
layout = go.Layout(
    title='Mean Absolute Error (MAE) of Load Forecasting vs. Federated Rounds',
    xaxis=dict(title='Federated Rounds'),
    yaxis=dict(title='Training data - MAE')
)

# Creating figure
fig = go.Figure(data=traces, layout=layout)

# Displaying the graph
fig.show()


In [None]:
import plotly.graph_objects as go

# Data for Cluster - 08, Batch Size = 24, Shuffle Buffer = 120, Prefetch Buffer = 12
rounds_1 = list(range(1, 16))
mae_1 = [
    0.07406851, 0.06911341, 0.06588133, 0.06381104, 0.062328234,
    0.061173238, 0.060279164, 0.059583493, 0.059030402, 0.058575064,
    0.05819059, 0.05786198, 0.05756866, 0.05733508, 0.057126228
]

# Data for Cluster - 08, Batch Size = 12, Shuffle Buffer = 60, Prefetch Buffer = 6
rounds_2 = list(range(1, 16))
mae_2 = [
    0.070309095, 0.06450078, 0.061607864, 0.060092628, 0.059076704,
    0.05836942, 0.057809025, 0.057371534, 0.05702302, 0.056733135,
    0.056459244, 0.056229576, 0.056033693, 0.055863913, 0.055711426
]

# Creating trace for Cluster - 08, Batch Size = 24, Shuffle Buffer = 120, Prefetch Buffer = 12
trace1 = go.Scatter(
    x=rounds_1,
    y=mae_1,
    mode='lines',
    name='Batch Size = 24, Shuffle Buffer = 120, Prefetch Buffer = 12'
)

# Creating trace for Cluster - 08, Batch Size = 12, Shuffle Buffer = 60, Prefetch Buffer = 6
trace2 = go.Scatter(
    x=rounds_2,
    y=mae_2,
    mode='lines',
    name='Batch Size = 12, Shuffle Buffer = 60, Prefetch Buffer = 6'
)

# Creating trace list
# Deliberately not called `data`: that name holds the readings DataFrame loaded
# from the CSV, which the federated-evaluation cells further down index again.
# Rebinding it to a list of traces here broke those cells on a top-to-bottom run.
traces = [trace1, trace2]

# Creating layout
layout = go.Layout(
    title='Mean Absolute Error (MAE) vs. Federated Rounds with Learning Rate - 0.02',
    xaxis=dict(title='Federated Rounds'),
    yaxis=dict(title='MAE')
)

# Creating figure
fig = go.Figure(data=traces, layout=layout)

# Displaying the graph
fig.show()


In [None]:
import plotly.graph_objects as go

# Data for Batch Size = 12, Shuffle Buffer = 60, Prefetch Buffer = 6, Learning Rate = 0.02
rounds_1 = list(range(1, 16))
mae_1 = [
    0.070309095, 0.06450078, 0.061607864, 0.060092628, 0.059076704,
    0.05836942, 0.057809025, 0.057371534, 0.05702302, 0.056733135,
    0.056459244, 0.056229576, 0.056033693, 0.055863913, 0.055711426
]

# Data for Batch Size = 12, Shuffle Buffer = 60, Prefetch Buffer = 6, Learning Rate = 0.01
rounds_2 = list(range(1, 16))
mae_2 = [
    0.06575257, 0.059883647, 0.05776895, 0.056706358, 0.056026068,
    0.055522144, 0.05512807, 0.05480615, 0.054533303, 0.05431394,
    0.054116458, 0.053937282, 0.053791393, 0.05366001, 0.05353039
]

# Creating trace for Batch Size = 12, Shuffle Buffer = 60, Prefetch Buffer = 6, Learning Rate = 0.02
trace1 = go.Scatter(
    x=rounds_1,
    y=mae_1,
    mode='lines',
    name='Learning Rate = 0.02'
)

# Creating trace for Batch Size = 12, Shuffle Buffer = 60, Prefetch Buffer = 6, Learning Rate = 0.01
trace2 = go.Scatter(
    x=rounds_2,
    y=mae_2,
    mode='lines',
    name='Learning Rate = 0.01'
)

# Creating trace list
# Deliberately not called `data`: that name holds the readings DataFrame loaded
# from the CSV, which the federated-evaluation cells further down index again.
# Rebinding it to a list of traces here broke those cells on a top-to-bottom run.
traces = [trace1, trace2]

# Creating layout
layout = go.Layout(
    title='Mean Absolute Error (MAE) vs. Federated Rounds',
    xaxis=dict(title='Federated Rounds'),
    yaxis=dict(title='MAE')
)

# Creating figure
fig = go.Figure(data=traces, layout=layout)

# Displaying the graph
fig.show()


FEDERATED EVALUATION

In [None]:
# Filter data for the specified time period
start_date = '2013-03-01'
end_date = '2014-02-28'
filtered_data = data[(data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)]

# Calculate the average KWH/hh for each LCLid
average_kwh = filtered_data.groupby('LCLid')['KWH/hh'].mean()

# Find the LCLids with the lowest, highest, and medium average KWH/hh
lowest_avg_lclid = average_kwh.idxmin()
highest_avg_lclid = average_kwh.idxmax()
medium_avg_lclid = average_kwh.sort_values().index[len(average_kwh) // 2]

lowest_avg_lclid, highest_avg_lclid, medium_avg_lclid

('MAC000946', 'MAC001715', 'MAC001928')

In [None]:
# Assuming your data is stored in a DataFrame called 'data'
# Calculate the average KWH/hh for each LCLid
average_kwh = filtered_data.groupby('LCLid')['KWH/hh'].mean()

# Print all LCLids with their corresponding average KWH/hh
for lclid, avg_kwh in average_kwh.items():
    print(f"LCLid: {lclid}, Average KWH/hh: {avg_kwh}")

LCLid: MAC000027, Average KWH/hh: 0.17755218584916457
LCLid: MAC000033, Average KWH/hh: 0.15519773494823544
LCLid: MAC000041, Average KWH/hh: 0.18327049136303838
LCLid: MAC000067, Average KWH/hh: 0.12797408911514044
LCLid: MAC000118, Average KWH/hh: 0.17898346965051765
LCLid: MAC000123, Average KWH/hh: 0.15642424935087218
LCLid: MAC000127, Average KWH/hh: 0.1898002173358499
LCLid: MAC000143, Average KWH/hh: 0.18460687526740263
LCLid: MAC000154, Average KWH/hh: 0.13875990872540633
LCLid: MAC000175, Average KWH/hh: 0.16180844248698736
LCLid: MAC000179, Average KWH/hh: 0.11634280810408922
LCLid: MAC000184, Average KWH/hh: 0.19160761172258323
LCLid: MAC000190, Average KWH/hh: 0.138320386689166
LCLid: MAC000208, Average KWH/hh: 0.1918430357255016
LCLid: MAC000214, Average KWH/hh: 0.1620173873312743
LCLid: MAC000219, Average KWH/hh: 0.12781214963962934
LCLid: MAC000228, Average KWH/hh: 0.1295455532971118
LCLid: MAC000239, Average KWH/hh: 0.15234770920894583
LCLid: MAC000244, Average KWH/hh: 

In [None]:
filtered_data44 = data[data['LCLid'] == 'MAC004593']

In [None]:
# Build seven overlapping one-week test windows for this client; each window starts one day later (2014-02-14 through 2014-02-20) and both endpoints are inclusive
teststart_date1 = pd.to_datetime('2014-02-14')
testend_date1 = pd.to_datetime('2014-02-21')
test_data1 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date1) & (filtered_data44['DateTime'] <= testend_date1)]

teststart_date2 = pd.to_datetime('2014-02-15')
testend_date2 = pd.to_datetime('2014-02-22')
test_data2 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date2) & (filtered_data44['DateTime'] <= testend_date2)]


teststart_date3 = pd.to_datetime('2014-02-16')
testend_date3 = pd.to_datetime('2014-02-23')
test_data3 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date3) & (filtered_data44['DateTime'] <= testend_date3)]

teststart_date4 = pd.to_datetime('2014-02-17')
testend_date4 = pd.to_datetime('2014-02-24')
test_data4 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date4) & (filtered_data44['DateTime'] <= testend_date4)]

teststart_date5 = pd.to_datetime('2014-02-18')
testend_date5 = pd.to_datetime('2014-02-25')
test_data5 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date5) & (filtered_data44['DateTime'] <= testend_date5)]

teststart_date6 = pd.to_datetime('2014-02-19')
testend_date6 = pd.to_datetime('2014-02-26')
test_data6 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date6) & (filtered_data44['DateTime'] <= testend_date6)]

teststart_date7 = pd.to_datetime('2014-02-20')
testend_date7 = pd.to_datetime('2014-02-27')
test_data7 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date7) & (filtered_data44['DateTime'] <= testend_date7)]

# teststart_date8 = pd.to_datetime('2013-10-01')
# testend_date8 = pd.to_datetime('2013-10-31')
# test_data8 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date8) & (filtered_data44['DateTime'] <= testend_date8)]

# teststart_date9 = pd.to_datetime('2013-11-01')
# testend_date9 = pd.to_datetime('2013-11-30')
# test_data9 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date9) & (filtered_data44['DateTime'] <= testend_date9)]

# teststart_date10 = pd.to_datetime('2013-12-01')
# testend_date10 = pd.to_datetime('2013-12-31')
# test_data10 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date10) & (filtered_data44['DateTime'] <= testend_date10)]

# teststart_date11 = pd.to_datetime('2014-01-01')
# testend_date11 = pd.to_datetime('2014-01-31')
# test_data11 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date11) & (filtered_data44['DateTime'] <= testend_date11)]

# teststart_date12 = pd.to_datetime('2014-02-01')
# testend_date12 = pd.to_datetime('2014-02-28')
# test_data12 = filtered_data44[(filtered_data44['DateTime'] >= teststart_date12) & (filtered_data44['DateTime'] <= testend_date12)]



In [None]:
test_data5

Unnamed: 0,LCLid,stdorToU,DateTime,KWH/hh,cluster
375271,MAC004593,ToU,2014-02-18 00:00:00,0.871,17
375272,MAC004593,ToU,2014-02-18 00:30:00,0.822,17
375273,MAC004593,ToU,2014-02-18 01:00:00,0.746,17
375274,MAC004593,ToU,2014-02-18 01:30:00,0.660,17
375275,MAC004593,ToU,2014-02-18 02:00:00,0.738,17
...,...,...,...,...,...
375603,MAC004593,ToU,2014-02-24 22:00:00,0.784,17
375604,MAC004593,ToU,2014-02-24 22:30:00,0.774,17
375605,MAC004593,ToU,2014-02-24 23:00:00,0.805,17
375606,MAC004593,ToU,2014-02-24 23:30:00,0.797,17


In [None]:
# Select unique LCLids
lclid_lt1 = test_data1['LCLid'].unique()
sel_lclids1 = lclid_lt1[:1]
# Filter data for the selected LCLids
ft_data1 = test_data1[test_data1['LCLid'].isin(sel_lclids1)]

# Select unique LCLids
lclid_lt2 = test_data2['LCLid'].unique()
sel_lclids2 = lclid_lt2[:1]
# Filter data for the selected LCLids
ft_data2 = test_data2[test_data2['LCLid'].isin(sel_lclids2)]

# Select unique LCLids
lclid_lt3 = test_data3['LCLid'].unique()
sel_lclids3 = lclid_lt3[:1]
# Filter data for the selected LCLids
ft_data3 = test_data3[test_data3['LCLid'].isin(sel_lclids3)]

# Select unique LCLids
lclid_lt4 = test_data4['LCLid'].unique()
sel_lclids4 = lclid_lt4[:1]
# Filter data for the selected LCLids
ft_data4 = test_data4[test_data4['LCLid'].isin(sel_lclids4)]

# Select unique LCLids
lclid_lt5 = test_data5['LCLid'].unique()
sel_lclids5 = lclid_lt5[:1]
# Filter data for the selected LCLids
ft_data5 = test_data5[test_data5['LCLid'].isin(sel_lclids5)]

# Select unique LCLids
lclid_lt6 = test_data6['LCLid'].unique()
sel_lclids6 = lclid_lt6[:1]
# Filter data for the selected LCLids
ft_data6 = test_data6[test_data6['LCLid'].isin(sel_lclids6)]

# Select unique LCLids
lclid_lt7 = test_data7['LCLid'].unique()
sel_lclids7 = lclid_lt7[:1]
# Filter data for the selected LCLids
ft_data7 = test_data7[test_data7['LCLid'].isin(sel_lclids7)]

# # Select unique LCLids
# lclid_lt8 = test_data8['LCLid'].unique()
# sel_lclids8 = lclid_lt8[:1]
# # Filter data for the selected LCLids
# ft_data8 = test_data8[test_data8['LCLid'].isin(sel_lclids8)]

# # Select unique LCLids
# lclid_lt9 = test_data9['LCLid'].unique()
# sel_lclids9 = lclid_lt9[:1]
# # Filter data for the selected LCLids
# ft_data9 = test_data9[test_data9['LCLid'].isin(sel_lclids9)]

# # Select unique LCLids
# lclid_lt10 = test_data10['LCLid'].unique()
# sel_lclids10 = lclid_lt10[:1]
# # Filter data for the selected LCLids
# ft_data10 = test_data10[test_data10['LCLid'].isin(sel_lclids10)]

# # Select unique LCLids
# lclid_lt11 = test_data11['LCLid'].unique()
# sel_lclids11 = lclid_lt11[:1]
# # Filter data for the selected LCLids
# ft_data11 = test_data11[test_data11['LCLid'].isin(sel_lclids11)]

# # Select unique LCLids
# lclid_lt12 = test_data12['LCLid'].unique()
# sel_lclids12 = lclid_lt12[:1]
# # Filter data for the selected LCLids
# ft_data12 = test_data12[test_data12['LCLid'].isin(sel_lclids12)]

In [None]:
ft_data1.reset_index(drop=True, inplace=True)
ft_data2.reset_index(drop=True, inplace=True)
ft_data3.reset_index(drop=True, inplace=True)
ft_data4.reset_index(drop=True, inplace=True)
ft_data5.reset_index(drop=True, inplace=True)
ft_data6.reset_index(drop=True, inplace=True)
ft_data7.reset_index(drop=True, inplace=True)
# ft_data8.reset_index(drop=True, inplace=True)
# ft_data9.reset_index(drop=True, inplace=True)
# ft_data10.reset_index(drop=True, inplace=True)
# ft_data11.reset_index(drop=True, inplace=True)
# ft_data12.reset_index(drop=True, inplace=True)

In [None]:
ft_data1['KWH/hh'] = ft_data1['KWH/hh'].astype(np.float32)
ft_data2['KWH/hh'] = ft_data2['KWH/hh'].astype(np.float32)
ft_data3['KWH/hh'] = ft_data3['KWH/hh'].astype(np.float32)
ft_data4['KWH/hh'] = ft_data4['KWH/hh'].astype(np.float32)
ft_data5['KWH/hh'] = ft_data5['KWH/hh'].astype(np.float32)
ft_data6['KWH/hh'] = ft_data6['KWH/hh'].astype(np.float32)
ft_data7['KWH/hh'] = ft_data7['KWH/hh'].astype(np.float32)
# ft_data8['KWH/hh'] = ft_data8['KWH/hh'].astype(np.float32)
# ft_data9['KWH/hh'] = ft_data9['KWH/hh'].astype(np.float32)
# ft_data10['KWH/hh'] = ft_data10['KWH/hh'].astype(np.float32)
# ft_data11['KWH/hh'] = ft_data11['KWH/hh'].astype(np.float32)
# ft_data12['KWH/hh'] = ft_data12['KWH/hh'].astype(np.float32)


In [None]:
ft_data1 = ft_data1.drop('cluster', axis=1)
ft_data1 = ft_data1.drop('stdorToU', axis=1)

ft_data2 = ft_data2.drop('cluster', axis=1)
ft_data2 = ft_data2.drop('stdorToU', axis=1)

ft_data3 = ft_data3.drop('cluster', axis=1)
ft_data3 = ft_data3.drop('stdorToU', axis=1)

ft_data4 = ft_data4.drop('cluster', axis=1)
ft_data4 = ft_data4.drop('stdorToU', axis=1)

ft_data5 = ft_data5.drop('cluster', axis=1)
ft_data5 = ft_data5.drop('stdorToU', axis=1)

ft_data6 = ft_data6.drop('cluster', axis=1)
ft_data6 = ft_data6.drop('stdorToU', axis=1)

ft_data7 = ft_data7.drop('cluster', axis=1)
ft_data7 = ft_data7.drop('stdorToU', axis=1)

# ft_data8 = ft_data8.drop('cluster', axis=1)
# ft_data8 = ft_data8.drop('stdorToU', axis=1)

# ft_data9 = ft_data9.drop('cluster', axis=1)
# ft_data9 = ft_data9.drop('stdorToU', axis=1)

# ft_data10 = ft_data10.drop('cluster', axis=1)
# ft_data10 = ft_data10.drop('stdorToU', axis=1)

# ft_data11 = ft_data11.drop('cluster', axis=1)
# ft_data11 = ft_data11.drop('stdorToU', axis=1)

# ft_data12 = ft_data12.drop('cluster', axis=1)
# ft_data12 = ft_data12.drop('stdorToU', axis=1)




In [None]:
# Convert the 'DateTime' column of every test frame to POSIX seconds (float64).
# This replaces seven copies of a per-row chained assignment
# (ft_dataN['DateTime'][i] = ...), which raised SettingWithCopyWarning for each
# frame and does not write back at all under pandas copy-on-write, leaving
# datetimes in the column. Time zone info is dropped first; the naive
# timestamps are then measured from the Unix epoch, i.e. interpreted as UTC,
# which is what pd.Timestamp.timestamp() does for naive values.
_epoch = pd.Timestamp('1970-01-01')
_one_second = pd.Timedelta(seconds=1)
for _frame in (ft_data1, ft_data2, ft_data3, ft_data4, ft_data5, ft_data6, ft_data7):
    _naive = pd.to_datetime(_frame['DateTime']).dt.tz_localize(None)
    # Assigning the whole column mutates the frame in place, so ft_data1..7
    # keep referring to the updated objects.
    _frame['DateTime'] = (_naive - _epoch) / _one_second

# ft_data8['DateTime'] = pd.to_datetime(ft_data8.DateTime).dt.tz_localize(None)
# for i in range(len(ft_data8)):
#   ft_data8['DateTime'][i]=ft_data8['DateTime'][i].timestamp()

# ft_data9['DateTime'] = pd.to_datetime(ft_data9.DateTime).dt.tz_localize(None)
# for i in range(len(ft_data9)):
#   ft_data9['DateTime'][i]=ft_data9['DateTime'][i].timestamp()

# ft_data10['DateTime'] = pd.to_datetime(ft_data10.DateTime).dt.tz_localize(None)
# for i in range(len(ft_data10)):
#   ft_data10['DateTime'][i]=ft_data10['DateTime'][i].timestamp()

# ft_data11['DateTime'] = pd.to_datetime(ft_data11.DateTime).dt.tz_localize(None)
# for i in range(len(ft_data11)):
#   ft_data11['DateTime'][i]=ft_data11['DateTime'][i].timestamp()

# ft_data12['DateTime'] = pd.to_datetime(ft_data12.DateTime).dt.tz_localize(None)
# for i in range(len(ft_data12)):
#   ft_data12['DateTime'][i]=ft_data12['DateTime'][i].timestamp()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ft_data1['DateTime'][i]=ft_data1['DateTime'][i].timestamp()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ft_data2['DateTime'][i]=ft_data2['DateTime'][i].timestamp()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ft_data3['DateTime'][i]=ft_data3['DateTime'][i].timestamp()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-vers

In [None]:
# Inspect the dtype of every column of ft_data1.
ft_data1.dtypes

LCLid        object
DateTime     object
KWH/hh      float32
dtype: object

In [None]:
# Cast the DateTime column of every active test frame to float32.
# NOTE(review): float32 carries roughly 7 significant decimal digits, so values
# the size of Unix epoch seconds get rounded - confirm this is acceptable for
# whatever consumes DateTime downstream.
for _frame in (ft_data1, ft_data2, ft_data3, ft_data4, ft_data5, ft_data6, ft_data7):
    _frame['DateTime'] = _frame['DateTime'].astype(np.float32)
# ft_data8['DateTime'] = ft_data8['DateTime'].astype(np.float32)
# ft_data9['DateTime'] = ft_data9['DateTime'].astype(np.float32)
# ft_data10['DateTime'] = ft_data10['DateTime'].astype(np.float32)
# ft_data11['DateTime'] = ft_data11['DateTime'].astype(np.float32)
# ft_data12['DateTime'] = ft_data12['DateTime'].astype(np.float32)




In [None]:
# Order each active test frame by client id ('LCLid') and then by 'DateTime'.
# The sort is done in place, so the frames themselves are modified.
for _frame in (ft_data1, ft_data2, ft_data3, ft_data4, ft_data5, ft_data6, ft_data7):
    _frame.sort_values(['LCLid', 'DateTime'], inplace=True)
# ft_data8.sort_values(['LCLid', 'DateTime'], inplace=True)
# ft_data9.sort_values(['LCLid', 'DateTime'], inplace=True)
# ft_data10.sort_values(['LCLid', 'DateTime'], inplace=True)
# ft_data11.sort_values(['LCLid', 'DateTime'], inplace=True)
# ft_data12.sort_values(['LCLid', 'DateTime'], inplace=True)


In [None]:
# Distinct client ids (LCLid) present in ft_data1, in order of first appearance.
test_client = pd.unique(ft_data1['LCLid'])

test_client

array(['MAC004593'], dtype=object)

In [None]:
# Preprocessed evaluation datasets built from ft_data1, keyed by client id.
test_dataset1 = dict()

for LCLid in test_client:
    # Rows of ft_data1 that belong to this client only.
    client_data = ft_data1.loc[ft_data1['LCLid'].eq(LCLid)]

    # Build the client's raw examples with the notebook's helper, which
    # receives the window and step sizes.
    clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

    # Wrap the examples in a tf.data.Dataset and run the notebook's preprocessing on it.
    preprocessed_client_dataset = preprocess_client_dataset(
        tf.data.Dataset.from_tensor_slices(clientxx_dataset)
    )

    # Pull the first batch and turn its tensors into NumPy arrays for display.
    sam_batch = tf.nest.map_structure(
        lambda tensor: tensor.numpy(),
        next(iter(preprocessed_client_dataset)),
    )

    # Register the dataset under the client's id.
    test_dataset1[LCLid] = preprocessed_client_dataset

    print("Client dataset for LCLid", LCLid)
    print(sam_batch)

Client dataset for LCLid MAC004593
OrderedDict([('x', array([[0.828, 0.865, 0.701, ..., 1.246, 1.244, 1.129],
       [0.828, 0.865, 0.701, ..., 1.246, 1.244, 1.129],
       [0.828, 0.865, 0.701, ..., 1.246, 1.244, 1.129],
       [0.828, 0.865, 0.701, ..., 1.246, 1.244, 1.129],
       [0.828, 0.865, 0.701, ..., 1.246, 1.244, 1.129]], dtype=float32)), ('y', array([[0.874],
       [0.874],
       [0.874],
       [0.874],
       [0.874]], dtype=float32))])


In [None]:
# Preprocessed evaluation datasets built from ft_data2, keyed by client id.
test_dataset2 = dict()

for LCLid in test_client:
    # Rows of ft_data2 that belong to this client only.
    client_data = ft_data2.loc[ft_data2['LCLid'].eq(LCLid)]

    # Build the client's raw examples with the notebook's helper, which
    # receives the window and step sizes.
    clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

    # Wrap the examples in a tf.data.Dataset and run the notebook's preprocessing on it.
    preprocessed_client_dataset = preprocess_client_dataset(
        tf.data.Dataset.from_tensor_slices(clientxx_dataset)
    )

    # Pull the first batch as NumPy arrays (the value is not read again in this cell).
    sam_batch = tf.nest.map_structure(
        lambda tensor: tensor.numpy(),
        next(iter(preprocessed_client_dataset)),
    )

    # Register the dataset under the client's id.
    test_dataset2[LCLid] = preprocessed_client_dataset


In [None]:
# Preprocessed evaluation datasets built from ft_data3, keyed by client id.
test_dataset3 = dict()

for LCLid in test_client:
    # Rows of ft_data3 that belong to this client only.
    client_data = ft_data3.loc[ft_data3['LCLid'].eq(LCLid)]

    # Build the client's raw examples with the notebook's helper, which
    # receives the window and step sizes.
    clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

    # Wrap the examples in a tf.data.Dataset and run the notebook's preprocessing on it.
    preprocessed_client_dataset = preprocess_client_dataset(
        tf.data.Dataset.from_tensor_slices(clientxx_dataset)
    )

    # Pull the first batch as NumPy arrays (the value is not read again in this cell).
    sam_batch = tf.nest.map_structure(
        lambda tensor: tensor.numpy(),
        next(iter(preprocessed_client_dataset)),
    )

    # Register the dataset under the client's id.
    test_dataset3[LCLid] = preprocessed_client_dataset


In [None]:
# Preprocessed evaluation datasets built from ft_data4, keyed by client id.
test_dataset4 = dict()

for LCLid in test_client:
    # Rows of ft_data4 that belong to this client only.
    client_data = ft_data4.loc[ft_data4['LCLid'].eq(LCLid)]

    # Build the client's raw examples with the notebook's helper, which
    # receives the window and step sizes.
    clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

    # Wrap the examples in a tf.data.Dataset and run the notebook's preprocessing on it.
    preprocessed_client_dataset = preprocess_client_dataset(
        tf.data.Dataset.from_tensor_slices(clientxx_dataset)
    )

    # Pull the first batch as NumPy arrays (the value is not read again in this cell).
    sam_batch = tf.nest.map_structure(
        lambda tensor: tensor.numpy(),
        next(iter(preprocessed_client_dataset)),
    )

    # Register the dataset under the client's id.
    test_dataset4[LCLid] = preprocessed_client_dataset

In [None]:
# Preprocessed evaluation datasets built from ft_data5, keyed by client id.
test_dataset5 = dict()

for LCLid in test_client:
    # Rows of ft_data5 that belong to this client only.
    client_data = ft_data5.loc[ft_data5['LCLid'].eq(LCLid)]

    # Build the client's raw examples with the notebook's helper, which
    # receives the window and step sizes.
    clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

    # Wrap the examples in a tf.data.Dataset and run the notebook's preprocessing on it.
    preprocessed_client_dataset = preprocess_client_dataset(
        tf.data.Dataset.from_tensor_slices(clientxx_dataset)
    )

    # Pull the first batch as NumPy arrays (the value is not read again in this cell).
    sam_batch = tf.nest.map_structure(
        lambda tensor: tensor.numpy(),
        next(iter(preprocessed_client_dataset)),
    )

    # Register the dataset under the client's id.
    test_dataset5[LCLid] = preprocessed_client_dataset

In [None]:
# Preprocessed evaluation datasets built from ft_data6, keyed by client id.
test_dataset6 = dict()

for LCLid in test_client:
    # Rows of ft_data6 that belong to this client only.
    client_data = ft_data6.loc[ft_data6['LCLid'].eq(LCLid)]

    # Build the client's raw examples with the notebook's helper, which
    # receives the window and step sizes.
    clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

    # Wrap the examples in a tf.data.Dataset and run the notebook's preprocessing on it.
    preprocessed_client_dataset = preprocess_client_dataset(
        tf.data.Dataset.from_tensor_slices(clientxx_dataset)
    )

    # Pull the first batch as NumPy arrays (the value is not read again in this cell).
    sam_batch = tf.nest.map_structure(
        lambda tensor: tensor.numpy(),
        next(iter(preprocessed_client_dataset)),
    )

    # Register the dataset under the client's id.
    test_dataset6[LCLid] = preprocessed_client_dataset

In [None]:
# Preprocessed evaluation datasets built from ft_data7, keyed by client id.
test_dataset7 = dict()

for LCLid in test_client:
    # Rows of ft_data7 that belong to this client only.
    client_data = ft_data7.loc[ft_data7['LCLid'].eq(LCLid)]

    # Build the client's raw examples with the notebook's helper, which
    # receives the window and step sizes.
    clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

    # Wrap the examples in a tf.data.Dataset and run the notebook's preprocessing on it.
    preprocessed_client_dataset = preprocess_client_dataset(
        tf.data.Dataset.from_tensor_slices(clientxx_dataset)
    )

    # Pull the first batch as NumPy arrays (the value is not read again in this cell).
    sam_batch = tf.nest.map_structure(
        lambda tensor: tensor.numpy(),
        next(iter(preprocessed_client_dataset)),
    )

    # Register the dataset under the client's id.
    test_dataset7[LCLid] = preprocessed_client_dataset

In [None]:
# test_dataset8 = {}

# for LCLid in test_client:
#     # Filter the dataframe for the current LCLid
#     client_data = ft_data8[ft_data8['LCLid'] == LCLid]

#     clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

#     # Create the client dataset for the current LCLid
#     preprocessed_client_dataset = preprocess_client_dataset(tf.data.Dataset.from_tensor_slices(clientxx_dataset))

#     # Extract a sample batch from the preprocessed dataset
#     sam_batch = tf.nest.map_structure(lambda x: x.numpy(), next(iter(preprocessed_client_dataset)))

#     # Store the preprocessed dataset in the dictionary with LCLid as the key
#     test_dataset8[LCLid] = preprocessed_client_dataset

In [None]:
# test_dataset9 = {}

# for LCLid in test_client:
#     # Filter the dataframe for the current LCLid
#     client_data = ft_data9[ft_data9['LCLid'] == LCLid]

#     clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

#     # Create the client dataset for the current LCLid
#     preprocessed_client_dataset = preprocess_client_dataset(tf.data.Dataset.from_tensor_slices(clientxx_dataset))

#     # Extract a sample batch from the preprocessed dataset
#     sam_batch = tf.nest.map_structure(lambda x: x.numpy(), next(iter(preprocessed_client_dataset)))

#     # Store the preprocessed dataset in the dictionary with LCLid as the key
#     test_dataset9[LCLid] = preprocessed_client_dataset

In [None]:
# test_dataset10 = {}

# for LCLid in test_client:
#     # Filter the dataframe for the current LCLid
#     client_data = ft_data10[ft_data10['LCLid'] == LCLid]

#     clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

#     # Create the client dataset for the current LCLid
#     preprocessed_client_dataset = preprocess_client_dataset(tf.data.Dataset.from_tensor_slices(clientxx_dataset))

#     # Extract a sample batch from the preprocessed dataset
#     sam_batch = tf.nest.map_structure(lambda x: x.numpy(), next(iter(preprocessed_client_dataset)))

#     # Store the preprocessed dataset in the dictionary with LCLid as the key
#     test_dataset10[LCLid] = preprocessed_client_dataset

In [None]:
# test_dataset11 = {}

# for LCLid in test_client:
#     # Filter the dataframe for the current LCLid
#     client_data = ft_data11[ft_data11['LCLid'] == LCLid]

#     clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

#     # Create the client dataset for the current LCLid
#     preprocessed_client_dataset = preprocess_client_dataset(tf.data.Dataset.from_tensor_slices(clientxx_dataset))

#     # Extract a sample batch from the preprocessed dataset
#     sam_batch = tf.nest.map_structure(lambda x: x.numpy(), next(iter(preprocessed_client_dataset)))

#     # Store the preprocessed dataset in the dictionary with LCLid as the key
#     test_dataset11[LCLid] = preprocessed_client_dataset

In [None]:
# test_dataset12 = {}

# for LCLid in test_client:
#     # Filter the dataframe for the current LCLid
#     client_data = ft_data12[ft_data12['LCLid'] == LCLid]

#     clientxx_dataset = create_client_dataset_for_LCLid(client_data, window_size, step_size)

#     # Create the client dataset for the current LCLid
#     preprocessed_client_dataset = preprocess_client_dataset(tf.data.Dataset.from_tensor_slices(clientxx_dataset))

#     # Extract a sample batch from the preprocessed dataset
#     sam_batch = tf.nest.map_structure(lambda x: x.numpy(), next(iter(preprocessed_client_dataset)))

#     # Store the preprocessed dataset in the dictionary with LCLid as the key
#     test_dataset12[LCLid] = preprocessed_client_dataset

In [None]:
# Federated test data: one make_federated_data() call per test dataset
# dictionary, every call using the same client list.
(
    federated_test_data1,
    federated_test_data2,
    federated_test_data3,
    federated_test_data4,
    federated_test_data5,
    federated_test_data6,
    federated_test_data7,
) = (
    make_federated_data(client_datasets, test_client)
    for client_datasets in (
        test_dataset1,
        test_dataset2,
        test_dataset3,
        test_dataset4,
        test_dataset5,
        test_dataset6,
        test_dataset7,
    )
)
# federated_test_data8 = make_federated_data(test_dataset8, test_client)
# federated_test_data9 = make_federated_data(test_dataset9, test_client)
# federated_test_data10 = make_federated_data(test_dataset10, test_client)
# federated_test_data11 = make_federated_data(test_dataset11, test_client)
# federated_test_data12 = make_federated_data(test_dataset12, test_client)


In [None]:
# Build the federated evaluation process for the model that model_fn constructs
# (model_fn is defined outside this section of the notebook).
evaluation_process = tff.learning.algorithms.build_fed_eval(model_fn)

In [None]:
# Show the type signature of one evaluation step: the (state, client_data)
# arguments it takes and the structure it returns.
print(evaluation_process.next.type_signature.formatted_representation())

(<
  state=<
    global_model_weights=<
      trainable=<
        float32[336,64],
        float32[64],
        float32[64,32],
        float32[32],
        float32[32,8],
        float32[8],
        float32[8,1],
        float32[1]
      >,
      non_trainable=<>
    >,
    distributor=<>,
    client_work=<
      <>,
      <
        root_mean_squared_error=<
          float32,
          float32
        >,
        loss=<
          float32,
          float32
        >,
        num_examples=<
          int64
        >,
        num_batches=<
          int64
        >
      >
    >,
    aggregator=<
      value_sum_process=<>,
      weight_sum_process=<>
    >,
    finalizer=<>
  >@SERVER,
  client_data={<
    x=float32[?,336],
    y=float32[?,1]
  >*}@CLIENTS
> -> <
  state=<
    global_model_weights=<
      trainable=<
        float32[336,64],
        float32[64],
        float32[64,32],
        float32[32],
        float32[32,8],
        float32[8],
        float32[8,1],
        float32

In [None]:
# Read the model weights out of the training state ...
model_weights = training_process.get_model_weights(train_state)

# ... and load them into a freshly initialized evaluation state.
evaluation_state = evaluation_process.set_model_weights(
    evaluation_process.initialize(), model_weights
)

In [None]:
# Run one evaluation step per federated test dataset. Every step starts from
# the same evaluation_state; the state returned by a step is not fed into the next.
(
    evaluation_output1,
    evaluation_output2,
    evaluation_output3,
    evaluation_output4,
    evaluation_output5,
    evaluation_output6,
    evaluation_output7,
) = (
    evaluation_process.next(evaluation_state, federated_data)
    for federated_data in (
        federated_test_data1,
        federated_test_data2,
        federated_test_data3,
        federated_test_data4,
        federated_test_data5,
        federated_test_data6,
        federated_test_data7,
    )
)
# evaluation_output8 = evaluation_process.next(evaluation_state, federated_test_data8)
# evaluation_output9 = evaluation_process.next(evaluation_state, federated_test_data9)
# evaluation_output10 = evaluation_process.next(evaluation_state, federated_test_data10)
# evaluation_output11 = evaluation_process.next(evaluation_state, federated_test_data11)
# evaluation_output12 = evaluation_process.next(evaluation_state, federated_test_data12)

In [None]:
# Collect the 'loss' metric of the current round from each evaluation output,
# in the order evaluation_output1 .. evaluation_output7.
loss_values_1 = [
    output.metrics['client_work']['eval']['current_round_metrics']['loss']
    for output in (
        evaluation_output1,
        evaluation_output2,
        evaluation_output3,
        evaluation_output4,
        evaluation_output5,
        evaluation_output6,
        evaluation_output7,
    )
]

# Print the loss values
print(loss_values_1)

[0.021250479, 0.019084923, 0.011400148, 0.011378154, 0.07969704, 0.041061617, 0.029411878]


In [None]:
# Collect the 'root_mean_squared_error' metric of the current round from each
# evaluation output, in the order evaluation_output1 .. evaluation_output7.
loss_values_2 = [
    output.metrics['client_work']['eval']['current_round_metrics']['root_mean_squared_error']
    for output in (
        evaluation_output1,
        evaluation_output2,
        evaluation_output3,
        evaluation_output4,
        evaluation_output5,
        evaluation_output6,
        evaluation_output7,
    )
]

# Print the RMSE values
print(loss_values_2)

[0.07502663, 0.111199975, 0.12958628, 0.03374207, 0.1143148, 0.08410741, 0.009626329]


In [None]:
import plotly.graph_objects as go
import numpy as np

# Data
# NOTE(review): presumably one label per evaluated frame (ft_data1 .. ft_data7),
# in that order - confirm against how the frames were sliced.
dates = ['Feb 22', 'Feb 23', 'Feb 24', 'Feb 25', 'Feb 26', 'Feb 27', 'Feb 28']
# Literal copy of the RMSE list printed from `loss_values_2`, rounded to five
# decimals. These numbers must be refreshed by hand if the evaluation is re-run.
rmse_values = [0.07503, 0.11120, 0.12959, 0.03374, 0.11431, 0.08411, 0.00963]

# Every point needs a label; a mismatch would silently truncate the plot.
assert len(dates) == len(rmse_values), "dates and rmse_values must have the same length"

# Calculate mean
mean_rmse = np.mean(rmse_values)

# Creating the plot
fig = go.Figure()

# Adding the trace for RMSE values
fig.add_trace(go.Scatter(x=dates, y=rmse_values, mode='lines+markers', name='RMSE'))

# Adding a line for mean RMSE.
# It is drawn as a trace rather than a layout shape: shapes are not legend
# entries by default, so a shape's `name` ('Mean RMSE') is never displayed.
fig.add_trace(
    go.Scatter(
        x=[dates[0], dates[-1]],
        y=[mean_rmse, mean_rmse],
        mode='lines',
        line=dict(color='red', dash='dash'),
        name='Mean RMSE',
    )
)

# Updating the axis labels and title
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='RMSE',
    title='Weekly Forecast (Feb 22 - Feb 28) of a Moderate Consumer - Cluster 18'
)

# Display the plot
fig.show()