In [1]:
import os.path

import pandas as pd
from sklearn.metrics import accuracy_score
import numpy as np
from config_loader import load_config


In [2]:
# Load the project configuration; the second returned value is kept as config_dir.
config, config_dir = load_config()

# --- Evaluation parameters ---
threshold = 0.0       # values strictly greater than this are binarised to 1
household_id = "01"   # household label written onto the prediction rows

# --- Data location: the active environment selects the config section ---
env = config["Settings"]["environment"]
data_path = config[env]["data_path"]

# --- File names, all read from the "Data" section ---
demo_dataset_ground_truth_file = config["Data"]["demo_dataset_ground_truth_file"]
demo_inferred_data_file = config["Data"]["inferred_data_file"]
appliances_file = config["Data"]["appliances_file"]

In [3]:
def print_zero_one_distribution(df, name):
    """Print, for every column of ``df``, the percentage of 0s and of 1s.

    Args:
        df: DataFrame whose columns are inspected one at a time.
        name: Label shown in the heading line.

    Both percentages are relative to the number of rows in the column, so
    values other than 0 and 1 count towards neither figure.
    """
    row_template = "{:<18}  0: {:5.2f}%   1: {:5.2f}%"
    print(f"\n{name} Zero/One Distribution (%):")
    for label in df.columns:
        column = df[label]
        pct_zero = column.eq(0).mean() * 100
        pct_one = column.eq(1).mean() * 100
        print(row_template.format(label, pct_zero, pct_one))

def print_zero_percentage(df, name):
    """Print, for every column of ``df``, the percentage of entries equal to 0.

    Args:
        df: DataFrame whose columns are inspected one at a time.
        name: Label shown in the heading line.
    """
    row_template = "{:<18}: {:6.2f}%"
    print(f"\n{name} - Percentage of 0s per appliance:")
    for label in df.columns:
        share_of_zeros = df[label].eq(0).mean() * 100
        print(row_template.format(label, share_of_zeros))


In [4]:
# Read the appliance names (one per line); this order is later used to arrange
# the prediction columns. Blank lines are skipped so that a stray empty line
# in the file cannot end up as an empty column name in appliance_order.
with open(os.path.join(data_path, appliances_file), 'r') as f:
    appliance_order = [name for name in (line.strip() for line in f) if name]

Ground truth

In [5]:
# Load the ground-truth table and keep only the per-appliance columns.
# NOTE(review): household_id is dropped without filtering on it — presumably the
# file contains a single household; confirm.
ground_truth = (
    pd.read_parquet(os.path.join(data_path, demo_dataset_ground_truth_file))
    .drop(columns=["household_id", "timestamp"])
)

print_zero_percentage(ground_truth, "Ground Truth")

# Binarise: 1 where the value is strictly above the threshold, 0 otherwise.
ground_truth_binary = ground_truth.gt(threshold).astype(int)

print_zero_one_distribution(ground_truth_binary, "Ground Truth")


Ground Truth - Percentage of 0s per appliance:
Coffee Machine    :  97.78%
Dryer             :  70.74%
Freezer           :   0.03%
Fridge            :   0.06%
Lamp              : 100.00%
Laptop            : 100.00%
Microwave         : 100.00%
PC                : 100.00%
Router            : 100.00%
Tablet            : 100.00%
Washing Machine   :  51.01%
Other             :   0.05%

Ground Truth Zero/One Distribution (%):
Coffee Machine      0: 97.78%   1:  2.22%
Dryer               0: 70.74%   1: 29.26%
Freezer             0:  0.03%   1: 99.97%
Fridge              0:  0.06%   1: 99.94%
Lamp                0: 100.00%   1:  0.00%
Laptop              0: 100.00%   1:  0.00%
Microwave           0: 100.00%   1:  0.00%
PC                  0: 100.00%   1:  0.00%
Router              0: 100.00%   1:  0.00%
Tablet              0: 100.00%   1:  0.00%
Washing Machine     0: 51.01%   1: 48.99%
Other               0:  0.05%   1: 99.95%


Baseline all 0s

In [6]:
# Constant "everything off" prediction: a frame of 0s laid out exactly like
# ground_truth_binary (same index, same columns).
all_off_baseline = pd.DataFrame(
    0,
    index=ground_truth_binary.index,
    columns=ground_truth_binary.columns,
)

# Overall accuracy: every (row, appliance) cell counts as one sample.
y_true_flat = ground_truth_binary.values.flatten()
y_off_flat = all_off_baseline.values.flatten()
baseline_accuracy = accuracy_score(y_true_flat, y_off_flat)
print(f"Baseline accuracy (all appliances off): {baseline_accuracy * 100:.2f}%")

# Per-appliance accuracy of the same constant prediction.
for col in ground_truth.columns:
    acc = accuracy_score(
        ground_truth_binary[col],
        all_off_baseline[col],
    )
    print(f"Accuracy {col}: {acc * 100:.2f}%")


Baseline accuracy (all appliances off): 68.31%
Accuracy Coffee Machine: 97.78%
Accuracy Dryer: 70.74%
Accuracy Freezer: 0.03%
Accuracy Fridge: 0.06%
Accuracy Lamp: 100.00%
Accuracy Laptop: 100.00%
Accuracy Microwave: 100.00%
Accuracy PC: 100.00%
Accuracy Router: 100.00%
Accuracy Tablet: 100.00%
Accuracy Washing Machine: 51.01%
Accuracy Other: 0.05%


Baseline all 1s

In [7]:
# Constant "everything on" prediction: a frame of 1s laid out exactly like
# ground_truth_binary (same index, same columns).
all_on_baseline = pd.DataFrame(
    1,
    index=ground_truth_binary.index,
    columns=ground_truth_binary.columns,
)

# Overall accuracy when every appliance is predicted as on; every
# (row, appliance) cell counts as one sample.
y_true_flat = ground_truth_binary.values.flatten()
y_on_flat = all_on_baseline.values.flatten()
baseline_accuracy = accuracy_score(y_true_flat, y_on_flat)
print(f"Baseline accuracy (all appliances on): {baseline_accuracy * 100:.2f}%")

# Per-appliance accuracy of the same constant prediction.
for col in ground_truth.columns:
    acc = accuracy_score(
        ground_truth_binary[col],
        all_on_baseline[col],
    )
    print(f"Accuracy {col}: {acc * 100:.2f}%")


Baseline accuracy (all appliances on): 31.69%
Accuracy Coffee Machine: 2.22%
Accuracy Dryer: 29.26%
Accuracy Freezer: 99.97%
Accuracy Fridge: 99.94%
Accuracy Lamp: 0.00%
Accuracy Laptop: 0.00%
Accuracy Microwave: 0.00%
Accuracy PC: 0.00%
Accuracy Router: 0.00%
Accuracy Tablet: 0.00%
Accuracy Washing Machine: 48.99%
Accuracy Other: 99.95%


Baseline random

In [8]:
# Random prediction: every cell is drawn as 0 or 1. The global NumPy seed is
# fixed first so that re-running the cell yields the same draw.
np.random.seed(42)
random_draws = np.random.randint(0, 2, size=ground_truth_binary.shape)
random_baseline = pd.DataFrame(
    random_draws,
    index=ground_truth_binary.index,
    columns=ground_truth_binary.columns,
)

# Overall accuracy: every (row, appliance) cell counts as one sample.
y_true_flat = ground_truth_binary.values.flatten()
y_random_flat = random_baseline.values.flatten()
random_accuracy = accuracy_score(y_true_flat, y_random_flat)
print(f"\nBaseline accuracy (random 0/1): {random_accuracy * 100:.2f}%")

# Per-appliance accuracy of the random prediction.
for col in ground_truth.columns:
    acc = accuracy_score(
        ground_truth_binary[col],
        random_baseline[col],
    )
    print(f"Accuracy {col}: {acc * 100:.2f}%")


Baseline accuracy (random 0/1): 50.02%
Accuracy Coffee Machine: 50.13%
Accuracy Dryer: 49.90%
Accuracy Freezer: 50.05%
Accuracy Fridge: 50.10%
Accuracy Lamp: 49.92%
Accuracy Laptop: 50.10%
Accuracy Microwave: 50.16%
Accuracy PC: 49.87%
Accuracy Router: 50.02%
Accuracy Tablet: 50.08%
Accuracy Washing Machine: 49.85%
Accuracy Other: 50.08%


Predictions

In [9]:
# Load the model output. The code below relies on the columns "timestamp",
# "appliance" and "value" being present in this file.
prediction = pd.read_parquet(os.path.join(data_path, demo_inferred_data_file))

prediction["timestamp"] = pd.to_datetime(prediction["timestamp"])

# Write the configured household id onto every row (replaces any existing column).
prediction["household_id"] = household_id

# Long -> wide: one row per (timestamp, household_id), one column per appliance.
# aggfunc="first" picks a single value when a key occurs more than once.
prediction_wide = prediction.pivot_table(
    index=["timestamp", "household_id"],
    columns="appliance",
    values="value",
    aggfunc="first"
).reset_index()

prediction_wide.columns.name = None

# Keep only the appliance columns, in the order given by the appliances file.
column_order = [col for col in appliance_order if col not in ("timestamp", "household_id")]
prediction_wide = prediction_wide[column_order]

# Binarise: 1 where the value is strictly above the threshold, 0 otherwise.
# Cells left as NaN by the pivot compare as False and therefore become 0.
prediction_binary = (prediction_wide > threshold).astype(int)

print_zero_one_distribution(prediction_binary, "Prediction")

# The overall score compares flattened arrays cell by cell, so both frames must
# have the same number of rows and the same column order. Fail with a clear
# message on a row mismatch instead of relying on a generic error downstream.
if len(prediction_binary) != len(ground_truth_binary):
    raise ValueError(
        f"Row count mismatch: prediction has {len(prediction_binary)} rows, "
        f"ground truth has {len(ground_truth_binary)} rows"
    )

# Align the prediction columns to the ground-truth column order; without this,
# a different order in the appliances file would silently score one appliance
# against another. A missing appliance raises KeyError here.
# NOTE(review): rows are still compared by position. The prediction rows follow
# the pivot index, the ground-truth rows keep their file order — confirm that
# both are in the same timestamp order.
prediction_for_scoring = prediction_binary[list(ground_truth_binary.columns)]

overall_accuracy = accuracy_score(
    ground_truth_binary.values.flatten(),
    prediction_for_scoring.values.flatten()
)
print(f"Total Accuracy: {overall_accuracy * 100:.2f}%")

# Per-appliance accuracy, matched by column name.
for col in ground_truth_binary.columns:
    acc = accuracy_score(y_true=ground_truth_binary[col], y_pred=prediction_for_scoring[col])
    print(f"Accuracy {col}: {acc * 100:.2f}%")


Prediction Zero/One Distribution (%):
Coffee Machine      0: 100.00%   1:  0.00%
Dryer               0:  3.00%   1: 97.00%
Freezer             0:  0.39%   1: 99.61%
Fridge              0:  0.00%   1: 100.00%
Lamp                0: 93.19%   1:  6.81%
Laptop              0: 100.00%   1:  0.00%
Microwave           0: 99.05%   1:  0.95%
PC                  0: 90.87%   1:  9.13%
Router              0: 96.40%   1:  3.60%
Tablet              0: 95.27%   1:  4.73%
Washing Machine     0: 99.39%   1:  0.61%
Other               0: 95.02%   1:  4.98%
Total Accuracy: 79.91%
Accuracy Coffee Machine: 97.78%
Accuracy Dryer: 30.65%
Accuracy Freezer: 99.58%
Accuracy Fridge: 99.94%
Accuracy Lamp: 93.19%
Accuracy Laptop: 100.00%
Accuracy Microwave: 99.05%
Accuracy PC: 90.87%
Accuracy Router: 96.40%
Accuracy Tablet: 95.27%
Accuracy Washing Machine: 51.18%
Accuracy Other: 5.01%
