In [1]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score
import numpy as np
from config_loader import load_config
import json


In [2]:
# Load the project configuration (config_dir is returned alongside it but not used here).
config, config_dir = load_config()

# Analysis parameters.
household_id = "05"   # household label stamped onto the prediction rows
threshold = 0.0       # readings strictly above this value are binarised to 1

# The active environment selects which section supplies the data directory.
env = config["Settings"]["environment"]
data_path = config[env]["data_path"]

# File names from the Data section; each is joined onto data_path when read.
data_section = config["Data"]
column_names_file = data_section["training_dataset_columns_file"]
demo_inferred_data_file = data_section["inferred_data_file"]
demo_dataset_ground_truth_file = data_section["demo_dataset_ground_truth_file"]

In [3]:
def print_zero_one_distribution(df: pd.DataFrame, name: str) -> None:
    """Print, per column, the percentage of entries equal to 0 and equal to 1.

    Args:
        df: Frame whose columns are inspected one by one. Entries that are
            neither 0 nor 1 count towards neither percentage.
        name: Label used in the header line of the report.

    Returns:
        None. The report is written to standard output.
    """
    print(f"\n{name} Zero/One Distribution (%):")
    for col in df.columns:
        zeros = (df[col] == 0).mean() * 100
        ones = (df[col] == 1).mean() * 100
        # Width 6 fits "100.00"; a narrower field lets 100% rows push the
        # second column out of alignment.
        print(f"{col:<18}  0: {zeros:6.2f}%   1: {ones:6.2f}%")

In [4]:
# Read the appliance names from the JSON columns file.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default encoding (which may mangle non-ASCII names).
with open(os.path.join(data_path, column_names_file), 'r', encoding='utf-8') as file:
    column_names_json = json.load(file)

# Value stored under the "appliances" key; used later to order the prediction columns.
appliance_order = column_names_json['appliances']

Ground truth

In [5]:
# Load the ground-truth parquet file and drop the two identifier columns;
# every remaining column is binarised below.
ground_truth = pd.read_parquet(
    os.path.join(data_path, demo_dataset_ground_truth_file)
).drop(columns=["household_id", "timestamp"])

# A value strictly greater than `threshold` maps to 1, everything else to 0.
ground_truth_binary = ground_truth.gt(threshold).astype(int)

print_zero_one_distribution(ground_truth_binary, "Ground Truth")


Ground Truth Zero/One Distribution (%):
Coffee Machine      0: 95.71%   1:  4.29%
Dryer               0: 100.00%   1:  0.00%
Freezer             0: 100.00%   1:  0.00%
Fridge              0:  0.03%   1: 99.97%
Lamp                0: 100.00%   1:  0.00%
Laptop              0: 100.00%   1:  0.00%
Microwave           0:  0.00%   1: 100.00%
PC                  0:  0.00%   1: 100.00%
Router              0: 100.00%   1:  0.00%
Tablet              0:  0.00%   1: 100.00%
Washing Machine     0: 100.00%   1:  0.00%
Other               0:  0.02%   1: 99.98%


Baseline all 0s

In [6]:
# All-off baseline: a frame of zeros sharing the index and columns of
# ground_truth_binary.
all_off_baseline = pd.DataFrame(
    0,
    index=ground_truth_binary.index,
    columns=ground_truth_binary.columns,
)

# Overall accuracy pools every cell of both frames into one flat label vector.
y_true_flat = ground_truth_binary.to_numpy().ravel()
y_off_flat = all_off_baseline.to_numpy().ravel()
baseline_accuracy = accuracy_score(y_true_flat, y_off_flat)
print(f"Baseline accuracy (all appliances off): {baseline_accuracy * 100:.2f}%")

# Accuracy of the same baseline, one column at a time.
for col in ground_truth.columns:
    acc = accuracy_score(ground_truth_binary[col], all_off_baseline[col])
    print(f"Accuracy {col}: {acc * 100:.2f}%")


Baseline accuracy (all appliances off): 57.98%
Accuracy Coffee Machine: 95.71%
Accuracy Dryer: 100.00%
Accuracy Freezer: 100.00%
Accuracy Fridge: 0.03%
Accuracy Lamp: 100.00%
Accuracy Laptop: 100.00%
Accuracy Microwave: 0.00%
Accuracy PC: 0.00%
Accuracy Router: 100.00%
Accuracy Tablet: 0.00%
Accuracy Washing Machine: 100.00%
Accuracy Other: 0.02%


Baseline all 1s

In [7]:
# All-on baseline: a frame of ones sharing the index and columns of
# ground_truth_binary.
all_on_baseline = pd.DataFrame(
    1,
    index=ground_truth_binary.index,
    columns=ground_truth_binary.columns,
)

# Overall accuracy pools every cell of both frames into one flat label vector.
y_true_flat = ground_truth_binary.to_numpy().ravel()
y_on_flat = all_on_baseline.to_numpy().ravel()
baseline_accuracy = accuracy_score(y_true_flat, y_on_flat)
print(f"Baseline accuracy (all appliances on): {baseline_accuracy * 100:.2f}%")

# Accuracy of the same baseline, one column at a time.
for col in ground_truth.columns:
    acc = accuracy_score(ground_truth_binary[col], all_on_baseline[col])
    print(f"Accuracy {col}: {acc * 100:.2f}%")


Baseline accuracy (all appliances on): 42.02%
Accuracy Coffee Machine: 4.29%
Accuracy Dryer: 0.00%
Accuracy Freezer: 0.00%
Accuracy Fridge: 99.97%
Accuracy Lamp: 0.00%
Accuracy Laptop: 0.00%
Accuracy Microwave: 100.00%
Accuracy PC: 100.00%
Accuracy Router: 0.00%
Accuracy Tablet: 100.00%
Accuracy Washing Machine: 0.00%
Accuracy Other: 99.98%


Baseline random

In [8]:
# Random baseline: each cell is drawn independently from {0, 1}.
np.random.seed(42)  # fixed seed so the draw is reproducible
random_draws = np.random.randint(low=0, high=2, size=ground_truth_binary.shape)
random_baseline = pd.DataFrame(
    random_draws,
    index=ground_truth_binary.index,
    columns=ground_truth_binary.columns,
)

# Overall accuracy pools every cell of both frames into one flat label vector.
y_true_flat = ground_truth_binary.to_numpy().ravel()
y_random_flat = random_baseline.to_numpy().ravel()
random_accuracy = accuracy_score(y_true_flat, y_random_flat)
print(f"\nBaseline accuracy (random 0/1): {random_accuracy * 100:.2f}%")

# Accuracy of the random baseline, one column at a time.
for col in ground_truth.columns:
    acc = accuracy_score(ground_truth_binary[col], random_baseline[col])
    print(f"Accuracy {col}: {acc * 100:.2f}%")


Baseline accuracy (random 0/1): 49.99%
Accuracy Coffee Machine: 50.10%
Accuracy Dryer: 49.88%
Accuracy Freezer: 49.92%
Accuracy Fridge: 50.03%
Accuracy Lamp: 49.93%
Accuracy Laptop: 49.96%
Accuracy Microwave: 49.91%
Accuracy PC: 50.02%
Accuracy Router: 50.02%
Accuracy Tablet: 49.99%
Accuracy Washing Machine: 50.10%
Accuracy Other: 50.03%


Predictions

In [9]:
# Load the inferred readings (long format: one row per timestamp/appliance value).
prediction = pd.read_parquet(os.path.join(data_path, demo_inferred_data_file))

prediction["timestamp"] = pd.to_datetime(prediction["timestamp"])

# Stamp every prediction row with the household under evaluation.
prediction["household_id"] = household_id

# Long -> wide: one row per (timestamp, household), one column per appliance.
# If several values share the same cell, the first one is kept.
prediction_wide = prediction.pivot_table(
    index=["timestamp", "household_id"],
    columns="appliance",
    values="value",
    aggfunc="first"
).reset_index()

prediction_wide.columns.name = None

# Keep only the appliance columns, in the configured order.
column_order = [col for col in appliance_order if col != "timestamp" and col != "household_id"]
prediction_wide = prediction_wide[column_order]

# A value strictly greater than `threshold` maps to 1; everything else
# (including cells left empty by the pivot) maps to 0.
prediction_binary = (prediction_wide > threshold).astype(int)

print_zero_one_distribution(prediction_binary, "Prediction")

# The flattened comparison is positional, so select the prediction columns in
# the ground-truth column order first; otherwise a different ordering in
# appliance_order would score one appliance against another.
# NOTE(review): rows are also compared by position; nothing here checks that
# prediction timestamps line up with the ground-truth rows - confirm upstream.
overall_accuracy = accuracy_score(
    ground_truth_binary.values.flatten(),
    prediction_binary[ground_truth_binary.columns].values.flatten()
)
print(f"Total Accuracy: {overall_accuracy * 100:.2f}%")

# Per-appliance accuracy; columns are matched by name here.
for col in ground_truth.columns:
    acc = accuracy_score(y_true=ground_truth_binary[col], y_pred=prediction_binary[col])
    print(f"Accuracy {col}: {acc * 100:.2f}%")


Prediction Zero/One Distribution (%):
Coffee Machine      0: 100.00%   1:  0.00%
Dryer               0: 100.00%   1:  0.00%
Freezer             0:  0.00%   1: 100.00%
Fridge              0:  0.00%   1: 100.00%
Lamp                0: 100.00%   1:  0.00%
Laptop              0: 100.00%   1:  0.00%
Microwave           0: 100.00%   1:  0.00%
PC                  0: 35.98%   1: 64.02%
Router              0: 100.00%   1:  0.00%
Tablet              0: 97.17%   1:  2.83%
Washing Machine     0: 100.00%   1:  0.00%
Other               0: 93.92%   1:  6.08%
Total Accuracy: 64.05%
Accuracy Coffee Machine: 95.71%
Accuracy Dryer: 100.00%
Accuracy Freezer: 0.00%
Accuracy Fridge: 99.97%
Accuracy Lamp: 100.00%
Accuracy Laptop: 100.00%
Accuracy Microwave: 0.00%
Accuracy PC: 64.02%
Accuracy Router: 100.00%
Accuracy Tablet: 2.83%
Accuracy Washing Machine: 100.00%
Accuracy Other: 6.09%
