In [1]:
import pandas as pd
from datetime import datetime, timedelta
import os
from config_loader import load_config
from joblib import load
import torch
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import json

In [2]:
# Household whose rows are extracted from the training dataset in the cells below
household_id = '05'

# Load the project configuration; every directory name, file name and
# inference setting below is read from it.
config, config_dir = load_config()
env = config['Settings']['environment']
models_dir = config['Data']['models_dir']
scalers_dir = config['Data']['scalers_dir']
target_scalers_dir = config['Data']['target_scalers_dir']
# The data root is looked up under the section named by the active environment
data_path = config[env]['data_path']
training_dataset_file = config['Data']['training_dataset_file']
infer_data_file = config['Data']['infer_data_file']
demo_dataset_ground_truth_file = config['Data']['demo_dataset_ground_truth_file']
inference_timestamp = config['Inference']['inference_timestamp']
# Used as a suffix appended to the normalized appliance name when loading a model
model_file = config['Data']['model_file']

inferred_data_file = config['Data']['inferred_data_file']
column_names_file = config['Data']['training_dataset_columns_file']
input_scaler_file = config['Data']['input_scaler_file']
# Used as a suffix appended to the normalized appliance name when loading a target scaler
target_scalers_file = config['Data']['target_scalers_file']
batch_size = int(config['Inference']['batch_size'])

# Absolute locations derived from the data root
model_path = os.path.join(data_path, models_dir)
inferred_data_path = os.path.join(data_path, inferred_data_file)
infer_data_path = os.path.join(data_path, infer_data_file)
input_scaler_path = os.path.join(data_path, scalers_dir, input_scaler_file)
# Target scalers live in a sub-directory of the scalers directory
target_scalers_path = os.path.join(data_path, scalers_dir, target_scalers_dir)

# Input scaler deserialized with joblib; applied to the features before inference
input_scaler = load(input_scaler_path)

# Inference tensors are placed on the CPU
device = torch.device('cpu')

In [3]:
# Load the full training dataset; df_gt starts out as an independent copy of it
df = pd.read_parquet(os.path.join(data_path, training_dataset_file))
df_gt = df.copy()

# Read the appliance names from the JSON column-names file
with open(os.path.join(data_path, column_names_file), 'r') as file:
    column_names_json = json.load(file)

# The 'appliances' entry is the list iterated over when running inference
appliances_list = column_names_json['appliances']

In [4]:
# Keep only certain columns: the first 11 columns form the inference input,
# while the ground truth keeps columns 0-1 plus everything from column 11 on.
df = df[df.columns[:11]]
df_gt = df_gt.iloc[:, [0, 1] + list(range(11, df_gt.shape[1]))]

# Restrict both frames to the selected household. drop() / copy() return
# independent frames, so the column assignments below never write into a
# slice of the parent frame (no SettingWithCopy path, no inplace mutation).
df = df[df['household_id'] == household_id].drop(columns=['household_id'])
df_gt = df_gt[df_gt['household_id'] == household_id].copy()

print(df.shape)

# Convert 'timestamp' to datetime if it's not already
df['timestamp'] = pd.to_datetime(df['timestamp'])
df_gt['timestamp'] = pd.to_datetime(df_gt['timestamp'])

# Get the unique calendar dates present in the input data (just the date part)
unique_dates = sorted(df['timestamp'].dt.date.unique())

# Create the new continuous date range ending yesterday, one new date per
# original date. "Today" is read once so the whole range is anchored to the
# same day even if the cell runs across midnight.
today = datetime.now().date()
new_dates = sorted(today - timedelta(days=i + 1) for i in range(len(unique_dates)))

# Map old dates to new dates (i-th oldest original date -> i-th oldest new date)
date_mapping = dict(zip(unique_dates, new_dates))


def _remap_dates(timestamps, mapping):
    """Move every timestamp onto its mapped calendar date, keeping the time of day.

    timestamps: datetime64 Series.
    mapping: dict of {old datetime.date: new datetime.date}.
    Returns a timezone-naive datetime64 Series aligned with `timestamps`.
    Raises KeyError when a timestamp's date has no entry in `mapping`.
    """
    if timestamps.dt.tz is not None:
        # Keep the wall-clock reading and drop the zone, which is what
        # combining a plain date with a plain time produces.
        timestamps = timestamps.dt.tz_localize(None)

    old_dates = timestamps.dt.date
    mapped_dates = old_dates.map(mapping)
    unmapped = mapped_dates.isna()
    if unmapped.any():
        raise KeyError(old_dates[unmapped].iloc[0])

    # time of day = timestamp minus midnight of its own date
    time_of_day = timestamps - timestamps.dt.normalize()
    return pd.to_datetime(mapped_dates) + time_of_day


# Replace the 'timestamp' with the mapped date (keeping the time component);
# done column-wise instead of with a per-row Python callback.
df['timestamp'] = _remap_dates(df['timestamp'], date_mapping)
df_gt['timestamp'] = _remap_dates(df_gt['timestamp'], date_mapping)

(613440, 10)


In [5]:
# Persist the date-shifted input frame (read back later for inference)
# and the matching ground-truth frame as parquet files under data_path
df.to_parquet(os.path.join(data_path, infer_data_file))
df_gt.to_parquet(os.path.join(data_path, demo_dataset_ground_truth_file))

In [6]:
# Read the appliance names from the JSON column-names file
# (repeats the load already done in cell In [3])
with open(os.path.join(data_path, column_names_file), 'r') as file:
    column_names_json = json.load(file)

# One model is run per entry of this list
appliances_list = column_names_json['appliances']

def create_day_dataset_from_file():
    """Load the saved inference input and return it scaled, with its timestamps.

    Reads the parquet file at `infer_data_path`, separates the 'timestamp'
    column from the remaining columns and passes those remaining columns
    through `input_scaler.transform`.

    Returns:
        (X_day, timestamps): the transformed features as returned by the
        scaler, and the timestamps as a datetime Series with a fresh
        0..n-1 index.
    """
    frame = pd.read_parquet(infer_data_path)

    # pop() removes the column from the frame and hands it back, leaving
    # only the feature columns behind in their original order
    timestamps = pd.to_datetime(frame.pop('timestamp')).reset_index(drop=True)

    # Normalize the features with the fitted input scaler
    return input_scaler.transform(frame), timestamps


def run_inference(X_day, appliance):
    """Run the TorchScript model of one appliance over the scaled input.

    Args:
        X_day: array of scaled features. A 2-D array is treated as a single
            sequence and gets a leading batch dimension of size 1.
        appliance: display name of the appliance; lower-cased with spaces
            replaced by underscores to build the model file name
            (`<name> + model_file` inside `model_path`).

    Returns:
        np.ndarray with the model outputs of all batches concatenated along
        axis 0, clamped to be non-negative.
    """
    appliance_name = appliance.lower().replace(' ', '_')
    # map_location pins the restored weights to the device the inputs are sent
    # to; without it the model is restored onto the device it was saved from,
    # which can differ from `device`.
    model = torch.jit.load(
        os.path.join(model_path, appliance_name + model_file),
        map_location=device,
    )
    model.eval()

    if len(X_day.shape) == 2:
        X_day = np.expand_dims(X_day, axis=0)  # Add batch dimension at axis=0

    # The tensor is moved to the target device once, so batches drawn from the
    # loader are already on it.
    X_day_tensor = torch.tensor(X_day, dtype=torch.float32).to(device)
    day_loader = DataLoader(TensorDataset(X_day_tensor), batch_size=batch_size, shuffle=False)

    predictions_all = []
    with torch.no_grad():
        for (batch_X,) in day_loader:
            # Clamp predictions to be non-negative (power >= 0)
            batch_predictions = torch.clamp(model(batch_X), min=0.0)

            # Convert to NumPy after clamping
            predictions_all.append(batch_predictions.cpu().numpy())

    return np.concatenate(predictions_all, axis=0)

def melt_dataframe(df):
    """Reshape a wide per-appliance frame into long format with calendar columns.

    Every column other than 'timestamp' becomes rows of an
    ('appliance', 'value') pair. The result also carries 'date', 'minute',
    'hour' and 'month' (a monthly period) derived from the timestamp, and is
    ordered by timestamp. The original row labels produced by the melt are kept.
    """
    long_df = df.melt(
        id_vars=['timestamp'],   # column carried through unchanged
        var_name='appliance',    # former column names
        value_name='value',      # former cell values
    )

    # Parse once and derive every calendar field from the same Series
    ts = pd.to_datetime(long_df['timestamp'])
    long_df = long_df.assign(
        timestamp=ts,
        date=ts.dt.date,
        minute=ts.dt.minute,
        hour=ts.dt.hour,
        month=ts.dt.to_period('M'),
    )

    # Order rows chronologically
    return long_df.sort_values(by=['timestamp'])


def compute_other_column(p_df, sm_df):
    """Add an 'Other' column: smart-meter total minus the summed appliance predictions.

    Args:
        p_df: predictions frame with a 'timestamp' column plus one column per
            appliance; every non-timestamp column is summed.
        sm_df: smart-meter frame with a 'timestamp' column; the columns whose
            lower-cased name is 'powerl1', 'powerl2' or 'powerl3' are summed
            into the total power.

    Returns:
        A new frame containing the columns of `p_df` (sorted by timestamp)
        plus 'Other', restricted to timestamps present in both inputs.
        'Other' is clipped at zero. Neither input frame is modified.
    """
    # assign() returns a new frame, so the callers' frames keep their original
    # 'timestamp' column instead of being overwritten in place.
    p_df = (
        p_df.assign(timestamp=pd.to_datetime(p_df['timestamp']))
        .sort_values('timestamp')
        .reset_index(drop=True)
    )
    sm_df = (
        sm_df.assign(timestamp=pd.to_datetime(sm_df['timestamp']))
        .sort_values('timestamp')
        .reset_index(drop=True)
    )

    # Sum predicted appliance power per timestamp (exclude 'timestamp' column)
    appliance_cols = p_df.columns.difference(['timestamp'])
    p_df['total_pred_power'] = p_df[appliance_cols].sum(axis=1)

    # Sum smart meter phases to get total power per timestamp
    phase_cols = [col for col in sm_df.columns if col.lower() in ['powerl1', 'powerl2', 'powerl3']]
    sm_totals = sm_df[['timestamp']].assign(total_sm_power=sm_df[phase_cols].sum(axis=1))

    # Merge on timestamp to align rows
    merged = pd.merge(p_df, sm_totals, on='timestamp', how='inner')

    # 'Other' = smart meter total - sum of predicted appliances, never negative
    merged['Other'] = (merged['total_sm_power'] - merged['total_pred_power']).clip(lower=0)

    # Keep original columns + Other column, drop helper cols
    return merged.drop(columns=['total_pred_power', 'total_sm_power'])


def append_predictions(timestamps, predictions_dict, sm_df=None):
    """Inverse-scale the per-appliance predictions, add 'Other' and save them.

    timestamps: list of timestamps (len = total timesteps)
    predictions_dict: dict of {appliance_name: np.ndarray of shape (total_timesteps,)}
                      or (1, seq_len, 1) / (batch, seq_len, 1)
    sm_df: optional smart-meter frame used to compute the 'Other' column.
           When omitted, it is read from `infer_data_path`, so the function no
           longer depends on a notebook-global frame left over from an
           earlier cell.

    Writes the long-format result to `inferred_data_path`; returns nothing.
    """
    if sm_df is None:
        sm_df = pd.read_parquet(infer_data_path)

    pred_df = pd.DataFrame({'timestamp': timestamps})

    for appliance, pred in predictions_dict.items():
        print(f"Processing {appliance} - shape before reshape:", pred.shape)
        appliance_name = appliance.lower().replace(' ', '_')

        # Collapse any batch dimension, then keep the first output channel.
        # Handling 3-D and 2-D uniformly also covers arrays whose last
        # dimension is larger than 1, which previously slipped through and
        # were flattened to seq_len * channels values.
        if pred.ndim == 3:
            pred = pred.reshape(-1, pred.shape[2])  # shape: (batch * seq_len, channels)
        if pred.ndim == 2:
            pred = pred[:, 0]  # shape: (total_timesteps,)

        print(f"Shape after reshape for {appliance}:", pred.shape)

        # Inverse scale with the appliance's own target scaler
        target_scaler = load(os.path.join(target_scalers_path, appliance_name + target_scalers_file))
        pred_inverse = target_scaler.inverse_transform(pred.reshape(-1, 1)).flatten()

        pred_df[appliance] = pred_inverse

    pred_df = compute_other_column(pred_df, sm_df)

    # Melt and save
    pred_df = melt_dataframe(pred_df)
    pred_df.to_parquet(inferred_data_path, index=False)


def split_parquet_by_date(input_parquet_path, output_root):
    """Write one 'predictions.parquet' per calendar day under `output_root`.

    The input parquet is grouped by the date of its 'timestamp' column; each
    group is saved to `<output_root>/date=<day>/predictions.parquet` without
    the 'date' column and without the index. Folders are created as needed.
    """
    frame = pd.read_parquet(input_parquet_path)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    frame['date'] = frame['timestamp'].dt.date

    for day, day_rows in frame.groupby('date'):
        partition_dir = os.path.join(output_root, f'date={day}')
        os.makedirs(partition_dir, exist_ok=True)

        # The day is already encoded in the folder name, so the column is dropped
        day_rows.drop(columns=['date']).to_parquet(
            os.path.join(partition_dir, 'predictions.parquet'),
            index=False,
        )

    print("Done splitting into daily folders.")


# Build the scaled input once, then run every appliance's model on it
X_day, timestamps = create_day_dataset_from_file()
predictions = {}
for appliance in appliances_list:
    predictions_np = run_inference(X_day, appliance=appliance)
    predictions[appliance] = predictions_np

# Inverse-scale the predictions, add the 'Other' column and write the long-format parquet
append_predictions(timestamps, predictions)
# Re-read that parquet and split it into one 'date=<day>' folder per day under data_path/demo_dataset
daily_partitions_root = os.path.join(data_path, 'demo_dataset')
split_parquet_by_date(inferred_data_path, daily_partitions_root)

Processing Coffee Machine - shape before reshape: (1, 613440, 1)
Shape after reshape for Coffee Machine: (613440,)
Processing Dryer - shape before reshape: (1, 613440, 1)
Shape after reshape for Dryer: (613440,)
Processing Freezer - shape before reshape: (1, 613440, 1)
Shape after reshape for Freezer: (613440,)
Processing Fridge - shape before reshape: (1, 613440, 1)
Shape after reshape for Fridge: (613440,)
Processing Lamp - shape before reshape: (1, 613440, 1)
Shape after reshape for Lamp: (613440,)
Processing Laptop - shape before reshape: (1, 613440, 1)
Shape after reshape for Laptop: (613440,)
Processing Microwave - shape before reshape: (1, 613440, 1)
Shape after reshape for Microwave: (613440,)
Processing PC - shape before reshape: (1, 613440, 1)
Shape after reshape for PC: (613440,)
Processing Router - shape before reshape: (1, 613440, 1)
Shape after reshape for Router: (613440,)
Processing Tablet - shape before reshape: (1, 613440, 1)
Shape after reshape for Tablet: (613440,)
