In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import mean_absolute_error, f1_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
import sys
import os
from pathlib import Path

# Add project root to path
# The parent of the current working directory is appended to the module
# search path (presumably so the `model_highfreq` package import below can
# resolve when this is run from a sub-folder -- confirm against repo layout).
sys.path.append(os.path.dirname(os.getcwd()))

# For PyTorch 2.6+ to allow loading Sklearn Scalers
# `StandardScaler` is already imported at the top of the file, so it is only
# registered here. PyTorch builds that do not provide `add_safe_globals`
# raise AttributeError; that is the one failure that is safe to ignore.
try:
    torch.serialization.add_safe_globals([StandardScaler])
except AttributeError:
    pass

# The training module is imported flat first and, if that fails, through the
# `model_highfreq` package.
try:
    from train_transformer_10sec import NILMTransformer, Config, load_data, NILMDataset10Sec
except ImportError:
    from model_highfreq.train_transformer_10sec import NILMTransformer, Config, load_data, NILMDataset10Sec

# ==============================================================================
# 1. SETUP & CONFIGURATION
# ==============================================================================
APPLIANCE = 'Dishwasher'
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
THRESHOLD = 0.02 # 20W detection (assuming data is in kW)

# Checkpoint file name derived from the appliance; the same name is looked up
# in each candidate folder below ('' and '.' both mean the working directory).
_weights_name = f'transformer_10sec_{APPLIANCE.lower()}_best.pth'
poss_model_paths = [
    Path(folder) / _weights_name
    for folder in ('models', '../models', '', '.', 'model_highfreq')
]

# First candidate that exists on disk, or None when none of them do.
MODEL_FILE = next(filter(Path.exists, poss_model_paths), None)

# Fallback: recursive search below the working directory, first hit wins.
if MODEL_FILE is None:
    from glob import glob
    matches = glob(f'**/{_weights_name}', recursive=True)
    if matches:
        MODEL_FILE = Path(matches[0])

print(f"Loading model for: {APPLIANCE}")
print(f"Model Path: {MODEL_FILE}")
print(f"Device: {DEVICE}")

cfg = Config()

# Data path discovery: the configured path as-is, then relative to one and two
# parent directories, then a machine-specific absolute fallback.
data_possibilities = [Path(cfg.DATA_PATH)]
data_possibilities += [Path(up) / cfg.DATA_PATH for up in ('../', '../../')]
data_possibilities.append(
    Path(r'C:\Users\Tommaso\Documents\MEGAR2D2\HOWEST\TeamProject\MTS3-MCTE-Team-Project-Energy-G1\.tmp\10sec\10sec\nilm_10sec_mar_may.parquet')
)

# None when no candidate exists; the value is printed either way.
REAL_DATA_PATH = next(filter(Path.exists, data_possibilities), None)
print(f"Data Path found: {REAL_DATA_PATH}")

# ==============================================================================
# 2. LOAD MODEL & SCALERS
# ==============================================================================
# Stop early when no checkpoint was located. A non-zero status is used so a
# script or CI run does not report success on this failure path.
if MODEL_FILE is None:
    print("ERROR: Model file not found!")
    sys.exit(1)

# NOTE(security): weights_only=False performs a full pickle load, which can
# execute arbitrary code. Only load checkpoints from a trusted source.
checkpoint = torch.load(MODEL_FILE, map_location=DEVICE, weights_only=False)

# The checkpoint is read as a dict holding the two fitted scalers and the
# model state dict under the keys used below.
scaler_X = checkpoint['scaler_X']
scaler_y = checkpoint['scaler_y']

# n_features=4 matches the four feature columns stacked in the data-loading
# section (aggregate, its first difference, hour sine, hour cosine).
model = NILMTransformer(n_features=4, window_size=cfg.WINDOW_SIZE).to(DEVICE)
model.load_state_dict(checkpoint['model'])
model.eval()
print("Model and scalers loaded successfully.")

# ==============================================================================
# 3. LOAD DATA
# ==============================================================================
print("\nLoading dataset...")

# Fail with an actionable message when path discovery found nothing; otherwise
# str(None) would make the reader look for a file literally named 'None'.
if REAL_DATA_PATH is None:
    raise FileNotFoundError(
        "Dataset not found. Searched: "
        + ", ".join(str(p) for p in data_possibilities)
    )

# Using a more direct reading method to bypass pandas-pyarrow re-registration issues if possible
import pyarrow.parquet as pq
table = pq.read_table(str(REAL_DATA_PATH))
df = table.to_pandas()

# Feature engineering
agg = df['Aggregate'].values
# First difference of the aggregate signal; the first sample has no
# predecessor, so its delta stays 0.
dP_dt = np.zeros_like(agg)
dP_dt[1:] = agg[1:] - agg[:-1]
# Cyclical encoding of the hour of day taken from the 'Time' column.
dt = pd.to_datetime(df['Time'])
hour = dt.dt.hour.values
hour_sin = np.sin(2 * np.pi * hour / 24)
hour_cos = np.cos(2 * np.pi * hour / 24)
X_raw = np.column_stack([agg, dP_dt, hour_sin, hour_cos])
y_raw = df[APPLIANCE].values

# Keep the unscaled aggregate for plotting, then scale features and target
# with the scalers restored from the checkpoint.
agg_raw = X_raw[:, 0]
X_scaled = scaler_X.transform(X_raw)
y_scaled = scaler_y.transform(y_raw.reshape(-1, 1)).flatten()

# The last 15% of the rows are used as the test split.
# NOTE(review): presumably mirrors the split used during training -- confirm.
n = len(X_scaled)
val_end = int(n * 0.85)

X_test = X_scaled[val_end:]
y_test = y_scaled[val_end:]
agg_test = agg_raw[val_end:]

print(f"Test samples: {len(X_test)}")

# shuffle=False keeps the windows in time order for the plots further down.
test_ds = NILMDataset10Sec(X_test, y_test, cfg.WINDOW_SIZE)
test_loader = DataLoader(test_ds, batch_size=cfg.BATCH_SIZE, shuffle=False)

# ==============================================================================
# 4. EVALUATION
# ==============================================================================
print("\nStarting evaluation...")
all_preds = []
all_targets = []

with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch = x_batch.to(DEVICE)
        # reshape(-1) instead of squeeze(): squeeze() turns a final batch of
        # size 1 into a 0-d array, which np.concatenate refuses to join.
        outputs = model(x_batch).cpu().numpy().reshape(-1)
        all_preds.append(outputs)
        all_targets.append(y_batch.numpy().reshape(-1))

# np.concatenate would fail with an opaque message on an empty list.
if not all_preds:
    raise ValueError(
        "Test loader produced no batches; the test split is too short "
        "for the configured window size."
    )

y_pred_scaled = np.concatenate(all_preds).reshape(-1, 1)
y_true_scaled = np.concatenate(all_targets).reshape(-1, 1)

# Back to physical units; predictions are clipped at 0.
y_pred_real = np.maximum(scaler_y.inverse_transform(y_pred_scaled).flatten(), 0)
y_true_real = scaler_y.inverse_transform(y_true_scaled).flatten()

# Aggregate slice used for plotting, offset by half a window.
# NOTE(review): assumes each target corresponds to the window midpoint --
# confirm against NILMDataset10Sec.
mid = cfg.WINDOW_SIZE // 2
agg_vis = agg_test[mid : mid + len(y_pred_real)]
y_true_vis = y_true_real

# On/off classification by thresholding both signals.
y_pred_bin = (y_pred_real > THRESHOLD).astype(int)
y_true_bin = (y_true_vis > THRESHOLD).astype(int)

f1 = f1_score(y_true_bin, y_pred_bin, zero_division=0)
mae = mean_absolute_error(y_true_vis, y_pred_real)

# MAE is multiplied by 1000 for display in W (data assumed to be in kW, as
# noted next to THRESHOLD).
print("\n" + "="*30)
print(f"RESULTS FOR {APPLIANCE}")
print(f"MAE: {mae*1000:.2f} W")
print(f"F1 Score: {f1:.4f}")
print("="*30)

# ==============================================================================
# 5. VISUALIZATION
# ==============================================================================
fig, axes = plt.subplots(2, 1, figsize=(15, 10))
ax1, ax2 = axes

# Plot 1: Overview
# First 10000 samples of each series, drawn in this order.
overview = slice(None, 10000)
for series, style in (
    (agg_vis, dict(label='Aggregate (Total House)', color='black', alpha=0.15)),
    (y_true_vis, dict(label='Ground Truth', color='C0', alpha=0.8)),
    (y_pred_real, dict(label='Transformer Prediction', color='C1', alpha=0.7)),
):
    ax1.plot(series[overview], **style)
ax1.set_title(f'{APPLIANCE} Overview - Test Set')
ax1.set_ylabel('Power (kW)')
ax1.legend()
ax1.grid(True, alpha=0.2)

# Plot 2: Zoom on first active cycle found
# Indices where the ground truth exceeds the on/off threshold; the lower
# panel stays empty when there are none.
on_indices = np.flatnonzero(y_true_vis > THRESHOLD)
if on_indices.size > 0:
    # Window of up to 2000 samples starting at the first active sample.
    idx = on_indices[0] + 500
    start = max(0, idx - 500)
    end = min(len(y_true_vis), idx + 1500)
    zoom = slice(start, end)

    ax2.fill_between(range(end - start), 0, agg_vis[zoom], color='black', alpha=0.05, label='Aggregate')
    ax2.plot(y_true_vis[zoom], label='Ground Truth', color='C0', linewidth=2)
    ax2.plot(y_pred_real[zoom], label='Transformer', color='C1', linestyle='--', linewidth=2)
    ax2.set_title(f'{APPLIANCE} - Active Sequence Comparison')
    ax2.set_ylabel('Power (kW)')
    ax2.legend()
    ax2.grid(True, alpha=0.2)

plt.tight_layout()
plt.show()

Loading model for: Dishwasher
Model Path: transformer_10sec_dishwasher_best.pth
Device: cuda
Data Path found: ..\.tmp\10sec\10sec\nilm_10sec_mar_may.parquet
Model and scalers loaded successfully.

Loading dataset...
Test samples: 95529

Starting evaluation...
