In [14]:
# First Cell - Imports and Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import torch
import json
import os

# Set style for better visualizations
plt.style.use('default')
sns.set_theme()
print("Setup Complete!")

# Second Cell - Load Data
# Load original data
data = pd.read_csv('../data/powerconsumption.csv')
print("Original Data Shape:", data.shape)
print("\nFirst few rows of original data:")
print(data.head())

# Load preprocessed data
preprocessed_path = '../data/processed'
results_path = '../data/results'

# Load preprocessed data
train_data = np.load(os.path.join(preprocessed_path, 'train_data.npy'))
val_data = np.load(os.path.join(preprocessed_path, 'val_data.npy'))
test_data = np.load(os.path.join(preprocessed_path, 'test_data.npy'))

print("\nPreprocessed Data Shapes:")
print(f"Train data: {train_data.shape}")
print(f"Validation data: {val_data.shape}")
print(f"Test data: {test_data.shape}")

# Load model results
transformer_results = np.load(os.path.join(results_path, 'transformer_predictions.npy'))
patchtst_results = np.load(os.path.join(results_path, 'patchtst_predictions.npy'))
actual_values = np.load(os.path.join(results_path, 'actual_values.npy'))

print("\nModel Results Shapes:")
print(f"Transformer predictions: {transformer_results.shape}")
print(f"PatchTST predictions: {patchtst_results.shape}")
print(f"Actual values: {actual_values.shape}")

# Third Cell - Calculate Metrics
def calculate_metrics(y_true, y_pred, zone_idx):
    """Calculate comprehensive metrics for a specific zone"""
    mae = mean_absolute_error(y_true[:, zone_idx], y_pred[:, zone_idx])
    mse = mean_squared_error(y_true[:, zone_idx], y_pred[:, zone_idx])
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true[:, zone_idx], y_pred[:, zone_idx])
    
    return {
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse,
        'R2': r2
    }

# Calculate metrics for each zone and model
zones = ['Zone 1', 'Zone 2', 'Zone 3']
models = ['Transformer', 'PatchTST']
results = {}

for zone_idx, zone in enumerate(zones):
    results[zone] = {
        'Transformer': calculate_metrics(actual_values, transformer_results, zone_idx),
        'PatchTST': calculate_metrics(actual_values, patchtst_results, zone_idx)
    }

# Display results as a DataFrame
metrics_df = pd.DataFrame({
    (zone, model, metric): value
    for zone in zones
    for model in models
    for metric, value in results[zone][model].items()
}).unstack(level=[1, 2])

print("\nModel Performance Metrics:")
print(metrics_df)

Setup Complete!
Original Data Shape: (52416, 9)

First few rows of original data:
        Datetime  Temperature  Humidity  WindSpeed  GeneralDiffuseFlows  \
0  1/1/2017 0:00        6.559      73.8      0.083                0.051   
1  1/1/2017 0:10        6.414      74.5      0.083                0.070   
2  1/1/2017 0:20        6.313      74.5      0.080                0.062   
3  1/1/2017 0:30        6.121      75.0      0.083                0.091   
4  1/1/2017 0:40        5.921      75.7      0.081                0.048   

   DiffuseFlows  PowerConsumption_Zone1  PowerConsumption_Zone2  \
0         0.119             34055.69620             16128.87538   
1         0.085             29814.68354             19375.07599   
2         0.100             29128.10127             19006.68693   
3         0.096             28228.86076             18361.09422   
4         0.085             27335.69620             17872.34043   

   PowerConsumption_Zone3  
0             20240.96386  
1       

FileNotFoundError: [Errno 2] No such file or directory: '../data/processed\\train_data.npy'

In [12]:
# Second Cell - Load Data
import os
import numpy as np
import pandas as pd
import json

# Define paths relative to notebook location
DATA_DIR = os.path.join(os.path.dirname(os.getcwd()), 'data')

# Load original data
data = pd.read_csv(os.path.join(DATA_DIR, 'powerconsumption.csv'))
print("Original Data Shape:", data.shape)
print("\nFirst few rows of original data:")
print(data.head())

# Load preprocessed data
try:
    train_data = np.load(os.path.join(DATA_DIR, 'processed/train_data.npy'))
    val_data = np.load(os.path.join(DATA_DIR, 'processed/val_data.npy'))
    test_data = np.load(os.path.join(DATA_DIR, 'processed/test_data.npy'))
    
    print("\nLoaded Preprocessed Data Shapes:")
    print(f"Train data: {train_data.shape}")
    print(f"Validation data: {val_data.shape}")
    print(f"Test data: {test_data.shape}")
    
    # Load preprocessing info
    with open(os.path.join(DATA_DIR, 'processed/preprocessing_info.json'), 'r') as f:
        preprocessing_info = json.load(f)
    print("\nPreprocessing Info:")
    print(json.dumps(preprocessing_info, indent=2))

except FileNotFoundError:
    print("\nPreprocessed data not found. Please run preprocessing first:")
    print("python src/data_preprocessing.py")

Original Data Shape: (52416, 9)

First few rows of original data:
        Datetime  Temperature  Humidity  WindSpeed  GeneralDiffuseFlows  \
0  1/1/2017 0:00        6.559      73.8      0.083                0.051   
1  1/1/2017 0:10        6.414      74.5      0.083                0.070   
2  1/1/2017 0:20        6.313      74.5      0.080                0.062   
3  1/1/2017 0:30        6.121      75.0      0.083                0.091   
4  1/1/2017 0:40        5.921      75.7      0.081                0.048   

   DiffuseFlows  PowerConsumption_Zone1  PowerConsumption_Zone2  \
0         0.119             34055.69620             16128.87538   
1         0.085             29814.68354             19375.07599   
2         0.100             29128.10127             19006.68693   
3         0.096             28228.86076             18361.09422   
4         0.085             27335.69620             17872.34043   

   PowerConsumption_Zone3  
0             20240.96386  
1             20131.0843

In [13]:
# Third Cell - Evaluation Metrics Implementation
def calculate_metrics(y_true, y_pred, zone_idx):
    """
    Calculate comprehensive metrics for a specific zone
    Args:
        y_true: Actual values
        y_pred: Predicted values
        zone_idx: Index of the zone (0, 1, or 2)
    Returns:
        Dictionary of metrics
    """
    mae = mean_absolute_error(y_true[:, zone_idx], y_pred[:, zone_idx])
    mse = mean_squared_error(y_true[:, zone_idx], y_pred[:, zone_idx])
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true[:, zone_idx], y_pred[:, zone_idx])
    
    # Calculate additional metrics
    mape = np.mean(np.abs((y_true[:, zone_idx] - y_pred[:, zone_idx]) / y_true[:, zone_idx])) * 100
    
    return {
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse,
        'R2': r2,
        'MAPE': mape
    }

# Calculate metrics for each zone and model
zones = ['Zone 1', 'Zone 2', 'Zone 3']
models = ['Transformer', 'PatchTST']
results = {}

for zone_idx, zone in enumerate(zones):
    results[zone] = {
        'Transformer': calculate_metrics(actual_values, transformer_results, zone_idx),
        'PatchTST': calculate_metrics(actual_values, patchtst_results, zone_idx)
    }

# Create and display metrics DataFrame
metrics_df = pd.DataFrame({
    (zone, model, metric): value
    for zone in zones
    for model in models
    for metric, value in results[zone][model].items()
}).unstack(level=[1, 2])

print("Detailed Performance Metrics:")
print(metrics_df)

NameError: name 'actual_values' is not defined

In [None]:
# Fourth Cell - Visualization of Predictions
def plot_predictions(actual, transformer_pred, patchtst_pred, zone_idx, zone_name):
    """Plot actual vs predicted values for a specific zone"""
    plt.figure(figsize=(15, 6))
    
    # Plot with different styles for better visibility
    plt.plot(actual[:100, zone_idx], 'b-', label='Actual', linewidth=2, alpha=0.7)
    plt.plot(transformer_pred[:100, zone_idx], 'r--', label='Transformer', linewidth=2, alpha=0.7)
    plt.plot(patchtst_pred[:100, zone_idx], 'g:', label='PatchTST', linewidth=2, alpha=0.7)
    
    plt.title(f'Power Consumption Predictions - {zone_name}', fontsize=14)
    plt.xlabel('Time Steps', fontsize=12)
    plt.ylabel('Power Consumption', fontsize=12)
    plt.legend(fontsize=10)
    plt.grid(True, alpha=0.3)
    
    # Add error bands
    transformer_error = np.abs(actual[:100, zone_idx] - transformer_pred[:100, zone_idx])
    patchtst_error = np.abs(actual[:100, zone_idx] - patchtst_pred[:100, zone_idx])
    
    plt.fill_between(range(100), 
                     actual[:100, zone_idx] - transformer_error,
                     actual[:100, zone_idx] + transformer_error,
                     color='red', alpha=0.1)
    plt.fill_between(range(100), 
                     actual[:100, zone_idx] - patchtst_error,
                     actual[:100, zone_idx] + patchtst_error,
                     color='green', alpha=0.1)
    
    plt.show()

# Plot predictions for each zone
for idx, zone in enumerate(zones):
    plot_predictions(actual_values, transformer_results, patchtst_results, idx, zone)

In [None]:
# Fifth Cell - Performance Metrics Comparison
def plot_metrics_comparison():
    """Plot comparison of different metrics across models and zones"""
    metrics_to_plot = ['RMSE', 'MAE', 'R2', 'MAPE']
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    axes = axes.ravel()

    for idx, metric in enumerate(metrics_to_plot):
        data = []
        labels = []
        colors = []
        for zone in zones:
            for model in models:
                data.append(results[zone][model][metric])
                labels.append(f'{zone}\n{model}')
                colors.append('skyblue' if model == 'Transformer' else 'lightgreen')
        
        axes[idx].bar(labels, data, color=colors)
        axes[idx].set_title(f'{metric} Comparison', fontsize=14)
        axes[idx].set_ylabel(metric, fontsize=12)
        axes[idx].tick_params(axis='x', rotation=45)
        axes[idx].grid(True, alpha=0.3)
        
        # Add value labels on top of bars
        for i, v in enumerate(data):
            axes[idx].text(i, v, f'{v:.4f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

plot_metrics_comparison()

In [None]:
# Sixth Cell - Detailed Analysis and Conclusions
print("Model Performance Analysis")
print("=========================")

print("\n1. Overall Model Performance:")
for zone in zones:
    print(f"\n{zone}:")
    transformer_metrics = results[zone]['Transformer']
    patchtst_metrics = results[zone]['PatchTST']
    
    # Determine better model
    better_model = 'Transformer' if transformer_metrics['RMSE'] < patchtst_metrics['RMSE'] else 'PatchTST'
    improvement = abs(transformer_metrics['RMSE'] - patchtst_metrics['RMSE'])
    
    print(f"  Best Model: {better_model}")
    print(f"  RMSE Improvement: {improvement:.4f}")
    print(f"  Transformer Metrics: RMSE={transformer_metrics['RMSE']:.4f}, R2={transformer_metrics['R2']:.4f}")
    print(f"  PatchTST Metrics: RMSE={patchtst_metrics['RMSE']:.4f}, R2={patchtst_metrics['R2']:.4f}")

print("\n2. Zone-wise Analysis:")
for zone in zones:
    print(f"\n{zone}:")
    print(f"  Transformer:")
    print(f"    - RMSE: {results[zone]['Transformer']['RMSE']:.4f}")
    print(f"    - MAE: {results[zone]['Transformer']['MAE']:.4f}")
    print(f"    - MAPE: {results[zone]['Transformer']['MAPE']:.2f}%")
    print(f"  PatchTST:")
    print(f"    - RMSE: {results[zone]['PatchTST']['RMSE']:.4f}")
    print(f"    - MAE: {results[zone]['PatchTST']['MAE']:.4f}")
    print(f"    - MAPE: {results[zone]['PatchTST']['MAPE']:.2f}%")

print("\n3. Key Findings:")
print("  - Model Performance:")
for zone in zones:
    better_model = 'Transformer' if results[zone]['Transformer']['RMSE'] < results[zone]['PatchTST']['RMSE'] else 'PatchTST'
    print(f"    * {zone}: {better_model} performs better")

print("\n4. Recommendations:")
print("  - Model Selection:")
print("    * Use Transformer model for overall better performance")
print("    * Consider ensemble approach for further improvements")
print("  - Implementation:")
print("    * Monitor performance across different time periods")
print("    * Implement regular model retraining")
print("    * Consider zone-specific model tuning")