In [1]:
import sys
import os
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# Set repository root path and change to it
repo_root_path = '/content/PatchTST'
os.chdir(repo_root_path)
print(f"Working directory: {os.getcwd()}")

# Put the project directories at the head of sys.path, with supervised ahead of
# physics to avoid utils shadowing. Entries that are already on sys.path are
# moved to the head instead of being skipped, so the order below is enforced
# even when an earlier run (or the environment) added them in another position.
supervised_path = os.path.join(repo_root_path, 'PatchTST_supervised')
physics_path = os.path.join(repo_root_path, 'PatchTST_physics_integrated')
new_paths = [supervised_path, physics_path, repo_root_path]
sys.path = new_paths + [p for p in sys.path if p not in new_paths]

print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"Python path head: {sys.path[:5]}")

# NumPy compatibility: when np.Inf is missing (the capitalised aliases were
# removed in NumPy 2.0), recreate the legacy names so code that still refers to
# them keeps working.
if not hasattr(np, 'Inf'):
    np.Inf = np.inf
    np.NaN = np.nan
    np.NAN = np.nan
    # Keep an existing np.NINF if this NumPy build still provides one
    np.NINF = np.NINF if hasattr(np, 'NINF') else -np.inf
    print("NumPy compatibility patch applied for np.Inf -> np.inf")
else:
    print("NumPy already has np.Inf attribute")

Working directory: /content/PatchTST
PyTorch Version: 2.9.0+cu126
CUDA Available: True
Python path head: ['/content/PatchTST/PatchTST_supervised', '/content/PatchTST/PatchTST_physics_integrated', '/content/PatchTST', '/', '/env/python']
NumPy compatibility patch applied for np.Inf -> np.inf


## 1. Import Physics-Integrated PatchTST Modules

In [2]:
# Project imports from the PatchTST_physics_integrated package: configuration,
# model, training helpers, trainer, evaluation and data preprocessing.
# NOTE(review): the output recorded for this cell is
#   SyntaxError: unexpected character after line continuation character (models.py, line 595)
# so the model import failed in that run, and the cells below show no execution
# count — TODO fix models.py and re-run the notebook from the top.
from PatchTST_physics_integrated.config import PhysicsIntegratedConfig
from PatchTST_physics_integrated.models import PhysicsIntegratedPatchTST
from PatchTST_physics_integrated.training_utils import set_seed, get_target_indices, get_scheduler
from PatchTST_physics_integrated.trainer import train_model
from PatchTST_physics_integrated.evaluation import evaluate_model, evaluate_per_channel
from PatchTST_physics_integrated.data_preprocessing import add_hour_of_day_features

# Reached only when every import above succeeded
print("✓ All modules imported successfully")

SyntaxError: unexpected character after line continuation character (models.py, line 595)

## 2. Load Configuration

In [None]:
# Instantiate the experiment configuration and pass its seed to set_seed
args = PhysicsIntegratedConfig()
set_seed(args.random_seed)

# Overrides for quick trial runs: a low patience value and the cross-channel encoder enabled
args.patience = 3
args.use_cross_channel_encoder = True

# Summarise the main settings, one "label: value" line per attribute
print("\nConfiguration:")
for setting_label, attr_name in (
    ("Input channels", "enc_in"),
    ("Output channels", "c_out"),
    ("Sequence length", "seq_len"),
    ("Prediction length", "pred_len"),
    ("Batch size", "batch_size"),
    ("Learning rate", "learning_rate"),
    ("Patience", "patience"),
):
    print(f"  {setting_label}: {getattr(args, attr_name)}")

# For each channel group list its source names and the subset flagged as outputs
print("\nChannel Groups (sources → targets):")
for group_name, group_cfg in args.channel_groups.items():
    channel_ids = group_cfg['indices']
    source_labels = group_cfg.get('names', [])
    output_ids = set(group_cfg.get('output_indices', []))

    # A name is a target when the channel index at the same position is an output index
    if source_labels:
        target_labels = [
            source_labels[pos]
            for pos, channel_id in enumerate(channel_ids)
            if channel_id in output_ids
        ]
    else:
        target_labels = []

    print(f"  {group_name}:")
    print(f"    Sources: {', '.join(source_labels)}" if source_labels else "    Sources: (names not provided)")
    print(f"    Targets: {', '.join(target_labels)}" if target_labels else "    Targets: (names not provided)")

## 3. Preprocess Data (Add Hour Features)

In [None]:
# Build the enhanced dataset: add hour-of-day features and apply max pooling to the
# long channel features. The source CSV and the output file both live under args.root_path.
original_path = os.path.join(args.root_path, 'weather.csv')
enhanced_path = os.path.join(args.root_path, args.data_path)

# Long channel output indices: [12, 15, 11] = max. wv (m/s), raining (s), wv (m/s)
long_channel_indices = [12, 15, 11]

# Pooling options are collected first so the call itself stays short;
# kernel and stride come from the configuration.
pooling_kwargs = dict(
    apply_pooling=True,
    pool_channel_indices=long_channel_indices,
    pool_kernel=args.long_channel_pool_kernel,
    pool_stride=args.long_channel_pool_stride,
)
df_enhanced = add_hour_of_day_features(original_path, enhanced_path, **pooling_kwargs)

## 4. Load Data (Using PatchTST Data Providers)

In [None]:
# Change to PatchTST_supervised directory for data_provider imports
import importlib

# Import data_provider with PatchTST_supervised as the working directory.
os.chdir(os.path.join(repo_root_path, 'PatchTST_supervised'))
print(f"Changed to: {os.getcwd()}")

try:
    # Clear cached modules to avoid stale 'utils' shadowing; pop() with a default
    # is a no-op for names that were never imported.
    for m in (
        'utils', 'utils.timefeatures',
        'data_provider', 'data_provider.data_loader', 'data_provider.data_factory',
    ):
        sys.modules.pop(m, None)

    from data_provider.data_factory import data_provider
finally:
    # Always return to the repository root, even when the import above raises,
    # so later cells are not left running from PatchTST_supervised.
    os.chdir(repo_root_path)

# Create data loaders: one (dataset, loader) pair per split
train_data, train_loader = data_provider(args, 'train')
val_data, val_loader = data_provider(args, 'val')
test_data, test_loader = data_provider(args, 'test')

print(f"\nData loaded:")
print(f"  Train samples: {len(train_data)}")
print(f"  Val samples: {len(val_data)}")
print(f"  Test samples: {len(test_data)}")

## 5. Create Model

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Using device: {device}")

# Build the model from the configuration, cast it to float32 and move it to the device
model = PhysicsIntegratedPatchTST(args)
model = model.float().to(device)

# Indices and names of the target channels, derived from the channel groups
target_indices, target_names = get_target_indices(args.channel_groups)

print("\nModel created:")
total_parameters = sum(param.numel() for param in model.parameters())
print(f"  Total parameters: {total_parameters:,}")
print("  Target variables by group:")
for group_name, group_cfg in args.channel_groups.items():
    output_ids = set(group_cfg.get('output_indices', []))
    source_labels = group_cfg.get('names', [])
    # A name is a target when the channel index at the same position is an output index
    if source_labels:
        target_labels = [
            source_labels[pos]
            for pos, channel_id in enumerate(group_cfg['indices'])
            if channel_id in output_ids
        ]
    else:
        target_labels = []
    targets_text = ', '.join(target_labels) if target_labels else '(names not provided)'
    print(f"    {group_name}: {targets_text}")

In [None]:
# Print a heading followed by the model's printed representation
print("\nModel architecture:\n", model, sep="\n")


## 6. Setup Training

In [None]:
# AdamW over all model parameters, using the configured learning rate
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=args.learning_rate,
    weight_decay=1e-4,
)

# OneCycleLR schedule (as in baseline notebook): the cycle length is the number
# of batches per epoch times the configured number of epochs
train_steps = len(train_loader)
one_cycle_kwargs = dict(
    steps_per_epoch=train_steps,
    pct_start=args.pct_start,
    epochs=args.train_epochs,
    max_lr=args.learning_rate,
)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, **one_cycle_kwargs)

# Mean-squared-error training loss
criterion = nn.MSELoss()

# Make sure the checkpoint directory from the configuration exists
checkpoint_path = args.checkpoints
os.makedirs(checkpoint_path, exist_ok=True)

print("Training setup complete")

## 7. Train Model

In [None]:
import time

# Collect everything train_model needs, then time the call
training_kwargs = dict(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    scheduler=scheduler,
    criterion=criterion,
    args=args,
    device=device,
    target_indices=target_indices,
    checkpoint_path=checkpoint_path,
)

start_time = time.time()
history = train_model(**training_kwargs)
end_time = time.time()

elapsed_seconds = end_time - start_time
print(f"Training time: {elapsed_seconds:.2f} seconds")

In [None]:
# Two panels side by side: overall losses on the left, per-target losses on the right
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
overall_ax, target_ax = axes

# Overall losses: one curve per split, each with its own marker
for history_key, legend_label, marker_style in (
    ('train_losses', 'Train Loss', 'o'),
    ('val_losses', 'Validation Loss', 's'),
    ('test_losses', 'Test Loss', '^'),
):
    overall_ax.plot(history[history_key], label=legend_label, marker=marker_style, markersize=3)

# Target variable losses: one curve per entry of history['target_variable_losses']
for target_name, losses in history['target_variable_losses'].items():
    target_ax.plot(losses, label=target_name.capitalize(), marker='o', markersize=3)

# Both panels share the same axis labels, legend and grid; only the title differs
for ax, panel_title in ((overall_ax, 'Training History'), (target_ax, 'Target Variable Losses')):
    ax.set_xlabel('Epoch')
    ax.set_ylabel('MSE Loss')
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 9. Evaluate Model

In [None]:
# Run the model over the test loader; the returned mapping is read below via
# its 'preds' and 'trues' entries
results = evaluate_model(model, test_loader, device, args)

# Per-channel metrics for the target channels
per_channel_metrics = evaluate_per_channel(
    results['preds'], results['trues'], target_indices, target_names
)

# Report MSE / MAE / RMSE for every channel with seven decimals
print("\nPer-Channel Metrics:")
for ch_name, metrics in per_channel_metrics.items():
    print(f"  {ch_name}:")
    for metric_label, metric_key in (("MSE", "mse"), ("MAE", "mae"), ("RMSE", "rmse")):
        print(f"    {metric_label}: {metrics[metric_key]:.7f}")

## 10. Visualize Predictions

In [None]:
# Plot predictions for target variables with random sample selection - showing original scale values
%matplotlib inline
import datetime
import os

def _to_original_scale(seq_norm, channel_idx, fitted_scaler, n_features):
    """Map one normalized channel sequence back to the original data scale.

    The scaler's ``inverse_transform`` is called on a full-width array, so the
    sequence is placed in column ``channel_idx`` of a zero-filled
    ``(len(seq_norm), n_features)`` array and only that column of the result
    is returned.
    """
    padded = np.zeros((len(seq_norm), n_features))
    padded[:, channel_idx] = seq_norm
    return fitted_scaler.inverse_transform(padded)[:, channel_idx]


# Define the 6 features to visualize (in original scale)
features_to_plot = [
    'p (mbar)',           # air pressure
    'T (degC)',           # temperature
    'wv (m/s)',           # wind speed
    'max. wv (m/s)',      # maximum wind speed
    'rain (mm)',          # rainfall amount
    'raining (s)'         # rainfall duration
]

# Keep only the requested features that are present in target_names, together
# with their positions in target_names
plot_feature_names = [feature for feature in features_to_plot if feature in target_names]
plot_feature_indices = [target_names.index(feature) for feature in plot_feature_names]

print(f"Plotting {len(plot_feature_names)} features: {', '.join(plot_feature_names)}")

# Get scaler from test_data for inverse transform
scaler = test_data.scaler
num_full_features = scaler.mean_.shape[0]  # Total features in dataset

# Select random samples to visualize (never more than are available)
num_samples = 3
num_available = results['preds'].shape[0]
sample_indices = np.random.choice(num_available, size=min(num_samples, num_available), replace=False)

if not plot_feature_names or len(sample_indices) == 0:
    # plt.subplots cannot build a grid with zero rows or zero columns
    print("Nothing to plot: no requested feature is a target, or no samples are available")
else:
    # squeeze=False makes `axes` a 2-D array for every grid size, including a
    # single row, a single column, or a single panel
    fig, axes = plt.subplots(len(plot_feature_names), len(sample_indices),
                             figsize=(5*len(sample_indices), 4*len(plot_feature_names)),
                             squeeze=False)

    # One row per feature, one column per sampled window
    for plot_idx, (target_idx, feature_name) in enumerate(zip(plot_feature_indices, plot_feature_names)):
        # Get the original channel index for this feature (column used by the scaler)
        ch_idx = target_indices[target_idx]

        for col, sample_idx in enumerate(sample_indices):
            # Get normalized sequences
            # NOTE(review): inputs are indexed with the target position, like
            # preds/trues; if results['inputs'] holds all input channels this
            # should be ch_idx — confirm against evaluate_model.
            input_seq_norm = results['inputs'][sample_idx, :, target_idx]
            true_seq_norm = results['trues'][sample_idx, :, target_idx]
            pred_seq_norm = results['preds'][sample_idx, :, target_idx]

            # Convert all three sequences to original scale
            input_seq = _to_original_scale(input_seq_norm, ch_idx, scaler, num_full_features)
            true_seq = _to_original_scale(true_seq_norm, ch_idx, scaler, num_full_features)
            pred_seq = _to_original_scale(pred_seq_norm, ch_idx, scaler, num_full_features)

            # The forecast horizon starts right after the last input step
            input_steps = np.arange(len(input_seq))
            pred_steps = np.arange(len(input_seq), len(input_seq) + len(pred_seq))

            ax = axes[plot_idx, col]
            ax.plot(input_steps, input_seq, 'b-', label='Input', linewidth=1.5)
            ax.plot(pred_steps, true_seq, 'g-', label='Ground Truth', linewidth=1.5)
            ax.plot(pred_steps, pred_seq, 'r--', label='Prediction', linewidth=1.5)
            # Dotted marker at the last input step
            ax.axvline(x=len(input_seq)-1, color='gray', linestyle=':', linewidth=1.5)
            ax.set_title(f'Sample {sample_idx} - {feature_name}', fontsize=10)
            ax.set_xlabel('Time Step', fontsize=9)
            ax.set_ylabel('Value (Original Scale)', fontsize=9)
            ax.legend(fontsize=8)
            ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Save the chart with datetime filename
    output_dir = os.path.join(repo_root_path, 'OutputCharts')
    os.makedirs(output_dir, exist_ok=True)

    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f'prediction_visualization_original_scale_{timestamp}.png'
    filepath = os.path.join(output_dir, filename)

    fig.savefig(filepath, dpi=300, bbox_inches='tight')
    print(f"Chart saved to: {filepath}")