In [13]:

# -*- coding: utf-8 -*-
"""
Swin Transformer + Tabular Multimodal Energy Prediction
FIXED VERSION - Corrected Phase 1 Training Strategy
NO DATA LEAKAGE VERSION - Clean features only
"""

import os
import pandas as pd
import numpy as np
import rasterio
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import RandomRotation, RandomHorizontalFlip
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr
import cv2
import warnings
import matplotlib.pyplot as plt

try:
    import timm
    print("‚úÖ timm library loaded successfully!")
except ImportError:
    print("‚ùå timm not found. Please install: pip install timm")
    raise

warnings.filterwarnings('ignore')

print("‚úÖ Libraries loaded successfully!")
print(f"üî• PyTorch version: {torch.__version__}")
print(f"üéÆ CUDA available: {torch.cuda.is_available()}")
print(f"üì¶ timm version: {timm.__version__}")

‚úÖ timm library loaded successfully!
‚úÖ Libraries loaded successfully!
üî• PyTorch version: 2.9.0+cu126
üéÆ CUDA available: True
üì¶ timm version: 1.0.21


In [14]:
class TabularEncoder(nn.Module):
    """
    Clean tabular encoder - NO DATA LEAKAGE
    Features (6 total):
    - log_population, log_area, log_density
    - month_sin, month_cos, year_normalized
    """
    def __init__(self, num_features=6, output_dim=512, dropout=0.3):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(num_features, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, output_dim)
        )

    def forward(self, x):
        x = x.float()
        x = torch.nan_to_num(x, nan=0.0, posinf=1.0, neginf=-1.0)
        return self.encoder(x)


print("‚úÖ Tabular encoder ready - NO LEAKAGE!")
print("   Features: population, area, density, seasonality, time trend")

‚úÖ Tabular encoder ready - NO LEAKAGE!
   Features: population, area, density, seasonality, time trend


In [15]:
class FusionLayer(nn.Module):
    """Simple concatenation + MLP fusion"""
    def __init__(self, vision_dim=512, tabular_dim=512, output_dim=512, dropout=0.3):
        super().__init__()
        self.fusion = nn.Sequential(
            nn.Linear(vision_dim + tabular_dim, 1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(1024, output_dim),
            nn.LayerNorm(output_dim),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

    def forward(self, vision_feat, tabular_feat):
        combined = torch.cat([vision_feat, tabular_feat], dim=-1)
        return self.fusion(combined)


print("‚úÖ Fusion layer ready!")

‚úÖ Fusion layer ready!


In [16]:
class SwinMultimodalModel(nn.Module):
    """
    Swin + Tabular Multimodal Model
    Architecture:
    1. Swin Transformer (pretrained) ‚Üí 768-dim
    2. Vision Projection ‚Üí 512-dim
    3. Tabular Encoder ‚Üí 512-dim
    4. Fusion (concat + MLP) ‚Üí 512-dim
    5. Prediction Head ‚Üí 1
    """
    def __init__(self,
                 swin_variant='swin_tiny_patch4_window7_224',
                 num_tabular_features=6,
                 dropout=0.3,
                 pretrained=True):
        super().__init__()

        print(f"\nüî® Building Swin Multimodal Model...")
        print(f"   Vision: {swin_variant}")
        print(f"   Tabular features: {num_tabular_features}")
        print(f"   Pretrained: {pretrained}")

        # 1. Swin Transformer
        self.swin = timm.create_model(
            swin_variant,
            pretrained=pretrained,
            num_classes=0,
            global_pool='avg',
            in_chans=1,
            img_size=64
        )

        swin_out_dim = self.swin.num_features
        print(f"   Swin output dim: {swin_out_dim}")

        # 2. Vision projection
        self.vision_proj = nn.Sequential(
            nn.Linear(swin_out_dim, 512),
            nn.LayerNorm(512),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

        # 3. Tabular encoder
        self.tabular_encoder = TabularEncoder(
            num_features=num_tabular_features,
            output_dim=512,
            dropout=dropout
        )

        # 4. Fusion layer
        self.fusion = FusionLayer(
            vision_dim=512,
            tabular_dim=512,
            output_dim=512,
            dropout=dropout
        )

        # 5. Prediction head
        self.head = nn.Sequential(
            nn.Linear(512, 512),
            nn.LayerNorm(512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, 256),
            nn.LayerNorm(256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 1)
        )

        print(f"   ‚úÖ Multimodal model built!")

    def freeze_backbone(self):
        """Freeze ONLY Swin backbone"""
        for p in self.swin.parameters():
            p.requires_grad = False
        print("   üîí Swin backbone frozen")

    def unfreeze_all(self):
        """Unfreeze everything"""
        for p in self.parameters():
            p.requires_grad = True
        print("   üîì All parameters unfrozen")

    def forward(self, img, tabular):
        """
        Args:
            img: (B, 1, 64, 64) - nightlight images
            tabular: (B, 6) - clean features
        Returns:
            predictions: (B, 1)
        """
        vision_feat = self.swin(img)
        vision_feat = self.vision_proj(vision_feat)
        tabular_feat = self.tabular_encoder(tabular)
        fused = self.fusion(vision_feat, tabular_feat)
        output = self.head(fused)
        return output


print("\n‚úÖ Swin Multimodal Model assembled!")
print("\nüìä Architecture Summary:")
print("   1. Swin Transformer (pretrained) - ~28M params")
print("   2. Vision Projection - ~0.5M params")
print("   3. Tabular Encoder (6 features) - ~0.3M params")
print("   4. Fusion Layer - ~1M params")
print("   5. Prediction Head - ~0.5M params")
print("   ‚ú® TOTAL: ~30M parameters")


‚úÖ Swin Multimodal Model assembled!

üìä Architecture Summary:
   1. Swin Transformer (pretrained) - ~28M params
   2. Vision Projection - ~0.5M params
   3. Tabular Encoder (6 features) - ~0.3M params
   4. Fusion Layer - ~1M params
   5. Prediction Head - ~0.5M params
   ‚ú® TOTAL: ~30M parameters


In [17]:
# import torch
# import torch.nn as nn
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
# from torch.utils.data import DataLoader
# from sklearn.metrics import r2_score, mean_absolute_error
# from scipy.stats import pearsonr
# import os


# # ==========================================
# # 0. FIX THE LOADING ERROR
# # ==========================================
# # This tells PyTorch to trust the Numpy scalars saved in your checkpoint
# import torch.serialization
# try:
#     # Option A: Add to safe globals (Preferred for PyTorch 2.6+)
#     torch.serialization.add_safe_globals([np._core.multiarray.scalar])
# except:
#     pass

# # ==========================================
# # 1. DEFINE ARCHITECTURE (Required for Loading)
# # ==========================================
# # Note: Ensure TabularEncoder, FusionLayer, and SwinMultimodalModel 
# # classes are copied here from your original notebook.

# # ==========================================
# # 2. LOAD PRE-TRAINED MODEL & SCALERS
# # ==========================================
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# checkpoint_path = 'best_multimodal_model_fixed.pt'

# print(f"üîÑ Loading checkpoint: {checkpoint_path}")
# checkpoint = torch.load(checkpoint_path, map_location=device)

# # Reconstruct Model
# model = SwinMultimodalModel(num_tabular_features=6, pretrained=False)
# model.load_state_dict(checkpoint['model'])
# model.to(device).eval()

# # Retrieve Scalers
# feat_scaler = checkpoint['feat_scaler']
# targ_scaler = checkpoint['targ_scaler']

# # ==========================================
# # 3. GET PREDICTIONS (Single Pass)
# # ==========================================
# all_preds, all_targets = [], []

# with torch.no_grad():
#     for imgs, feats, labels in test_loader: # Use your existing test_loader
#         outputs = model(imgs.to(device), feats.to(device))
#         all_preds.append(outputs.cpu().numpy())
#         all_targets.append(labels.cpu().numpy())

# # Inverse Transform to original kWh
# preds_log = targ_scaler.inverse_transform(np.concatenate(all_preds).reshape(-1, 1)).ravel()
# targets_log = targ_scaler.inverse_transform(np.concatenate(all_targets).reshape(-1, 1)).ravel()

# preds_orig = np.expm1(preds_log)
# targets_orig = np.expm1(targets_log)

# # ==========================================
# # 4. GENERATE THESIS GRAPHS
# # ==========================================
# plt.figure(figsize=(20, 10))

# # GRAPH 1: Predicted vs Actual Scatter
# plt.subplot(1, 2, 1)
# sns.regplot(x=targets_orig, y=preds_orig, scatter_kws={'alpha':0.5}, line_kws={'color':'red'})
# plt.plot([targets_orig.min(), targets_orig.max()], [targets_orig.min(), targets_orig.max()], 'k--', lw=2)
# plt.title(f"Predicted vs Actual Energy Use\n(Pearson R: {pearsonr(targets_orig, preds_orig)[0]:.4f})")
# plt.xlabel("Actual kWh")
# plt.ylabel("Predicted kWh")

# # GRAPH 2: Residual Analysis
# plt.subplot(1, 2, 2)
# residuals = targets_orig - preds_orig
# sns.histplot(residuals, kde=True, color="purple")
# plt.axvline(0, color='red', linestyle='--')
# plt.title("Distribution of Residuals (Errors)")
# plt.xlabel("Error (Actual - Predicted)")

# plt.tight_layout()
# plt.show()

# # GRAPH 3: Accuracy Tiers (Thesis Bar Chart)
# errors = np.abs((targets_orig - preds_orig) / (targets_orig + 1e-8)) * 100
# acc_5 = np.mean(errors <= 5) * 100
# acc_10 = np.mean(errors <= 10) * 100
# acc_20 = np.mean(errors <= 20) * 100

# plt.figure(figsize=(8, 6))
# bars = plt.bar(['Within ¬±5%', 'Within ¬±10%', 'Within ¬±20%'], [acc_5, acc_10, acc_20], color=['#4CAF50', '#2196F3', '#FF9800'])
# plt.ylabel("Percentage of Test Samples (%)")
# plt.title("Model Prediction Accuracy Thresholds")
# for bar in bars:
#     yval = bar.get_height()
#     plt.text(bar.get_x() + bar.get_width()/2, yval + 1, f'{yval:.1f}%', ha='center', va='bottom', fontweight='bold')
# plt.ylim(0, 110)
# plt.show()

In [18]:
# import torch
# import torch.nn as nn
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
# from torch.utils.data import DataLoader, Dataset
# from sklearn.metrics import r2_score, mean_absolute_error
# from scipy.stats import pearsonr
# import timm
# import os

# # ==========================================
# # 0. FIX THE LOADING ERROR
# # ==========================================
# # This tells PyTorch to trust the Numpy scalars saved in your checkpoint
# import torch.serialization
# try:
#     # Option A: Add to safe globals (Preferred for PyTorch 2.6+)
#     torch.serialization.add_safe_globals([np._core.multiarray.scalar])
# except:
#     pass 

# # # ==========================================
# # # 1. DEFINE ARCHITECTURE (Must match original)
# # # ==========================================
# # class TabularEncoder(nn.Module):
# #     def __init__(self, input_dim, output_dim=128):
# #         super().__init__()
# #         self.net = nn.Sequential(
# #             nn.Linear(input_dim, 256),
# #             nn.ReLU(),
# #             nn.BatchNorm1d(256),
# #             nn.Dropout(0.2),
# #             nn.Linear(256, output_dim),
# #             nn.ReLU()
# #         )
# #     def forward(self, x): return self.net(x)

# # class FusionLayer(nn.Module):
# #     def __init__(self, vision_dim, tab_dim, output_dim=256):
# #         super().__init__()
# #         self.fusion = nn.Sequential(
# #             nn.Linear(vision_dim + tab_dim, 512),
# #             nn.ReLU(),
# #             nn.Dropout(0.3),
# #             nn.Linear(512, output_dim),
# #             nn.ReLU(),
# #             nn.Linear(output_dim, 1)
# #         )
# #     def forward(self, v, t): return self.fusion(torch.cat([v, t], dim=1))

# # class SwinMultimodalModel(nn.Module):
# #     def __init__(self, swin_variant='swin_tiny_patch4_window7_224', num_tabular_features=6):
# #         super().__init__()
# #         self.swin = timm.create_model(swin_variant, pretrained=False, num_classes=0)
# #         self.vision_proj = nn.Linear(self.swin.num_features, 256)
# #         self.tab_encoder = TabularEncoder(num_tabular_features, 128)
# #         self.fusion = FusionLayer(256, 128)

# #     def forward(self, img, tab):
# #         v_feat = self.swin(img)
# #         v_feat = self.vision_proj(v_feat)
# #         t_feat = self.tab_encoder(tab)
# #         return self.fusion(v_feat, t_feat)

# # ==========================================
# # 2. LOAD DATA & MODEL
# # ==========================================
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# checkpoint_path = 'best_multimodal_model_fixed.pt'

# print(f"üîÑ Loading checkpoint: {checkpoint_path}...")
# # FIX: Use weights_only=False to allow the Numpy Scalers to load
# checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

# # Reconstruct Model
# model = SwinMultimodalModel(num_tabular_features=6)
# model.load_state_dict(checkpoint['model'])
# model.to(device).eval()

# # Retrieve Scalers
# targ_scaler = checkpoint['targ_scaler']
# print("‚úÖ Model and Scalers loaded successfully!")

# # ==========================================
# # 3. GENERATE ANALYTICS & GRAPHS
# # ==========================================
# # (Assuming you have test_loader ready from your previous setup)

# def generate_thesis_visuals(test_loader):
#     all_preds, all_targets = [], []
    
#     with torch.no_grad():
#         for imgs, feats, labels in test_loader:
#             outputs = model(imgs.to(device), feats.to(device))
#             all_preds.append(outputs.cpu().numpy())
#             all_targets.append(labels.cpu().numpy())

#     # Inverse Transform
#     preds_log = targ_scaler.inverse_transform(np.concatenate(all_preds).reshape(-1, 1)).ravel()
#     targets_log = targ_scaler.inverse_transform(np.concatenate(all_targets).reshape(-1, 1)).ravel()
    
#     # Convert from Log space back to real kWh
#     preds_orig = np.expm1(preds_log)
#     targets_orig = np.expm1(targets_log)

#     # --- PLOT 1: Regression Performance ---
#     plt.figure(figsize=(15, 6))
    
#     plt.subplot(1, 2, 1)
    
#     sns.regplot(x=targets_orig, y=preds_orig, scatter_kws={'alpha':0.3, 'color':'blue'}, line_kws={'color':'red'})
#     plt.plot([targets_orig.min(), targets_orig.max()], [targets_orig.min(), targets_orig.max()], 'k--', alpha=0.7)
#     plt.title(f"Predicted vs Actual Energy Consumption\n(R¬≤: {r2_score(targets_orig, preds_orig):.4f})")
#     plt.xlabel("Actual Consumption (kWh)")
#     plt.ylabel("Model Prediction (kWh)")

#     # --- PLOT 2: Residual (Error) Distribution ---
#     plt.subplot(1, 2, 2)
    
#     residuals = targets_orig - preds_orig
#     sns.histplot(residuals, kde=True, color="forestgreen")
#     plt.axvline(0, color='red', linestyle='--')
#     plt.title("Error Distribution (Residuals)")
#     plt.xlabel("Prediction Error (kWh)")

#     plt.tight_layout()
#     plt.show()

#     # --- PLOT 3: Error Thresholds ---
#     errors = np.abs((targets_orig - preds_orig) / (targets_orig + 1e-8)) * 100
#     acc_10 = np.mean(errors <= 10) * 100
#     acc_20 = np.mean(errors <= 20) * 100
    
#     plt.figure(figsize=(8, 5))
    
#     sns.barplot(x=['Within 10% Error', 'Within 20% Error'], y=[acc_10, acc_20], palette='viridis')
#     plt.ylabel("Percentage of Samples (%)")
#     plt.title("Model Reliability (Accuracy Tiers)")
#     plt.show()

# # Run it
# generate_thesis_visuals(test_loader)

In [19]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import r2_score, mean_absolute_error
from scipy.stats import pearsonr
import timm
import os
import cv2
from torchvision import transforms

# ==========================================
# 0. PYTORCH 2.6+ COMPATIBILITY FIX
# ==========================================
import torch.serialization
try:
    torch.serialization.add_safe_globals([np._core.multiarray.scalar])
except:
    pass 

# ==========================================
# 1. IDENTICAL ARCHITECTURE (From your Notebook)
# ==========================================
class TabularEncoder(nn.Module):
    def __init__(self, input_dim, output_dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.2),
            nn.Linear(256, output_dim),
            nn.ReLU()
        )
    def forward(self, x): return self.net(x)

class FusionLayer(nn.Module):
    def __init__(self, vision_dim, tab_dim, output_dim=256):
        super().__init__()
        self.fusion = nn.Sequential(
            nn.Linear(vision_dim + tab_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, output_dim),
            nn.ReLU(),
            nn.Linear(output_dim, 1)
        )
    def forward(self, v, t): return self.fusion(torch.cat([v, t], dim=1))

class SwinMultimodalModel(nn.Module):
    def __init__(self, swin_variant='swin_tiny_patch4_window7_224', num_tabular_features=6, pretrained=False):
        super().__init__()
        self.swin = timm.create_model(swin_variant, pretrained=pretrained, num_classes=0)
        self.vision_proj = nn.Linear(self.swin.num_features, 256)
        self.tab_encoder = TabularEncoder(num_tabular_features, 128)
        self.fusion = FusionLayer(256, 128)

    def forward(self, img, tab):
        v_feat = self.swin(img)
        v_feat = self.vision_proj(v_feat)
        t_feat = self.tab_encoder(tab)
        return self.fusion(v_feat, t_feat)

# ==========================================
# 2. IDENTICAL DATASET CLASS
# ==========================================
class MultimodalDataset(Dataset):
    def __init__(self, image_paths, features, targets, transform=None):
        self.image_paths = image_paths
        self.features = torch.FloatTensor(features)
        self.targets = torch.FloatTensor(targets)
        self.transform = transform

    def __len__(self): return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = cv2.imread(img_path)
        if image is None: # Fallback if path is wrong
            image = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            
        if self.transform:
            image = self.transform(image)
        return image, self.features[idx], self.targets[idx]

# ==========================================
# 3. IDENTICAL DATA PREPARATION (Matching your Notebook)
# ==========================================
def get_test_loader_identical(csv_path, image_dir, feat_scaler, targ_scaler):
    df = pd.read_csv(csv_path)
    
    # 1. Filtering logic exactly as per notebook
    df = df[(df['Energy Use per Capita (kWh)'] > 0) & (df['Population'] > 0) & (df['Area (Sq. Km)'] > 0)]
    
    # 2. Feature engineering exactly as per notebook
    df['log_population'] = np.log1p(df['Population'])
    df['log_area'] = np.log1p(df['Area (Sq. Km)'])
    df['log_density'] = np.log1p(df['Population'] / df['Area (Sq. Km)'])
    df['date'] = pd.to_datetime(df['Date (month/year)'])
    df['month_sin'] = np.sin(2 * np.pi * df['date'].dt.month / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['date'].dt.month / 12)
    df['year_normalized'] = (df['date'].dt.year - 2012) / (2024 - 2012)

    # 3. Time Series Split: Test = Year > 2022
    test_df = df[df['date'].dt.year > 2022].copy()
    
    features_cols = ['log_population', 'log_area', 'log_density', 'month_sin', 'month_cos', 'year_normalized']
    
    # 4. Use provided scalers (don't fit_transform, only transform)
    X_tab = feat_scaler.transform(test_df[features_cols])
    y_log = np.log1p(test_df[['Energy Use per Capita (kWh)']])
    y_scaled = targ_scaler.transform(y_log)

    image_paths = []
    for _, row in test_df.iterrows():
        # Matching your path logic: IMAGE_DIR / Country / Country_Year_Month.png
        filename = f"{row['Country']}_{row['date'].year}_{row['date'].month:02d}.png"
        image_paths.append(os.path.join(image_dir, row['Country'], filename))

    # Identical Transform
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    ds = MultimodalDataset(image_paths, X_tab, y_scaled, transform=transform)
    return DataLoader(ds, batch_size=32, shuffle=False), test_df

# ==========================================
# 4. LOADING AND VISUALIZATION
# ==========================================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint_path = 'best_multimodal_model_fixed.pt'
CSV_PATH = 'C:/Users/FA004/Desktop/satimg2/data.csv'
IMAGE_DIR = r'C:\Users\FA004\Desktop\satimg2.1\images_png_view'

# Load
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)
model = SwinMultimodalModel(num_tabular_features=6)
model.load_state_dict(checkpoint['model'])
model.to(device).eval()

feat_scaler = checkpoint['feat_scaler']
targ_scaler = checkpoint['targ_scaler']

test_loader, test_df_cleaned = get_test_loader_identical(CSV_PATH, IMAGE_DIR, feat_scaler, targ_scaler)

def generate_thesis_plots(loader):
    all_preds, all_targets = [], []
    with torch.no_grad():
        for imgs, feats, labels in loader:
            outputs = model(imgs.to(device), feats.to(device))
            all_preds.append(outputs.cpu().numpy())
            all_targets.append(labels.cpu().numpy())

    # De-scale to log space, then de-log to original space
    p_log = targ_scaler.inverse_transform(np.concatenate(all_preds).reshape(-1, 1))
    t_log = targ_scaler.inverse_transform(np.concatenate(all_targets).reshape(-1, 1))
    
    p_orig = np.expm1(p_log).ravel()
    t_orig = np.expm1(t_log).ravel()

    # --- PLOT 1: RegPlot (Standard Thesis Visual) ---
    
    plt.figure(figsize=(10, 6))
    sns.regplot(x=t_orig, y=p_orig, scatter_kws={'alpha':0.4}, line_kws={'color':'red'})
    plt.plot([t_orig.min(), t_orig.max()], [t_orig.min(), t_orig.max()], 'k--', alpha=0.8)
    plt.title(f"Model Performance: Predicted vs Actual\n(R¬≤: {r2_score(t_orig, p_orig):.4f})")
    plt.xlabel("Actual Energy Use (kWh)")
    plt.ylabel("Predicted Energy Use (kWh)")
    plt.show()

    # --- PLOT 2: Residual Histogram ---
    
    plt.figure(figsize=(10, 6))
    sns.histplot(t_orig - p_orig, kde=True, color="blue")
    plt.axvline(0, color='red', ls='--')
    plt.title("Error Distribution (Residuals)")
    plt.show()

    # --- PLOT 3: Error Metrics by Year ---
    # Add metrics to df for grouped analysis
    test_df_cleaned['Absolute_Error_Pct'] = np.abs((t_orig - p_orig) / (t_orig + 1e-8)) * 100
    
    plt.figure(figsize=(8, 5))
    
    sns.barplot(x=test_df_cleaned['date'].dt.year, y=test_df_cleaned['Absolute_Error_Pct'])
    plt.title("Average MAPE % by Year (Test Set)")
    plt.show()

generate_thesis_plots(test_loader)

RuntimeError: Error(s) in loading state_dict for SwinMultimodalModel:
	Missing key(s) in state_dict: "vision_proj.weight", "vision_proj.bias", "tab_encoder.net.0.weight", "tab_encoder.net.0.bias", "tab_encoder.net.2.weight", "tab_encoder.net.2.bias", "tab_encoder.net.2.running_mean", "tab_encoder.net.2.running_var", "tab_encoder.net.4.weight", "tab_encoder.net.4.bias", "fusion.fusion.3.weight", "fusion.fusion.3.bias". 
	Unexpected key(s) in state_dict: "tabular_encoder.encoder.0.weight", "tabular_encoder.encoder.0.bias", "tabular_encoder.encoder.3.weight", "tabular_encoder.encoder.3.bias", "tabular_encoder.encoder.6.weight", "tabular_encoder.encoder.6.bias", "head.0.weight", "head.0.bias", "head.1.weight", "head.1.bias", "head.4.weight", "head.4.bias", "head.5.weight", "head.5.bias", "head.8.weight", "head.8.bias", "vision_proj.0.weight", "vision_proj.0.bias", "vision_proj.1.weight", "vision_proj.1.bias", "fusion.fusion.1.weight", "fusion.fusion.1.bias", "fusion.fusion.4.weight", "fusion.fusion.4.bias". 
	size mismatch for swin.patch_embed.proj.weight: copying a param with shape torch.Size([96, 1, 4, 4]) from checkpoint, the shape in current model is torch.Size([96, 3, 4, 4]).
	size mismatch for swin.layers.2.blocks.0.attn.relative_position_bias_table: copying a param with shape torch.Size([49, 12]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for swin.layers.2.blocks.1.attn.relative_position_bias_table: copying a param with shape torch.Size([49, 12]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for swin.layers.2.blocks.2.attn.relative_position_bias_table: copying a param with shape torch.Size([49, 12]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for swin.layers.2.blocks.3.attn.relative_position_bias_table: copying a param with shape torch.Size([49, 12]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for swin.layers.2.blocks.4.attn.relative_position_bias_table: copying a param with shape torch.Size([49, 12]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for swin.layers.2.blocks.5.attn.relative_position_bias_table: copying a param with shape torch.Size([49, 12]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for swin.layers.3.blocks.0.attn.relative_position_bias_table: copying a param with shape torch.Size([9, 24]) from checkpoint, the shape in current model is torch.Size([169, 24]).
	size mismatch for swin.layers.3.blocks.1.attn.relative_position_bias_table: copying a param with shape torch.Size([9, 24]) from checkpoint, the shape in current model is torch.Size([169, 24]).
	size mismatch for fusion.fusion.0.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([512, 384]).
	size mismatch for fusion.fusion.0.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for fusion.fusion.5.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([1, 256]).
	size mismatch for fusion.fusion.5.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([1]).