In [2]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128

Looking in indexes: https://download.pytorch.org/whl/cu128
Collecting torch
  Downloading https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313-win_amd64.whl.metadata (27 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu128/torchvision-0.22.1%2Bcu128-cp313-cp313-win_amd64.whl.metadata (6.3 kB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu128/torchaudio-2.7.1%2Bcu128-cp313-cp313-win_amd64.whl.metadata (6.8 kB)
Collecting filelock (from torch)
  Using cached https://download.pytorch.org/whl/filelock-3.13.1-py3-none-any.whl.metadata (2.8 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached https://download.pytorch.org/whl/sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Using cached https://download.pytorch.org/whl/networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting fsspec (from torch)
  Using cached https://download.pytorch.org/whl/fsspec-2024.6.1-py3-none-any.whl.metadata (1

In [3]:
import torch
import torchvision

In [4]:
import torch.nn as nn
import torch.nn.functional as F

class PhysionetTransformer(nn.Module):
    """
    Transformer-based encoder for the Physionet dataset.
    This model is adapted to handle longer sequences and a different feature dimension.
    """
    def __init__(self, input_dim=6, d_model=128, nhead=4, num_layers=3, num_classes=3):
        """
        Initializes the PhysionetTransformer model.

        Args:
            input_dim (int): The number of features in the input data (default: 6).
            d_model (int): The dimensionality of the model's embeddings (default: 128).
            nhead (int): The number of heads in the multi-head attention mechanism (default: 4).
            num_layers (int): The number of Transformer encoder layers (default: 3).
            num_classes (int): The number of output classes for classification (default: 3).
        """
        super().__init__()
        # Project raw features to model dimension
        self.embed = nn.Linear(input_dim, d_model)

        # Stacked Transformer encoder layers
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=256,
            dropout=0.1,
            batch_first=False # Expects (Seq_Len, Batch, Features)
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Classification head on pooled CLS-like token
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """
        Forward pass of the model.

        Args:
            x (torch.Tensor): Input tensor of shape (Batch, Seq_Len, Features), e.g., [B, 1000, 6].

        Returns:
            torch.Tensor: The output logits of shape (Batch, num_classes).
            torch.Tensor: The extracted class token embedding of shape (Batch, d_model).
        """
        # x: [B, T=1000, 6]
        h = self.embed(x)               # -> [B, 1000, 128]
        h = h.permute(1, 0, 2)          # -> [T=1000, B, 128] for Transformer
        out = self.transformer(h)       # -> [T=1000, B, 128]
        cls_token = out[0]              # Use the output of the first time step as the summary token
        logits = self.classifier(cls_token) # -> [B, 3]
        return logits, cls_token

In [5]:
phy = PhysionetTransformer()
print(phy)

PhysionetTransformer(
  (embed): Linear(in_features=6, out_features=128, bias=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (classifier): Linear(in_features=128, out_features=3, bias=True)
)




In [1]:
# --- models/encoders.py ---

import torch.nn as nn

class UAHTransformer(nn.Module):
    """Definition for the UAH-Driveset Transformer Encoder."""
    def __init__(self, input_dim=9, d_model=128, nhead=4, num_layers=3, num_classes=3):
        super().__init__()
        self.embed = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=256, dropout=0.1, batch_first=False
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        h = self.embed(x).permute(1, 0, 2)
        out = self.transformer(h)
        cls_token = out[0]
        logits = self.classifier(cls_token)
        return logits, cls_token

class PhysionetTransformer(nn.Module):
    """Definition for the Physionet Transformer Encoder."""
    def __init__(self, input_dim=6, d_model=128, nhead=4, num_layers=3, num_classes=3):
        super().__init__()
        self.embed = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=256, dropout=0.1, batch_first=False
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        h = self.embed(x).permute(1, 0, 2)
        out = self.transformer(h)
        cls_token = out[0]
        logits = self.classifier(cls_token)
        return logits, cls_token

In [2]:
Uah= UAHTransformer()
print(Uah)

UAHTransformer(
  (embed): Linear(in_features=9, out_features=128, bias=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (classifier): Linear(in_features=128, out_features=3, bias=True)
)




In [3]:
phy = PhysionetTransformer()
print(phy)

PhysionetTransformer(
  (embed): Linear(in_features=6, out_features=128, bias=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (classifier): Linear(in_features=128, out_features=3, bias=True)
)




In [None]:
# --- py ---

import torch

# -- Environment --
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# -- Data & File Paths --
UAH_DATA_PATH = '/csehome/p23iot002/Missing_Mod/UAH_Encoder/UAH_data_all.h5'
PHYSIO_DATA_PATH = '/csehome/p23iot002/Missing_Mod/Physionet_Encoder/Physionet_all.h5'
UAH_MODEL_PATH = '/csehome/p23iot002/Missing_Mod/UAH_Encoder/uah_best_model_v3.pth'
PHYSIO_MODEL_PATH = '/csehome/p23iot002/Missing_Mod/Physionet_Encoder/physionet_best_model_v3.pth'

# -- Model Saving Paths --
BEST_STAGE1_MODEL_PATH = "best_stage1_model_v2.pth"
BEST_STAGE2_MODEL_PATH = "best_stage2_model_v2.pth"
BEST_STAGE3_MODEL_PATH = "best_stage3_model_v2.pth"
LAST_EPOCH_MODEL_PATH = "last_epoch_model_v2.pth"

# -- Data Splitting (UPDATED) --
VALIDATION_SPLIT = 0.1
TEST_SPLIT = 0.1
SHUFFLE_DATASET = True
RANDOM_SEED = 42

# -- Model Hyperparameters --
ENCODER_DIM = 128
SHARED_DIM = 128
HIDDEN_DIM = 256
NUM_CLASSES = 3
ATTENTION_HEADS = 4

# -- Training Hyperparameters --
BATCH_SIZE = 128
INITIAL_LR = 1e-4
NUM_EPOCHS_STAGE1 = 20
NUM_EPOCHS_STAGE2 = 50
NUM_EPOCHS_STAGE3 = 30

# -- Stage-Specific Loss Weights --
STAGE1_ALPHA, STAGE1_BETA = 0.05, 0.05 # Contrastive, Consistency
STAGE2_ALPHA, STAGE2_BETA = 0.2, 0.15
STAGE3_ALPHA, STAGE3_BETA = 0.3, 0.25

# -- General Loss Component Weights --
PHYSIO_ONLY_LOSS_WEIGHT = 1.5
GAMMA = 0.1       # Regularization
DELTA = 0.2       # Auxiliary classification
EPSILON = 0.15    # Cross-modal reconstruction
TEMPERATURE = 0.1

# -- SOLUTION 1: MODALITY-SPECIFIC LEARNING RATE CONFIGS --
ENABLE_MODALITY_SPECIFIC_LR = True
UAH_LR_FACTOR = 1.0      # UAH encoder gets a standard learning rate
PHYSIO_LR_FACTOR = 1.5   # Physio encoder gets a higher learning rate
'''
# -- SOLUTION 4: BALANCED PROGRESSIVE UNFREEZING CONFIGS --
ENABLE_MODALITY_SPECIFIC_UNFREEZING = True

UAH_UNFREEZE_SCHEDULE = {
    10: ['classifier'],
    20: ['transformer.layers.2'],
    30: ['transformer.layers.1', 'transformer.layers.0'],
    50: ['embed']
}
PHYSIO_UNFREEZE_SCHEDULE = {
    8: ['classifier'],      # Earlier unfreezing for the weaker modality
    15: ['transformer.layers.2'],
    25: ['transformer.layers.1', 'transformer.layers.0'],
    40: ['embed']
}
'''

"\n# -- SOLUTION 4: BALANCED PROGRESSIVE UNFREEZING CONFIGS --\nENABLE_MODALITY_SPECIFIC_UNFREEZING = True\n\nUAH_UNFREEZE_SCHEDULE = {\n    10: ['classifier'],\n    20: ['transformer.layers.2'],\n    30: ['transformer.layers.1', 'transformer.layers.0'],\n    50: ['embed']\n}\nPHYSIO_UNFREEZE_SCHEDULE = {\n    8: ['classifier'],      # Earlier unfreezing for the weaker modality\n    15: ['transformer.layers.2'],\n    25: ['transformer.layers.1', 'transformer.layers.0'],\n    40: ['embed']\n}\n"

In [9]:
# --- models/fusion.py ---

import torch
import torch.nn as nn
#import configs
#from .encoders import UAHTransformer, PhysionetTransformer

class SharedLatentSpaceFusion(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.uah_encoder = self._load_pretrained_encoder(UAH_MODEL_PATH, 'uah')
        self.physio_encoder = self._load_pretrained_encoder(PHYSIO_MODEL_PATH, 'physio')
        
        # FIX 2: Corrected unfreeze schedule with valid layer names and reachable epochs
        self.unfreeze_schedule = {
            10: ['transformer.layers.2'],  # last encoder block
            20: ['transformer.layers.1'],  # mid block
            30: ['transformer.layers.0'],  # first block
            40: ['embed']                  # input token projector
        }
        
        for param in self.uah_encoder.parameters(): param.requires_grad = False
        for param in self.physio_encoder.parameters(): param.requires_grad = False
        
        # --- Main Fusion Components ---
        self.uah_to_shared = nn.Sequential(
            nn.Linear(ENCODER_DIM, SHARED_DIM), nn.LayerNorm(SHARED_DIM), nn.ReLU(), nn.Dropout(0.1)
        )
        self.physio_to_shared = nn.Sequential(
            nn.Linear(ENCODER_DIM, SHARED_DIM), nn.LayerNorm(SHARED_DIM), nn.ReLU(), nn.Dropout(0.1)
        )
        self.cross_modal_attention = nn.MultiheadAttention(
            embed_dim=SHARED_DIM, num_heads=ATTENTION_HEADS, dropout=0.1, batch_first=True
        )
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=SHARED_DIM, nhead=ATTENTION_HEADS, dim_feedforward=SHARED_DIM * 2, dropout=0.1, batch_first=True
        )
        self.shared_transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)
        self.fusion_gate = nn.Sequential(
            nn.Linear(SHARED_DIM * 2, SHARED_DIM), nn.Tanh(), nn.Linear(SHARED_DIM, 1), nn.Sigmoid()
        )
        self.classifier = nn.Sequential(
            nn.Linear(SHARED_DIM, HIDDEN_DIM), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(HIDDEN_DIM, HIDDEN_DIM // 2), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(HIDDEN_DIM // 2, NUM_CLASSES)
        )

        # --- Auxiliary and Reconstruction Components ---
        self.uah_aux_classifier = nn.Sequential(
            nn.Linear(SHARED_DIM, HIDDEN_DIM // 2), nn.ReLU(),
            nn.Linear(HIDDEN_DIM // 2, NUM_CLASSES)
        )
        self.physio_aux_classifier = nn.Sequential(
            nn.Linear(SHARED_DIM, HIDDEN_DIM // 2), nn.ReLU(),
            nn.Linear(HIDDEN_DIM // 2, NUM_CLASSES)
        )
        self.uah_to_physio_decoder = nn.Sequential(
            nn.Linear(SHARED_DIM, SHARED_DIM), nn.ReLU(),
            nn.Linear(SHARED_DIM, SHARED_DIM)
        )
        self.physio_to_uah_decoder = nn.Sequential(
            nn.Linear(SHARED_DIM, SHARED_DIM), nn.ReLU(),
            nn.Linear(SHARED_DIM, SHARED_DIM)
        )
        
    def _load_pretrained_encoder(self, model_path, modality_type):
        try:
            encoder = UAHTransformer() if modality_type == 'uah' else PhysionetTransformer()
            encoder.load_state_dict(torch.load(model_path, map_location=DEVICE))
            # The 'classifier' layer is replaced, so unfreezing it by name won't work.
            encoder.classifier = nn.Identity()
            print(f"Successfully loaded pretrained {modality_type} encoder.")
            return encoder
        except FileNotFoundError:
            print(f"FATAL: Pretrained model not found at {model_path}. Please update py.")
            exit()

    def progressive_unfreeze(self, epoch):
        """Unfreezes parts of the encoders based on the current epoch."""
        if epoch in self.unfreeze_schedule:
            layers_to_unfreeze = self.unfreeze_schedule[epoch]
            print(f"\nEpoch {epoch}: Unfreezing layers containing: {layers_to_unfreeze}")
            # Ensure layers_to_unfreeze is a list
            if not isinstance(layers_to_unfreeze, list):
                layers_to_unfreeze = [layers_to_unfreeze]

            for pattern in layers_to_unfreeze:
                self._unfreeze_specific_layers(self.uah_encoder, pattern)
                self._unfreeze_specific_layers(self.physio_encoder, pattern)
            return True 
        return False

    def _unfreeze_specific_layers(self, encoder, layer_pattern):
        for name, param in encoder.named_parameters():
            if layer_pattern in name:
                param.requires_grad = True

    def forward(self, uah_seq, physio_seq, modality_mask):
        batch_size = uah_seq.size(0)
        modality_embeddings = {}
        aux_logits = {}

        uah_present_mask = modality_mask[:, 0]
        physio_present_mask = modality_mask[:, 1]
        
        # Initialize full batch tensors
        full_uah = torch.zeros(batch_size, SHARED_DIM, device=DEVICE)
        full_physio = torch.zeros(batch_size, SHARED_DIM, device=DEVICE)

        if uah_present_mask.any():
            # Use set_grad_enabled for safety, though requires_grad should handle it
            with torch.set_grad_enabled(any(p.requires_grad for p in self.uah_encoder.parameters())):
                _, uah_raw = self.uah_encoder(uah_seq[uah_present_mask])
            uah_shared = self.uah_to_shared(uah_raw)
            aux_logits['uah'] = self.uah_aux_classifier(uah_shared)
            full_uah[uah_present_mask] = uah_shared
        
        if physio_present_mask.any():
            with torch.set_grad_enabled(any(p.requires_grad for p in self.physio_encoder.parameters())):
                _, physio_raw = self.physio_encoder(physio_seq[physio_present_mask])
            physio_shared = self.physio_to_shared(physio_raw)
            aux_logits['physio'] = self.physio_aux_classifier(physio_shared)
            full_physio[physio_present_mask] = physio_shared

        # Always populate modality_embeddings for consistent loss calculation
        modality_embeddings['uah'] = full_uah
        modality_embeddings['physio'] = full_physio
        
        both_mask = uah_present_mask & physio_present_mask
        uah_only_mask = uah_present_mask & ~physio_present_mask
        physio_only_mask = ~uah_present_mask & physio_present_mask

        fused_shared = torch.zeros_like(full_uah)

        if both_mask.any():
            uah_emb, physio_emb = full_uah[both_mask].unsqueeze(1), full_physio[both_mask].unsqueeze(1)
            uah_attended, _ = self.cross_modal_attention(uah_emb, physio_emb, physio_emb)
            physio_attended, _ = self.cross_modal_attention(physio_emb, uah_emb, uah_emb)
            concat_features = torch.cat([uah_attended.squeeze(1), physio_attended.squeeze(1)], dim=1)
            fusion_weight = self.fusion_gate(concat_features)
            fused_shared[both_mask] = (fusion_weight * uah_attended.squeeze(1) + (1 - fusion_weight) * physio_attended.squeeze(1))
        
        if uah_only_mask.any():
            fused_shared[uah_only_mask] = full_uah[uah_only_mask]
        
        if physio_only_mask.any():
            fused_shared[physio_only_mask] = full_physio[physio_only_mask]

        refined_shared = self.shared_transformer(fused_shared.unsqueeze(1)).squeeze(1)
        main_logits = self.classifier(refined_shared)
        
        return main_logits, aux_logits, refined_shared, modality_embeddings

In [None]:
sLat = SharedLatentSpaceFusion()
print(sLat)

FATAL: Pretrained model not found at /csehome/p23iot002/Missing_Mod/UAH_Encoder/uah_best_model_v3.pth. Please update py.
FATAL: Pretrained model not found at /csehome/p23iot002/Missing_Mod/Physionet_Encoder/physionet_best_model_v3.pth. Please update py.


AttributeError: 'NoneType' object has no attribute 'parameters'

: 