In [1]:
import os
%pwd

'c:\\09_AHFID\\CervicalAI-Screen\\notebook'

In [2]:
os.chdir('../')
%pwd

'c:\\09_AHFID\\CervicalAI-Screen'

In [3]:
# 02_prepare_base_model.ipynb
# Base model architecture optimized for semi-supervised learning

import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
import json
from pathlib import Path
import numpy as np

# Load metadata from data ingestion
ARTIFACTS_DIR = Path("artifacts")
# Pass the encoding explicitly so decoding does not depend on the platform's
# locale default (JSON text is UTF-8).
with open(ARTIFACTS_DIR / "data_metadata.json", "r", encoding="utf-8") as f:
    metadata = json.load(f)

# Echo the fields the rest of the notebook relies on
print("Data Configuration:")
print(f"  Classes: {metadata['classes']}")
print(f"  Number of classes: {metadata['num_classes']}")
print(f"  SSL enabled: {metadata['ssl_enabled']}")
print(f"  Data directory: {metadata['data_dir']}")

  from .autonotebook import tqdm as notebook_tqdm


Data Configuration:
  Classes: ['Negative', 'Positive']
  Number of classes: 2
  SSL enabled: True
  Data directory: artifacts\via_cervix_ssl


In [4]:
# Runtime and architecture settings shared by the cells below
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

MODEL_NAME = 'efficientnet_b0'          # backbone name recorded in the saved config
NUM_CLASSES = metadata['num_classes']   # taken from the data-ingestion metadata
IMG_SIZE = 224                          # side length of the square dummy input
FEATURE_DIM = 1280                      # feature width recorded in the saved config

print(f"Using device: {DEVICE}")

Using device: cpu


In [5]:
class SSLEfficientNet(nn.Module):
    """timm backbone with a classification head and a projection head.

    The backbone is created with ``num_classes=0`` and its output is fed to two
    independent heads:

    * ``classifier`` -- Dropout -> Linear(feature_dim, 256) -> ReLU ->
      Dropout -> Linear(256, num_classes), producing class logits.
    * ``projector``  -- Linear(feature_dim, 256) -> ReLU -> Linear(256, 128)
      -> ReLU -> Linear(128, 64), producing a 64-d embedding.

    Args:
        model_name: timm model identifier used for the backbone.
        num_classes: Number of logits produced by the classifier.
        dropout_rate: Dropout probability before the first classifier layer;
            half of this value is used before the final classifier layer.
        pretrained: Whether timm should load pretrained backbone weights.
            Defaults to ``True`` (the previously hard-coded behaviour). Pass
            ``False`` when the weights are going to be restored from a
            checkpoint, so no pretrained weights have to be fetched.
    """
    def __init__(self, model_name='efficientnet_b0', num_classes=2, dropout_rate=0.5,
                 pretrained=True):
        super().__init__()

        # Backbone without a timm classification head
        self.backbone = timm.create_model(model_name, pretrained=pretrained, num_classes=0)
        self.feature_dim = self.backbone.num_features

        # Feature projector for consistency regularization
        self.projector = nn.Sequential(
            nn.Linear(self.feature_dim, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64)
        )

        # Classification head with dropout
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(self.feature_dim, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.5),
            nn.Linear(256, num_classes)
        )

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-initialise the Linear layers of both heads; biases are zeroed.

        The backbone is left untouched so its (optionally pretrained) weights
        are preserved.
        """
        for head in (self.projector, self.classifier):
            for layer in head:
                if isinstance(layer, nn.Linear):
                    nn.init.xavier_uniform_(layer.weight)
                    # A Linear layer built with bias=False has no bias tensor
                    if layer.bias is not None:
                        nn.init.constant_(layer.bias, 0)

    def forward(self, x, return_features=False):
        """Run the backbone and the classifier.

        Args:
            x: Input batch passed straight to the backbone.
            return_features: When true, also return the backbone features and
                their projection.

        Returns:
            ``logits`` when ``return_features`` is false, otherwise the tuple
            ``(logits, features, projected_features)``.
        """
        features = self.backbone(x)
        logits = self.classifier(features)

        if return_features:
            projected_features = self.projector(features)
            return logits, features, projected_features

        return logits

    def extract_features(self, x):
        """Return backbone features for ``x`` with gradient tracking disabled.

        Only autograd is switched off here; the module's train/eval mode is
        left as the caller set it.
        """
        with torch.no_grad():
            features = self.backbone(x)
            return features

In [6]:
class ConsistencyLoss(nn.Module):
    """Consistency loss between two sets of logits for semi-supervised learning.

    Both inputs are divided by ``temperature`` and normalised over ``dim=1``
    before being compared.

    Args:
        consistency_type: ``'mse'`` for the mean squared error between the two
            softmax distributions, or ``'kl'`` for
            ``F.kl_div(log_softmax(logits1), softmax(logits2))`` with
            ``reduction='batchmean'``.
        temperature: Positive softmax temperature.

    Raises:
        ValueError: At construction time if ``consistency_type`` is not
            supported or ``temperature`` is not positive, so that a
            misconfiguration fails before the first training step.
    """
    _SUPPORTED_TYPES = ('mse', 'kl')

    def __init__(self, consistency_type='mse', temperature=1.0):
        super().__init__()
        if consistency_type not in self._SUPPORTED_TYPES:
            raise ValueError(f"Unknown consistency type: {consistency_type}")
        if temperature <= 0:
            # The logits are divided by the temperature in forward()
            raise ValueError(f"temperature must be positive, got {temperature}")
        self.consistency_type = consistency_type
        self.temperature = temperature

    def forward(self, logits1, logits2):
        """Return the scalar consistency loss between ``logits1`` and ``logits2``."""
        if self.consistency_type == 'mse':
            prob1 = F.softmax(logits1 / self.temperature, dim=1)
            prob2 = F.softmax(logits2 / self.temperature, dim=1)
            return F.mse_loss(prob1, prob2)
        elif self.consistency_type == 'kl':
            # F.kl_div takes log-probabilities as input and probabilities as target
            log_prob1 = F.log_softmax(logits1 / self.temperature, dim=1)
            prob2 = F.softmax(logits2 / self.temperature, dim=1)
            return F.kl_div(log_prob1, prob2, reduction='batchmean')
        else:
            # Still reachable if the attribute is reassigned after construction
            raise ValueError(f"Unknown consistency type: {self.consistency_type}")

In [7]:
class PseudoLabelLoss(nn.Module):
    """Pseudo-labeling loss for semi-supervised learning.

    The arg-max class of each sample is used as its pseudo-label, and only
    samples whose maximum softmax probability reaches ``threshold`` contribute
    to the cross-entropy.

    Args:
        threshold: Minimum softmax confidence for a sample to be used.
        temperature: Temperature applied to the logits when computing the
            confidences (the cross-entropy itself uses the raw logits).
    """
    def __init__(self, threshold=0.95, temperature=1.0):
        super().__init__()
        self.threshold = threshold
        self.temperature = temperature

    def forward(self, logits, return_mask=False):
        """Compute the pseudo-label loss for a batch of logits.

        Args:
            logits: Tensor whose ``dim=1`` indexes classes.
            return_mask: When true, also return the confidence mask and the
                pseudo-labels.

        Returns:
            ``loss`` or ``(loss, confidence_mask, pseudo_labels)``. The loss is
            the mean cross-entropy over confident samples, or a zero that is
            still connected to ``logits`` when no sample is confident.
        """
        # Confidences and labels act as targets, so no graph is needed for them
        probs = F.softmax(logits.detach() / self.temperature, dim=1)
        max_probs, pseudo_labels = torch.max(probs, dim=1)

        confidence_mask = max_probs >= self.threshold

        if confidence_mask.any():
            loss = F.cross_entropy(logits[confidence_mask], pseudo_labels[confidence_mask])
        else:
            # Keep dtype, device and the autograd graph of `logits`, so calling
            # backward() on this loss works and simply yields zero gradients.
            loss = logits.sum() * 0.0

        if return_mask:
            return loss, confidence_mask, pseudo_labels
        return loss

In [8]:
def create_ssl_model(num_classes=2, model_name='efficientnet_b0', dropout_rate=0.5):
    """Build an ``SSLEfficientNet`` from the given hyper-parameters.

    Args:
        num_classes: Number of classifier outputs.
        model_name: Backbone name forwarded to ``SSLEfficientNet``.
        dropout_rate: Dropout rate forwarded to ``SSLEfficientNet``.

    Returns:
        The newly constructed model (not moved to any device).
    """
    return SSLEfficientNet(
        model_name=model_name,
        num_classes=num_classes,
        dropout_rate=dropout_rate,
    )

In [9]:
def count_parameters(model):
    """Return ``(total, trainable)`` parameter element counts for ``model``.

    ``trainable`` only includes parameters whose ``requires_grad`` is true.
    """
    total = 0
    trainable = 0
    for param in model.parameters():
        size = param.numel()
        total += size
        if param.requires_grad:
            trainable += size
    return total, trainable

In [10]:
def test_model_architecture():
    """Smoke-test the model with random input and report shapes and sizes.

    Builds the model via ``create_ssl_model(NUM_CLASSES)`` on ``DEVICE``, runs
    the three forward entry points on a dummy batch, and prints the resulting
    shapes together with the parameter counts.

    Returns:
        The constructed model, in the same train/eval mode it was created in.
    """
    print("\nTesting model architecture...")

    model = create_ssl_model(NUM_CLASSES).to(DEVICE)

    # Test forward pass
    batch_size = 4
    dummy_input = torch.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).to(DEVICE)

    # torch.no_grad() does not stop normalisation layers from updating their
    # running statistics in train mode. Run the random-noise batch in eval mode
    # so any such statistics in the backbone are not altered before the caller
    # saves this model's state_dict.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits = model(dummy_input)
            print(f"Classification output shape: {logits.shape}")

            logits, features, projected = model(dummy_input, return_features=True)
            print(f"Features shape: {features.shape}")
            print(f"Projected features shape: {projected.shape}")

            features_only = model.extract_features(dummy_input)
            print(f"Feature extraction shape: {features_only.shape}")
    finally:
        # Hand the model back in the mode it was created in
        model.train(was_training)

    total_params, trainable_params = count_parameters(model)
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")

    print("✓ Model architecture test passed")
    return model

In [11]:
def test_loss_functions():
    """Run each SSL loss once on random logits and print the resulting values.

    Covers both ``ConsistencyLoss`` variants, ``PseudoLabelLoss`` with a 0.7
    threshold, and plain cross-entropy against random targets.
    """
    print("\nTesting loss functions...")

    num_classes = NUM_CLASSES
    batch_size = 8

    # Random stand-ins for two model outputs and a set of labels
    logits1 = torch.randn(batch_size, num_classes)
    logits2 = torch.randn(batch_size, num_classes)
    targets = torch.randint(0, num_classes, (batch_size,))

    # Consistency loss, one pass per supported variant
    for kind, label in (('mse', 'MSE'), ('kl', 'KL')):
        value = ConsistencyLoss(consistency_type=kind)(logits1, logits2)
        print(f"Consistency loss ({label}): {value.item():.4f}")

    # Pseudo-label loss together with the number of confident samples
    pseudo_loss, mask, _ = PseudoLabelLoss(threshold=0.7)(logits1, return_mask=True)
    print(f"Pseudo-label loss: {pseudo_loss.item():.4f}")
    print(f"Confident predictions: {mask.sum().item()}/{batch_size}")

    # Supervised reference loss
    ce_loss = F.cross_entropy(logits1, targets)
    print(f"Cross-entropy loss: {ce_loss.item():.4f}")

    print("✓ Loss function tests passed")

In [12]:
def save_model_config():
    """Write the model/SSL/training configuration to ``model_config.json``.

    The values come from the module-level constants (``MODEL_NAME``,
    ``NUM_CLASSES``, ``IMG_SIZE``, ``FEATURE_DIM``, ``DEVICE``) plus the fixed
    loss settings listed below. The file is written into ``ARTIFACTS_DIR``.

    Returns:
        The configuration dictionary that was written.
    """
    # Compare the device *type* so indexed devices such as "cuda:0" also count
    # as CUDA; comparing str(DEVICE) to 'cuda' would miss them.
    on_cuda = torch.device(DEVICE).type == 'cuda'

    config = {
        "model_architecture": {
            "backbone": MODEL_NAME,
            "num_classes": NUM_CLASSES,
            "img_size": IMG_SIZE,
            "feature_dim": FEATURE_DIM,
            "dropout_rate": 0.5
        },
        "ssl_config": {
            "consistency_loss": "mse",
            "consistency_temperature": 1.0,
            "pseudo_label_threshold": 0.95,
            "pseudo_label_temperature": 1.0
        },
        "training_config": {
            "device": str(DEVICE),
            "mixed_precision": on_cuda
        }
    }

    config_path = ARTIFACTS_DIR / "model_config.json"
    # Explicit encoding keeps the written file independent of the locale default
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)

    print(f"Model configuration saved to: {config_path}")
    return config

In [13]:
# Entry point: smoke-test the model and losses, then persist config and template
if __name__ == "__main__":
    banner = "=" * 60
    print(banner)
    print("PREPARING BASE MODEL FOR SEMI-SUPERVISED LEARNING")
    print(banner)

    # Build the model and check its output shapes
    model = test_model_architecture()

    # Exercise the SSL loss functions
    test_loss_functions()

    # Write the JSON configuration
    config = save_model_config()

    # Bundle the model's current weights with the configuration as a template
    # checkpoint (the state_dict holds whatever weights the model was built with)
    model_path = ARTIFACTS_DIR / "ssl_model_template.pth"
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'config': config,
        'model_class': 'SSLEfficientNet'
    }
    torch.save(checkpoint, model_path)

    print(f"\nModel template saved to: {model_path}")

    print(f"\n{banner}")
    print("BASE MODEL PREPARATION COMPLETED")
    print(banner)
    print("Ready for semi-supervised training!")

    # Closing summary
    total_params, _ = count_parameters(model)
    print("\nModel Summary:")
    print(f"  Architecture: {MODEL_NAME}")
    print(f"  Classes: {metadata['classes']}")
    print(f"  Parameters: {total_params:,}")
    print("  SSL Features: Consistency regularization + Pseudo-labeling")
    print(f"  Device: {DEVICE}")
    print("\nReady for notebook 03_training.ipynb")

PREPARING BASE MODEL FOR SEMI-SUPERVISED LEARNING

Testing model architecture...
Classification output shape: torch.Size([4, 2])
Features shape: torch.Size([4, 1280])
Projected features shape: torch.Size([4, 64])
Feature extraction shape: torch.Size([4, 1280])
Total parameters: 4,705,086
Trainable parameters: 4,705,086
✓ Model architecture test passed

Testing loss functions...
Consistency loss (MSE): 0.1778
Consistency loss (KL): 0.4868
Pseudo-label loss: 0.2017
Confident predictions: 2/8
Cross-entropy loss: 0.4691
✓ Loss function tests passed
Model configuration saved to: artifacts\model_config.json

Model template saved to: artifacts\ssl_model_template.pth

BASE MODEL PREPARATION COMPLETED
Ready for semi-supervised training!

Model Summary:
  Architecture: efficientnet_b0
  Classes: ['Negative', 'Positive']
  Parameters: 4,705,086
  SSL Features: Consistency regularization + Pseudo-labeling
  Device: cpu

Ready for notebook 03_training.ipynb
