### Set working dir

In [1]:
import os
WORKING_DIR = "/Users/thuang/Documents/Personal/code/microscopy-with-ml"
os.chdir(WORKING_DIR)
print(f"Working directory: {os.getcwd()}")

Working directory: /Users/thuang/Documents/Personal/code/microscopy-with-ml


### Hyperparameter tuning (over encoders) example
* **Working example of Augmentation**
* optuna is just organizing multiple runs in a wrap. Can simply run the range of my desired tuning in a loop and compare it in mlflow?

In [2]:
import torch
import segmentation_models_pytorch as smp
import numpy as np
import optuna
from torch.utils.data import DataLoader, Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torchvision.transforms as transforms
import cv2
import os

from mwm.components.image_processing import get_gt_mask_png, read_image_png

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# 🔹 Define Dataset Class
class NucleiDataset(Dataset):
    def __init__(self, image_dir, mask_dir, image_list, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.image_list = image_list # This is when image_list is pre-selected for train/val/test split
        self.transform = transform

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_list[idx])
        mask_path = os.path.join(self.mask_dir, self.image_list[idx])  # Assuming masks have the same name

        # Read image and mask
        image = read_image_png(img_path)
        mask_raw = read_image_png(mask_path)

        # Normalize & Convert to tensors
        image = image / 255.0  # when import from preprocessed image dir: /norm_images
        mask = get_gt_mask_png(mask_raw[:,:,0])[:,:,1:] # leave out the 1st channel (empty), [0 1]

        # image = cv2.imread(self.image_paths[idx])
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # mask = cv2.imread(self.mask_paths[idx], cv2.IMREAD_GRAYSCALE)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented["image"]
            mask = augmented["mask"]

        # image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
        mask = torch.tensor(mask, dtype=torch.float32).permute(2, 0, 1)

        # mask = mask.unsqueeze(0).float() / 255.0  # Normalize mask

        return image, mask

# 🔹 Define Data Augmentation
transform = A.Compose([
    A.Resize(256, 256),
    A.HorizontalFlip(p=0.5),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])


#########
# 🔹 Load Image & Mask Paths
image_dir = "artifacts/data_ingestion/norm_images"
mask_dir = "artifacts/data_ingestion/masks"
training_set_file = "artifacts/data_ingestion/metadata/training.txt"
image_list = [line.strip() for line in open(training_set_file, "r")]

# image_paths = [os.path.join(image_dir, fname) for fname in os.listdir(image_dir)]
# mask_paths = [os.path.join(mask_dir, fname) for fname in os.listdir(mask_dir)]

dataset = NucleiDataset(image_dir, mask_dir, image_list, transform=transform)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)


#########
# 🔹 Define Training Function
def train_model(encoder_name):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load U-Net with different encoders
    model = smp.Unet(encoder_name=encoder_name, encoder_weights="imagenet", classes=2, activation="sigmoid")
    model = model.to(device)

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    epochs = 3  # For quick testing, increase for better results
    model.train()
    
    for epoch in range(epochs):
        for images, masks in dataloader:
            images, masks = images.to(device), masks.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()

    return evaluate_model(model)

# 🔹 Define Evaluation Function (IoU Score)
def evaluate_model(model):
    model.eval()
    total_iou = 0
    count = 0
    device = next(model.parameters()).device

    with torch.no_grad():
        for images, masks in dataloader:
            images, masks = images.to(device), masks.to(device)
            outputs = torch.sigmoid(model(images))  # Convert logits to probabilities
            outputs = (outputs > 0.5).float()  # Threshold predictions

            intersection = (outputs * masks).sum()
            union = (outputs + masks).sum() - intersection
            iou = intersection / (union + 1e-6)  # Avoid division by zero
            total_iou += iou.item()
            count += 1

    return total_iou / count  # Average IoU score

# 🔹 Optimize Encoder Selection Using Optuna
def objective(trial):
    encoders = ["resnet34", "efficientnet-b0", "mobilenet_v2", "se_resnext50_32x4d"]
    encoder_name = trial.suggest_categorical("encoder_name", encoders)

    iou_score = train_model(encoder_name)
    return iou_score  # Higher IoU is better

# 🔹 Run Hyperparameter Search
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=4)

# 🔹 Print Best Encoder
best_encoder = study.best_params["encoder_name"]
print(f"🏆 Best Encoder: {best_encoder}")


### Compare encoders/pretrained weights in a visual/qualitative sense

#### Classic CNNs

In [None]:
from datetime import datetime
from tqdm import tqdm
import numpy as np
import pandas as pd
from skimage import measure
import mlflow

from mwm import logger
from mwm.constants import *
from mwm.utils.common import read_yaml, load_json
from mwm.components.model_architecture import *
from mwm.components.dataset import *
from mwm.components.image_processing import read_image_png, post_processing_watershed_2ch
from mwm.components.metrics import iou_object_labels, measures_at


class EvaluationProcessor2Channel:
    def __init__(self):
        self.results = []
        self.thresholds = np.round(np.arange(0.5, 1.0, 0.05), 2)


    def prep_evaluation(self, prediction, mask_path):
        self.sample_name = os.path.basename(mask_path).split(".")[0]

        # Convert orginal mask to label
        mask_raw = read_image_png(mask_path)
        self.labels_gt = measure.label(mask_raw[:,:,0], background=0)

        # Convert prediction output to label: 
        prediction = prediction.permute(1, 2, 0).cpu().numpy()
        # TODO: add future denoising step before thresholding
        prediction = (prediction > 0.5).astype(np.uint8)
        reconstruction = post_processing_watershed_2ch(prediction) # key post-processing logic
        self.labels_pred = reconstruction[:mask_raw.shape[0], :mask_raw.shape[1]] # remove padding
            

    def update_metrics(self):
        iou_matrix = iou_object_labels(self.labels_gt, self.labels_pred)
        if iou_matrix.size == 0:
            mean_object_iou = 0.0
        else:
            mean_object_iou = np.max(iou_matrix, axis=0).mean()
        
        # Calculate F1 score at all thresholds
        for t in self.thresholds:
            f1, precision, recall, jaccard, tp, fp, fn = measures_at(t, iou_matrix)
            res = {
                "Sample": self.sample_name, 
                "Threshold": t, 
                "F1": f1, 
                "Precision": precision, 
                "Recall": recall, 
                "Jaccard": jaccard, 
                "MeanObjectIoU": mean_object_iou,
                "TP": tp, 
                "FP": fp, 
                "FN": fn
                }
            self.results.append(res)
    

    def log_key_metrics_to_mlflow(self):
        df = pd.DataFrame(self.results)
        df_agg = df.drop(columns=["Sample"]).groupby("Threshold").mean().reset_index().sort_values("Threshold", ascending=True)
        df_agg_list = df_agg.to_dict("records")
        for row_dict in df_agg_list:
            metrics = {k: v for k, v in row_dict.items() if k != "Threshold"}
            mlflow.log_metrics(metrics, step=int(row_dict["Threshold"]*100))
        mlflow.log_metric("MAF1", df_agg["F1"].mean())
        mlflow.log_metric("MAPrecision", df_agg["Precision"].mean())
        mlflow.log_metric("MARecall", df_agg["Recall"].mean())
        mlflow.log_metric("MAJaccard", df_agg["Jaccard"].mean())
        mlflow.log_param("thresholds", self.thresholds)


    def save_results(self, output_path):
        df = pd.DataFrame(self.results)
        df.to_csv(output_path, index=False)


class Evaluator():
    def __init__(
        self,
        encoder,
        encoder_weights = "imagenet",
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        self.config = read_yaml(config_filepath)
        self.params = load_json(params_filepath)

        # Make & load model
        # self.model_path = self.params.model_file_path
        self.encoder = encoder
        self.encoder_weights = encoder_weights
        
        # self.model = make_model(self.params.network)
        # self.model.load_state_dict(torch.load(self.model_path))
        # logger.info(f"Model loaded from: {self.model_path}")
        self.model = smp.Unet(self.encoder, encoder_weights=self.encoder_weights, in_channels=3, classes=2, activation="sigmoid")

        # Make dataset
        self.image_dir = os.path.join(self.config.data_ingestion.unzip_dir, self.config.dataset.image_dir)
        self.mask_dir = os.path.join(self.config.data_ingestion.unzip_dir, self.config.dataset.mask_dir)
        with open(os.path.join(self.config.data_ingestion.unzip_dir, self.config.dataset.test_set_file), "r") as f:
            self.image_list_test = f.read().splitlines()
        self.test_dataset = make_dataset(self.params.dataset, self.image_dir, self.mask_dir, self.image_list_test)

        # Make save path (optional)
        if self.params.save_predictions:
            model_name = os.path.basename(self.encoder).split(".")[0]
            self.save_dir = os.path.join(self.config.evaluation.evaluation_dir, f"{model_name}_{self.encoder_weights}_predictions")
            os.makedirs(self.save_dir, exist_ok=True)


    def handle_device(self):
        # Move model to GPU if available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(self.device)


    def evaluate(self):
        self.evaluate_processor = EvaluationProcessor2Channel()
        # Set model to evaluation mode
        self.model.eval()

        # Evaluate individual sample without batching
        batch_progress_bar = tqdm(self.test_dataset, desc=f"Evaluation", leave=True)
        with torch.no_grad():
            for image, _ in batch_progress_bar:
                mask_path = self.test_dataset.get_mask_path()
                image = image.to(self.device)

                # TODO: move to Dataset
                # Pad images to match the target size
                image = self.pad_images(image)

                # TODO: any potnetial issue with not using data loader?
                image = image.to(self.device).unsqueeze(0)  # Add batch dimension

                # Get prediction
                output = self.model(image).squeeze()
            
                # Evaluate
                self.evaluate_processor.prep_evaluation(output, mask_path)
                self.evaluate_processor.update_metrics()

                if self.params.save_predictions:
                    save_path = os.path.join(self.save_dir, os.path.basename(mask_path))

                    mask_pred = output.permute(1, 2, 0).cpu().numpy()
                    mask_pred_uint8 = (mask_pred > 0.5).astype(np.uint8)
                    empty_channel = np.zeros_like(mask_pred_uint8[:,:,0])
                    mask_pred_uint8 = np.stack([mask_pred_uint8[:,:,1], empty_channel, mask_pred_uint8[:,:,0]], axis=-1) * 255 # cv2 uses BGR
                    cv2.imwrite(save_path, mask_pred_uint8)

        mlflow.set_experiment("Encoder/Architecture Search")
        with mlflow.start_run():
            mlflow.set_tag("mlflow.runName", f"{self.encoder}_{self.encoder_weights}")

            self.evaluate_processor.log_key_metrics_to_mlflow()

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            save_path = os.path.join(
                self.config.evaluation.evaluation_dir,
                f"evaluation_{timestamp}_on_{os.path.basename(self.encoder).split('.')[0]}.csv"
            )
            self.evaluate_processor.save_results(save_path)

            mlflow.log_param("evaluation_save_path", save_path)
            mlflow.log_param("encoder", self.encoder)
            mlflow.log_param("encoder_weights", self.encoder_weights)
            if self.save_dir:
                mlflow.log_param("save_predictions_dir", self.save_dir)
                

    # TODO: do this in Dataset: use crop and set image_size as a param
    @staticmethod
    def pad_images(images, target_height=544, target_width=704):
        """
        (Move to Dataset class and consider more flexible resizing options: crop, etc.)
        """
        import torch.nn.functional as F
        height, width = images.shape[-2], images.shape[-1]
        pad_height = target_height - height
        pad_width = target_width - width
        padding = (0, pad_width, 0, pad_height, 0, 0)  # (left, right, top, bottom)
        return F.pad(images, padding, mode='constant', value=0)
    
#########
# Main
# Log:
# 01: resnet34, imagenet
# 02: efficientnet-b0, imagenet: √
# 03: efficientnet-b1, imagenet: (inversed)
# 04: mobilenet_v2, imagenet
# 05: se_resnext50_32x4d, imagenet: Connection refused -- TODO: download manually
# 06: resnet50, imagenet
# 07: resnet50, ssl
# 08: resnet50, swsl
# 09: desenet121, imagenet: Connection refused -- TODO: download manually
# 10: efficientnet-b1, advprop
#  - switch channel order : first channel (R) as full-foreground
# 11: efficientnet-b0, advprop: √√
# 12: efficientnet-b2, advprop: ? (features definitely recognized but inversed - 0s in object and 1s in bkg)
# 13: efficientnet-b3, advprop: √√ (makes good visual sense! MeanObjectIoU is low but doesn't matter as metric is not sensitive at this low end)
# 14: efficientnet-b2, imagenet: √√ (visually the closest! (density and location in both channnel). Simply noisy!)
# 15: efficientnet-b3, imagenet

evaluator = Evaluator("efficientnet-b3")
evaluator.handle_device()
evaluator.evaluate()


[2025-03-12 17:46:51,033: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-03-12 17:46:51,036: INFO: common: json file loaded succesfully from: params.json]
[2025-03-12 17:46:51,173: INFO: dataset: Dataset: seg_2ch successfully processed. ]


Evaluation: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s]


#### ConvNeCt

In [None]:
import timm

model_list = [m for m in timm.list_models() if "convnext" in m]
model_list


['convnext_atto',
 'convnext_atto_ols',
 'convnext_atto_rms',
 'convnext_base',
 'convnext_femto',
 'convnext_femto_ols',
 'convnext_large',
 'convnext_large_mlp',
 'convnext_nano',
 'convnext_nano_ols',
 'convnext_pico',
 'convnext_pico_ols',
 'convnext_small',
 'convnext_tiny',
 'convnext_tiny_hnf',
 'convnext_xlarge',
 'convnext_xxlarge',
 'convnext_zepto_rms',
 'convnext_zepto_rms_ols',
 'convnextv2_atto',
 'convnextv2_base',
 'convnextv2_femto',
 'convnextv2_huge',
 'convnextv2_large',
 'convnextv2_nano',
 'convnextv2_pico',
 'convnextv2_small',
 'convnextv2_tiny',
 'test_convnext',
 'test_convnext2',
 'test_convnext3']

In [37]:
# Select model
encoder_name = "convnext_base"  # Change if needed

# Load model with feature extraction
model = timm.create_model(encoder_name, pretrained=True, features_only=True)

# Print available feature maps
print(model.feature_info)

[2025-03-10 18:28:07,236: INFO: _builder: Loading pretrained weights from Hugging Face hub (timm/convnext_base.fb_in22k_ft_in1k)]
[2025-03-10 18:28:07,873: INFO: _hub: [timm/convnext_base.fb_in22k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.]
[2025-03-10 18:28:08,329: INFO: _builder: Missing keys (head.fc.weight, head.fc.bias) discovered while loading pretrained weights. This is expected if model is being adapted.]
<timm.models._features.FeatureInfo object at 0x336c10490>


In [33]:
import torch
import torch.nn as nn
import timm

class ConvNeXt_UNet(nn.Module):
    def __init__(self, encoder_name="convnext_base", num_classes=1, pretrained=True):
        super(ConvNeXt_UNet, self).__init__()

        # Load ConvNeXt as the encoder
        self.encoder = timm.create_model(encoder_name, pretrained=pretrained, features_only=True, out_indices=(0, 1, 2, 3, 4))
        encoder_channels = self.encoder.feature_info.channels()  # Get feature map sizes

        # Decoder
        self.decoder = nn.ModuleList([
            self.up_block(encoder_channels[4], encoder_channels[3]),
            self.up_block(encoder_channels[3], encoder_channels[2]),
            self.up_block(encoder_channels[2], encoder_channels[1]),
            self.up_block(encoder_channels[1], encoder_channels[0]),
            self.up_block(encoder_channels[0], num_classes, final_layer=True)
        ])

    def up_block(self, in_channels, out_channels, final_layer=False):
        """Creates an upsampling block with transposed convolution"""
        layers = [
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        ]
        if final_layer:
            layers.append(nn.Sigmoid())  # For binary segmentation; change for multi-class
        return nn.Sequential(*layers)

    def forward(self, x):
        # Encoder forward pass
        enc_features = self.encoder(x)  # List of feature maps at different levels

        # Decoder forward pass
        x = enc_features[-1]  # Start from deepest feature map
        for i, up in enumerate(self.decoder):
            x = up(x)
            if i < len(enc_features) - 1:
                x = torch.cat([x, enc_features[len(enc_features) - 2 - i]], dim=1)  # Skip connection

        return x

# Example Usage
if __name__ == "__main__":
    model = ConvNeXt_UNet(encoder_name="convnext_base", num_classes=1, pretrained=True)
    x = torch.randn(1, 3, 256, 256)  # Example input image
    y = model(x)
    print(y.shape)  # Should be [1, 1, 256, 256] (for binary segmentation)


[2025-03-10 17:12:30,253: INFO: _builder: Loading pretrained weights from Hugging Face hub (timm/convnext_base.fb_in22k_ft_in1k)]
[2025-03-10 17:17:41,717: INFO: _hub: [timm/convnext_base.fb_in22k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.]
[2025-03-10 17:17:42,341: INFO: _builder: Missing keys (head.fc.weight, head.fc.bias) discovered while loading pretrained weights. This is expected if model is being adapted.]


IndexError: list index out of range