<a href="https://colab.research.google.com/github/Shaobin675/Path_in_ML_model_training/blob/main/Path_from_2D_image_to_3D_image_ML_model_training_101.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
'''
augment.py
Putting all augmentations in one pipeline lets the model learn complex combinations of them.
In the real world, an X-ray might be both dark and noisy. If each augmentation were only applied separately,
the model might get confused when it sees both issues at once.
'''
class XRayAugmenter:
    """Training-time augmentation pipeline for X-ray images.

    All transforms are chained in a single ``A.Compose`` so that several
    random perturbations (e.g. a brightness shift and noise) can be applied
    to the same image.

    Args:
        img_size: Side length, in pixels, of the square image produced by
            the initial resize step.
    """

    def __init__(self, img_size=224):
        # The pipeline is exposed as an attribute so it can also be handed
        # to a dataset as its ``transform``.
        self.train_transform = A.Compose([
            A.Resize(img_size, img_size),
            # Realistic variations, each applied with its own probability
            A.RandomBrightnessContrast(p=0.5),
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
            A.HorizontalFlip(p=0.5),
            # Standardize for the ML model (standard ImageNet channel stats)
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])

    def apply(self, image_path):
        """Load one image from disk and run it through the training pipeline.

        Args:
            image_path: Path of the image file to load.

        Returns:
            The ``'image'`` entry of the pipeline output, i.e. the augmented
            image after ``ToTensorV2``.

        Raises:
            OSError: If OpenCV cannot load the file (missing, unreadable or
                in an unsupported/corrupt format).
        """
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside cvtColor.
        if image is None:
            raise OSError(f"Could not read image: {image_path!r}")
        # OpenCV loads channels as BGR; the pipeline expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return self.train_transform(image=image)['image']

import torch
import torch.nn as nn
import torchvision.models as models
#feature_extractor.py or model
class XRayEncoder(nn.Module):
    """Frozen, ImageNet-pretrained ResNet backbone used as a feature extractor.

    The classification head (``fc``) is dropped, so ``forward`` yields one
    flat embedding vector per input image.

    Args:
        model_name: Name of a ResNet-family builder in ``torchvision.models``
            (e.g. ``'resnet50'``, ``'resnet18'``).

    Raises:
        ValueError: If ``model_name`` is not a model builder in
            ``torchvision.models`` or does not build a ``models.ResNet``.
    """

    def __init__(self, model_name='resnet50'):
        super().__init__()

        # Honour the requested architecture instead of always building
        # resnet50. Classes (e.g. models.ResNet) are not weight-loading
        # builders, so they are rejected along with unknown names.
        builder = getattr(models, model_name, None)
        if not callable(builder) or isinstance(builder, type):
            raise ValueError(f"Unknown torchvision model: {model_name!r}")

        # Load pre-trained backbone
        base_model = builder(weights='IMAGENET1K_V1')

        # Dropping the last child only removes the classification head for
        # ResNet-style models, whose final child is the 'fc' layer.
        if not isinstance(base_model, models.ResNet):
            raise ValueError(
                f"XRayEncoder only supports ResNet-family models, got {model_name!r}"
            )

        # Remove the classification head (last layer, named 'fc')
        self.backbone = nn.Sequential(*list(base_model.children())[:-1])

        # Freeze parameters (Optional: either save compute or not)
        for param in self.backbone.parameters():
            param.requires_grad = False

    def forward(self, x):
        """Return flat embeddings for a batch of image tensors.

        Args:
            x: Batch of image tensors accepted by the backbone.

        Returns:
            The backbone output with every dimension after the batch
            dimension flattened, e.g. (N, 2048, 1, 1) -> (N, 2048) for the
            default resnet50.
        """
        embedding = self.backbone(x)
        # Flatten from (N, C, 1, 1) to (N, C); dim 0 (batch) is preserved.
        return torch.flatten(embedding, 1)
import os
import cv2
from torch.utils.data import Dataset, DataLoader
#data_loader.py
class XRayDataset(Dataset):
    """Dataset that lazily loads images from a list of file paths.

    Args:
        image_paths: Sequence of image file paths; one sample per path.
        transform: Optional callable invoked as ``transform(image=img)`` and
            returning a mapping with an ``'image'`` key. When ``None``, the
            RGB image array is returned as loaded.
    """

    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, colour-convert and optionally transform the image at ``idx``.

        Raises:
            OSError: If OpenCV cannot load the file (missing, unreadable or
                in an unsupported/corrupt format).
        """
        # Load one image at a time
        img_path = self.image_paths[idx]
        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # report the offending path rather than failing inside cvtColor.
        if image is None:
            raise OSError(f"Could not read image: {img_path!r}")
        # OpenCV loads channels as BGR; downstream code expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Compare against None explicitly: a transform object that defines
        # __len__/__bool__ could be falsy and would otherwise be skipped.
        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image

# Usage for large-scale:
# loader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=8)

import torch
from torch.utils.data import DataLoader
import glob

def run_production_inference(image_glob="path/to/xrays/*.jpg", batch_size=64,
                             num_workers=4, img_size=224):
    """Embed every image matching ``image_glob`` with ``XRayEncoder``.

    Args:
        image_glob: Glob pattern selecting the image files to embed.
        batch_size: Number of images per forward pass.
        num_workers: Number of DataLoader worker processes loading images.
        img_size: Side length, in pixels, that images are resized to.

    Returns:
        A CPU tensor with one embedding row per image, in sorted path order.

    Raises:
        FileNotFoundError: If no file matches ``image_glob``.
    """
    # A. Setup Device (The "Engine")
    # Prefer NVIDIA GPU (cuda), then Apple Silicon (mps), then CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        # torch.backends.mps is absent on older PyTorch builds.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is not None and mps_backend.is_available():
            device = torch.device("mps")
        else:
            device = torch.device("cpu")

    print(f"Running on: {device}")

    # B. Data discovery
    # Sort so that row i of the result always maps to the same file;
    # glob itself returns paths in arbitrary order.
    image_list = sorted(glob.glob(image_glob))
    if not image_list:
        # Fail before loading model weights; torch.cat on an empty list
        # would otherwise raise an opaque RuntimeError at the very end.
        raise FileNotFoundError(f"No images matched pattern: {image_glob!r}")

    # C. Initialize System Components
    # Inference must be deterministic: only resize + normalize, without the
    # random brightness/noise/flip steps of the training pipeline.
    # NOTE: keep mean/std in sync with the training-time normalization.
    inference_transform = A.Compose([
        A.Resize(img_size, img_size),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    model = XRayEncoder().to(device)  # Move entire model to the device
    model.eval()

    dataset = XRayDataset(image_list, transform=inference_transform)
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        # Pinned host memory only speeds up host-to-CUDA copies.
        pin_memory=(device.type == "cuda"),
    )

    # D. Inference Loop
    all_embeddings = []

    with torch.no_grad():  # Disable gradient math to save memory
        for batch in loader:
            batch = batch.to(device, non_blocking=True)
            # Move results back to CPU for storage/analysis
            all_embeddings.append(model(batch).cpu())

    # Stack all batches into one big matrix
    final_database = torch.cat(all_embeddings, dim=0)
    print(f"✅ Generated {final_database.shape[0]} embeddings.")
    return final_database

# Script entry point: run the inference pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    run_production_inference()