<a href="https://colab.research.google.com/github/MalakAhmed2003/Cellula/blob/main/Cellula_task_4_clean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# List the image and label folders on Google Drive.
# NOTE(review): these paths only exist once drive.mount('/content/drive') has run,
# which happens in the next cell — on a fresh runtime this cell fails if run first.
!ls "/content/drive/MyDrive/satalite data/data/images"
!ls "/content/drive/MyDrive/satalite data/data/labels"

In [None]:
import os
import numpy as np
from PIL import Image
import cv2
from google.colab import drive

# === Mount Google Drive ===
# The mount point is a local directory path inside the Colab VM, not a URL.
drive.mount('/content/drive')

# === Set Paths ===
# Folders on the mounted Drive holding the images and their label masks.
# NOTE(review): "satalite" is spelled this way in every path of this notebook,
# presumably matching the real Drive folder name — confirm before "fixing" it.
image_dir = "/content/drive/MyDrive/satalite data/data/images"
mask_dir  = "/content/drive/MyDrive/satalite data/data/labels"

In [None]:
import os
import numpy as np
import tifffile
import cv2
from PIL import Image

def _to_channels_last(img_array, max_channels):
    """Return ``img_array`` as an (H, W, C) array with at most ``max_channels`` bands."""
    if img_array.ndim == 2:
        # Plain grayscale image: add a trailing channel axis.
        img_array = np.expand_dims(img_array, axis=-1)
    elif img_array.shape[0] <= max_channels and img_array.shape[0] < img_array.shape[-1]:
        # Heuristic: a small leading axis that is shorter than the last axis is
        # treated as the band axis (C, H, W) and moved to the end.
        img_array = np.transpose(img_array, (1, 2, 0))
    if img_array.shape[-1] > max_channels:
        img_array = img_array[..., :max_channels]
    return img_array


def load_tif_images_and_png_masks(image_dir, mask_dir, img_size=(128, 128), max_channels=12):
    """Load matching ``.tif`` images and ``.png`` masks into two stacked arrays.

    Files are paired by base name (file name without extension); files without
    a partner in the other directory are ignored. A pair that fails to load is
    reported and skipped instead of aborting the whole run.

    Args:
        image_dir: Directory containing the ``.tif`` images.
        mask_dir: Directory containing the ``.png`` masks.
        img_size: Target size passed to ``cv2.resize`` and ``PIL.Image.resize``;
            both interpret it as ``(width, height)``.
        max_channels: Maximum number of bands kept per image (extra bands are
            dropped from the end).

    Returns:
        ``(images, masks)`` where ``images`` is a float32 array of shape
        ``(N, height, width, C)`` and ``masks`` is a uint8 array of shape
        ``(N, height, width)`` holding 0 (background) or 1 (any non-zero pixel).

    Raises:
        ValueError: If no pair could be loaded. ``np.stack`` also raises
            ``ValueError`` when the loaded images do not all share one shape
            (for example a differing band count).
    """
    images = []
    masks = []

    # Map base name -> file name so images and masks can be matched by name.
    image_files = {os.path.splitext(f)[0]: f for f in os.listdir(image_dir) if f.endswith(".tif")}
    mask_files  = {os.path.splitext(f)[0]: f for f in os.listdir(mask_dir) if f.endswith(".png")}

    # Use only base names present in both directories (sorted for a stable order).
    common_keys = sorted(set(image_files.keys()) & set(mask_files.keys()))
    print(f"✅ Found {len(common_keys)} matched image–mask pairs.")

    for key in common_keys:
        try:
            img_path = os.path.join(image_dir, image_files[key])
            mask_path = os.path.join(mask_dir, mask_files[key])

            # === Load .tif Image ===
            img_array = _to_channels_last(tifffile.imread(img_path), max_channels)
            img_array = cv2.resize(img_array, img_size)

            # cv2.resize returns a 2-D array for single-channel input; restore
            # the channel axis so every sample stays (H, W, C) and the stacked
            # result is always 4-D.
            if img_array.ndim == 2:
                img_array = np.expand_dims(img_array, axis=-1)

            # NOTE(review): the data is divided by 255 regardless of dtype; for
            # 16-bit imagery the result exceeds 1.0 — confirm the intended scaling.
            img_array = img_array.astype(np.float32) / 255.0

            # === Load .png Mask ===
            mask = Image.open(mask_path).convert("L")  # convert to grayscale
            mask = mask.resize(img_size, Image.NEAREST)  # nearest keeps labels discrete
            mask_array = np.array(mask)
            mask_array = (mask_array > 0).astype(np.uint8)  # binary mask

            images.append(img_array)
            masks.append(mask_array)

        except Exception as e:
            # Best effort: report the unreadable pair and continue with the rest.
            print(f"❌ Skipping {key}: {e}")

    if not images:
        raise ValueError("❌ No matching image–mask pairs were loaded.")

    return np.stack(images), np.stack(masks)


In [None]:
# Dataset locations on the mounted Drive.
image_dir = "/content/drive/MyDrive/satalite data/data/images"
mask_dir  = "/content/drive/MyDrive/satalite data/data/labels"

# Read every matched image/mask pair into memory.
X_data, Y_data = load_tif_images_and_png_masks(image_dir=image_dir, mask_dir=mask_dir)

# Two-stage split: hold out 30 % first, then cut that hold-out in half, giving
# 70 % train / 15 % validation / 15 % test. The fixed random_state keeps the
# split reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_temp, Y_train, Y_temp = train_test_split(
    X_data, Y_data, test_size=0.3, random_state=42
)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_temp, Y_temp, test_size=0.5, random_state=42
)

In [None]:
import matplotlib.pyplot as plt

def display_sample(X, Y, index=0):
    """Show one image next to its mask in a 1x2 matplotlib figure.

    Args:
        X: Indexable collection of images; ``X[index]`` must expose ``.shape``
            with the bands on the last axis.
        Y: Indexable collection of masks aligned with ``X``.
        index: Position of the sample to display.
    """
    image, mask = X[index], Y[index]

    # Images with more than three bands are reduced to their first three
    # bands for display.
    if image.shape[-1] > 3:
        image = image[..., :3]

    # (title, data, extra imshow keyword arguments) for each panel, left to right.
    panels = (
        ("Image", image, {}),
        ("Mask", mask, {"cmap": "gray"}),
    )

    plt.figure(figsize=(10, 4))
    for position, (title, data, imshow_kwargs) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(data, **imshow_kwargs)
        plt.title(title)
        plt.axis("off")

    plt.tight_layout()
    plt.show()

# Preview the first loaded image/mask pair.
display_sample(X_data, Y_data, index=0)


In [None]:
from PIL import Image

# Convert every .tif in `image_dir` into an 8-bit PNG preview under `output_dir`.
import tifffile as tiff  # imported here so this cell also works on a fresh kernel

output_dir = "converted_png"
os.makedirs(output_dir, exist_ok=True)

# `image_dir` is a directory path (a string). Iterating over it directly would
# yield single characters, so list the .tif files it contains instead.
tif_paths = sorted(
    os.path.join(image_dir, name)
    for name in os.listdir(image_dir)
    if name.endswith(".tif")
)

for path in tif_paths:
    try:
        img = tiff.imread(path).astype(np.float32)

        if img.ndim == 3:
            # Same layout heuristic as the loader: a short leading axis is
            # treated as the band axis and moved to the end.
            if img.shape[0] < img.shape[-1]:
                img = np.transpose(img, (1, 2, 0))
            # PIL cannot build an 8-bit image from an arbitrary number of bands;
            # keep the first three (as display_sample does) or a single band.
            img = img[..., :3] if img.shape[-1] >= 3 else img[..., 0]

        # Scale to 0-255; an all-zero image is left as zeros instead of
        # dividing by zero, and negative values are clipped rather than wrapped.
        peak = img.max()
        if peak > 0:
            img = img / peak * 255
        img = np.clip(img, 0, 255).astype(np.uint8)

        stem = os.path.splitext(os.path.basename(path))[0]
        out_path = os.path.join(output_dir, stem + ".png")
        Image.fromarray(img).save(out_path)
    except Exception as e:
        # Best effort: report the failing file and continue with the next one.
        print(f"Could not convert {path}: {e}")


In [None]:
import tifffile as tiff
import numpy as np
import matplotlib.pyplot as plt

# Preview one band of a single .tif file as a grayscale image.
# `tiff.imread` needs a file, not a folder, so point at the file named in the
# plot title. NOTE(review): assumes 58.tif exists in the images folder — confirm.
path = "/content/drive/MyDrive/satalite data/data/images/58.tif"

try:
    img = tiff.imread(path)
    print(f"✅ Loaded: shape={img.shape}, dtype={img.dtype}")

    # Pick the band to display first (a 2-D image is shown as is; otherwise
    # index 0 of the last axis), then normalize just that band to [0, 1] so its
    # contrast is not flattened by brighter bands.
    band = img if img.ndim == 2 else img[..., 0]
    band = band.astype(np.float32)
    band -= band.min()
    peak = band.max()
    if peak > 0:
        band /= peak

    plt.imshow(band, cmap='gray')
    plt.title(f"Preview of {path.split('/')[-1]}")
    plt.axis('off')
    plt.show()

except Exception as e:
    print(f"❌ Failed to load: {e}")


In [None]:
import tifffile as tiff
import numpy as np
from tqdm import tqdm
from PIL import Image

def load_and_normalize_tif(path):
    """Read a TIFF and min-max scale it to the range [0, 1].

    The data is converted to float32, shifted so its minimum is 0 and, when
    the shifted maximum is positive, divided by that maximum (a constant
    image therefore becomes all zeros). The scaling uses the global min/max
    of the whole array, not per-band statistics.

    Args:
        path: Location of the TIFF, passed straight to ``tiff.imread``.

    Returns:
        A float32 array; a 2-D input gains a trailing channel axis so the
        result has shape (H, W, 1), other inputs keep their shape.
    """
    scaled = tiff.imread(path).astype(np.float32)

    # Shift to zero, then stretch to one unless the image is constant.
    scaled -= scaled.min()
    peak = scaled.max()
    if peak > 0:
        scaled /= peak

    # Grayscale images get an explicit channel axis: (H, W) -> (H, W, 1).
    return scaled[..., np.newaxis] if scaled.ndim == 2 else scaled

def load_and_prepare_dataset_tiff(image_paths, label_paths):
    """Load paired TIFF images and label images into two NumPy arrays.

    Paths are paired positionally with ``zip`` (so extra entries in the longer
    sequence are ignored). Each image goes through ``load_and_normalize_tif``;
    each label is opened with PIL and converted to int64. A pair that raises
    while loading is reported and left out of the result.

    Args:
        image_paths: Sequence of image file paths (``len()`` is used for the
            progress bar total).
        label_paths: Iterable of label file paths, aligned with ``image_paths``.

    Returns:
        ``(X, Y)`` — ``np.array`` of the loaded images and of the loaded labels.
    """
    X, Y = [], []
    pairs = zip(image_paths, label_paths)

    for img_path, lbl_path in tqdm(pairs, total=len(image_paths)):
        try:
            image = load_and_normalize_tif(img_path)
            label = np.array(Image.open(lbl_path)).astype(np.int64)
        except Exception as e:
            # Report the unreadable pair and carry on with the remaining ones.
            print(f"⚠️ Skipping {img_path}: {e}")
            continue

        X.append(image)
        Y.append(label)

    return np.array(X), np.array(Y)


In [None]:
import matplotlib.pyplot as plt

def show_batch(images, masks, num=4):
    """Plot up to ``num`` images (top row) with their masks (bottom row).

    Args:
        images: Sequence/array of images, each (H, W) or (H, W, C). Images with
            one channel (or no channel axis) are drawn in grayscale; images
            with three or more channels show their first three bands.
        masks: Sequence/array of masks aligned with ``images``.
        num: Maximum number of samples to show. Fewer are shown when the data
            holds fewer samples; nothing is drawn when there is nothing to show.

    Raises:
        ValueError: If an image has a channel count that cannot be displayed
            (for example exactly two channels).
    """
    # Never index past the end of either collection.
    count = min(num, len(images), len(masks))
    if count <= 0:
        return

    plt.figure(figsize=(12, 6))

    for i in range(count):
        # Image
        plt.subplot(2, count, i + 1)
        img = images[i]

        if img.ndim == 2:
            # Grayscale image stored without a channel axis.
            plt.imshow(img, cmap='gray')
        elif img.shape[-1] == 1:
            plt.imshow(img[:, :, 0], cmap='gray')
        elif img.shape[-1] >= 3:
            plt.imshow(img[:, :, :3])  # show first 3 bands
        else:
            raise ValueError(f"Image has unsupported shape: {img.shape}")

        plt.title(f"Image {i}")
        plt.axis("off")

        # Mask (second row, same column as its image)
        plt.subplot(2, count, i + 1 + count)
        plt.imshow(masks[i], cmap='gray')
        plt.title(f"Mask {i}")
        plt.axis("off")

    plt.tight_layout()
    plt.show()

# Show the first 4 training images with their masks.
show_batch(X_train, Y_train, num=4)


In [None]:
# Install segmentation-models-pytorch and efficientnet-pytorch into the runtime.
# NOTE(review): versions are not pinned, so the installed release may change between runs.
!pip install segmentation-models-pytorch efficientnet-pytorch

In [None]:
# 📦 Install segmentation_models_pytorch quietly if needed (imported as `smp` below).
# NOTE(review): the previous cell already installs this package; this line looks redundant.
!pip install segmentation_models_pytorch --quiet

# 🧠 Load Pretrained U-Net
import segmentation_models_pytorch as smp
import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 🔧 U-Net settings, collected in one place before the model is built.
unet_config = dict(
    encoder_name="resnet34",      # encoder backbone
    encoder_weights="imagenet",   # start from ImageNet-pretrained encoder weights
    in_channels=12,               # 12 input bands (not 3-channel RGB)
    classes=1,                    # one output channel for the binary mask
    activation=None,              # raw logits; sigmoid is applied manually at inference
)

# Build the model and place it on the selected device.
# NOTE: a later cell rebinds `model` to a DeepLabV3 network.
model = smp.Unet(**unet_config).to(device)

print("✅ Pretrained U-Net model loaded.")


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights
from torchvision.models import resnet50
import numpy as np
from sklearn.metrics import classification_report

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Helper: Create dataloader from NumPy arrays ===
def create_dataloader(X, Y, batch_size=8, shuffle=False, drop_last=False):
    """Wrap NumPy image/mask arrays in a batched PyTorch ``DataLoader``.

    Args:
        X: Images in NHWC layout; converted to float32 and permuted to NCHW.
        Y: Masks aligned with ``X``; converted to int64 class indices.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples every epoch.
        drop_last: Whether to discard a trailing batch smaller than
            ``batch_size``. Defaults to ``False`` (keep every sample).

    Returns:
        A ``DataLoader`` yielding ``(images, masks)`` tensor pairs.
    """
    X_tensor = torch.tensor(X, dtype=torch.float32).permute(0, 3, 1, 2)  # NHWC → NCHW
    Y_tensor = torch.tensor(Y, dtype=torch.long)  # Masks as integer labels
    dataset = TensorDataset(X_tensor, Y_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)

# === Dataloaders ===
BATCH_SIZE = 8

# A trailing training batch with a single sample makes BatchNorm fail in train
# mode ("Expected more than 1 value per channel when training") inside
# DeepLabV3's image-pooling branch, so that one batch is dropped when it would
# occur. Validation and test loaders run in eval mode and keep every sample.
train_loader = create_dataloader(
    X_train, Y_train, batch_size=BATCH_SIZE, shuffle=True,
    drop_last=(len(X_train) % BATCH_SIZE == 1),
)
val_loader   = create_dataloader(X_val, Y_val, batch_size=BATCH_SIZE, shuffle=False)
test_loader  = create_dataloader(X_test, Y_test, batch_size=BATCH_SIZE, shuffle=False)

# === Load Pretrained DeepLabV3 ===
weights = DeepLabV3_ResNet50_Weights.DEFAULT
model = deeplabv3_resnet50(weights=weights)

# === Modify first conv layer to accept the dataset's band count ===
# Old: Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
# The band count is read from the data (X_train is NHWC) instead of being
# hard-coded, so the model always matches what the loader produced.
in_channels = X_train.shape[-1]
old_conv = model.backbone.conv1
new_conv = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Reuse the pretrained filters for the leading bands; every additional band
# starts from the mean of the pretrained filters.
with torch.no_grad():
    n_pretrained = min(in_channels, old_conv.weight.shape[1])
    new_conv.weight[:, :n_pretrained] = old_conv.weight[:, :n_pretrained]
    if in_channels > n_pretrained:
        mean_weights = old_conv.weight.mean(dim=1, keepdim=True)
        new_conv.weight[:, n_pretrained:] = mean_weights.repeat(
            1, in_channels - n_pretrained, 1, 1
        )

model.backbone.conv1 = new_conv

# === Modify classifier for 2-class output ===
model.classifier[4] = nn.Conv2d(256, 2, kernel_size=1)
model = model.to(device)

# === Loss & Optimizer ===
# Cross-entropy over the per-pixel class logits against the integer masks;
# Adam updates every model parameter with a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# === Training Loop ===
# Each epoch runs one optimisation pass over train_loader followed by a
# gradient-free pass over val_loader. Both printed losses are the sum of the
# per-batch losses divided by the number of batches in the loader.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # switch the model to training mode
    running_loss = 0.0
    for images, masks in train_loader:
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(images)['out']  # [B, C, H, W]
        loss = criterion(outputs, masks)
        loss.backward()  # back-propagate, then apply the parameter update
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {running_loss / len(train_loader):.4f}")

    # === Validation ===
    model.eval()  # switch the model to evaluation mode
    val_loss = 0.0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, masks in val_loader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)['out']
            loss = criterion(outputs, masks)
            val_loss += loss.item()

    print(f"→ Validation Loss: {val_loss / len(val_loader):.4f}")

# === Testing ===
# Run the trained model over the test set and collect per-batch predictions
# together with the matching ground-truth masks.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for images, masks in test_loader:
        images = images.to(device)
        outputs = model(images)['out']
        # The class with the highest logit at each pixel is the prediction.
        preds = outputs.argmax(dim=1).cpu().numpy()
        all_preds.append(preds)
        all_labels.append(masks.numpy())

# Flatten predictions and ground truth into 1-D per-pixel label vectors.
y_true = np.concatenate(all_labels).reshape(-1)
y_pred = np.concatenate(all_preds).reshape(-1)

# === Classification Report ===
# Per-pixel precision / recall / F1 for both classes.
print("\n✅ Water Class Metrics:")
print(
    classification_report(
        y_true,
        y_pred,
        target_names=["Non-Water", "Water"],
        zero_division=0,
    )
)


In [None]:
# Install nbstripout and run it on the notebook file.
!pip install nbstripout
# Running nbstripout on the current notebook from within the notebook might not work as intended.
# If your goal is to clear outputs, use the Colab interface (Edit -> Clear all outputs).
# NOTE(review): the relative path assumes Cellula_task_4.ipynb is in the current working directory.
!nbstripout Cellula_task_4.ipynb

In [None]:
# Check whether Cellula_task_4.ipynb is present in the current working directory.
!ls Cellula_task_4.ipynb

In [None]:
import nbformat

# Write a copy of the notebook without the 'widgets' metadata entry.
notebook_path = "/content/drive/MyDrive/Colab Notebooks/Cellula_task_4.ipynb"
output_path = notebook_path.replace(".ipynb", "_clean.ipynb")

# Notebook files are UTF-8 JSON (this one contains emoji), so open them with an
# explicit encoding instead of relying on the platform default.
with open(notebook_path, encoding="utf-8") as f:
    # NO_CONVERT keeps the notebook in the format version it was saved with.
    nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)

# Remove problematic metadata (no-op when the key is absent)
nb['metadata'].pop('widgets', None)

# Save clean notebook
with open(output_path, 'w', encoding="utf-8") as f:
    nbformat.write(nb, f)

print(f"✅ Cleaned notebook saved to: {output_path}")
