# **init**

In [None]:
# --- Installs ---
import os
os.environ["SM_FRAMEWORK"] = "tf.keras"

# Uninstall the current segmentation-models
!pip uninstall -y segmentation-models
# Install a specific, potentially more compatible version
!pip install segmentation-models==1.0.1

import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm


# TensorFlow / Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks

# Notebook execution
import nbformat
from IPython.core.interactiveshell import InteractiveShell
from sklearn.metrics import confusion_matrix, classification_report

# --- Mount Google Drive ---
from tqdm.notebook import tqdm
import os
from google.colab import drive

# Attach Google Drive at /content/drive unless a mount is already present there.
if os.path.ismount("/content/drive"):
    print("✅ Drive already mounted.")
else:
    drive.mount("/content/drive")



# --- Extract Dataset from Drive ---
import shutil
import zipfile

# To use the medium dataset instead, point zip_path at
# ".../Aerial Segmentation Machine Learning/dataset-medium.zip" and
# extract_to at "/content/dataset-medium".
zip_path = "/content/drive/MyDrive/Aerial Segmentation Machine Learning/chipped_data.zip"
extract_to = "/content/chipped_data"

if not os.path.exists(extract_to):
    # Unpack into a staging directory and rename it only once every member has
    # been written. The existence check above then never mistakes an
    # interrupted extraction for a complete dataset on the next run.
    staging_dir = extract_to + ".partial"
    shutil.rmtree(staging_dir, ignore_errors=True)  # drop leftovers of a failed run
    os.makedirs(staging_dir, exist_ok=True)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        file_list = zip_ref.namelist()
        print(f"📦 Extracting {len(file_list)} files...")
        # Extract member by member so tqdm can show progress.
        for file in tqdm(file_list, desc="🔓 Unzipping"):
            zip_ref.extract(file, path=staging_dir)

    os.rename(staging_dir, extract_to)
    print("✅ Dataset unzipped.")
else:
    print("✅ Dataset already extracted.")



# --- Directories ---
base_dir = "/content/chipped_data/chipped_data"

# Training inputs inside the extracted dataset.
train_image_dir = os.path.join(base_dir, "train/images")
train_elev_dir  = os.path.join(base_dir, "train/elevations")
train_label_dir = os.path.join(base_dir, "train/labels")


def _ensure_dir(path):
    """Create ``path`` (including parents) if it is missing and report what happened.

    Args:
        path: Directory to create.

    Prints one status line: "already exists" when something is present at
    ``path``, otherwise "created" after the directory has been made.
    """
    if os.path.exists(path):
        print(f"✅ Directory already exists: {path}")
        return
    # exist_ok=True closes the gap between the check above and the creation.
    os.makedirs(path, exist_ok=True)
    print(f"📂 Created directory: {path}")


# Output folder for figures.
out_dir = "/content/figs"
_ensure_dir(out_dir)

# Output folder for model checkpoints.
checkpoints = "/content/checkpoints"
_ensure_dir(checkpoints)


In [None]:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

# Install build tools and dependencies
!apt-get install -y build-essential python3-dev
!pip install cython
!pip install git+https://github.com/lucasb-eyer/pydensecrf.git
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_softmax, create_pairwise_bilateral
print("✅ CRF imports working!")


# --- GitHub Access (for a public repository) ---
# Fetch the project repository into /content so the team notebooks can be
# executed from it further down in this cell.
import os

# GitHub repo details
repo_owner = "AronBakes"
repo_name = "Semantic-Segmentation-of-Aerial-Imagery"
branch = "master"

# Plain HTTPS URL without a token; this only works while the repository is public.
repo_url = f"https://github.com/{repo_owner}/{repo_name}.git"
repo_dir = f"/content/{repo_name}"

# First run: clone the requested branch. Later runs: update the existing checkout.
if not os.path.exists(repo_dir):
    print("📥 Cloning public repo...")
    !git clone -b {branch} {repo_url} {repo_dir}
else:
    print("🔄 Pulling latest from GitHub...")
    # git pull acts on the current directory, so enter the checkout first.
    %cd {repo_dir}
    !git pull origin {branch}

# Change directory into the repo (in both branches the kernel ends up here).
%cd {repo_dir}



# Dynamically load team notebooks
import sys
import nbformat
from IPython.core.interactiveshell import InteractiveShell

# Folder holding the team notebooks (the cloned repository).
# Alternative location on Drive: '/content/drive/MyDrive/Colab Notebooks'
notebook_dir = '/content/Semantic-Segmentation-of-Aerial-Imagery'

# Notebooks are executed in exactly this order.
notebooks_to_import = [
    # segmentation pipeline
    "util.ipynb",
    "segformer.ipynb",
    "models.ipynb",
    "callbacks.ipynb",
    "distribute.ipynb",
    "data.ipynb",
    "scoring.ipynb",
    "training.ipynb",
    # generator-related notebooks
    "models_gen.ipynb",
    "data_gen.ipynb",
    "train_generator.ipynb",
]

def run_notebook_cells(path):
    """Execute every code cell of the notebook at ``path`` in the current IPython shell.

    The notebook is parsed with nbformat (as version 4) and each cell whose
    type is ``'code'`` is passed to ``InteractiveShell.run_cell`` in document
    order; other cell types are skipped.

    Execution is best-effort: a cell that errors does not stop the remaining
    cells, but it is reported with its index so a broken import is visible.

    Args:
        path: Filesystem path of the ``.ipynb`` file to execute.

    Raises:
        OSError: If the notebook file cannot be opened.
    """
    # Parse first and close the file before running anything, so no handle
    # stays open while arbitrary notebook code executes.
    with open(path, 'r', encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    shell = InteractiveShell.instance()
    failed = 0
    for index, cell in enumerate(nb.cells):
        if cell.cell_type != 'code':
            continue
        result = shell.run_cell(cell.source)
        # run_cell reports errors through its result object instead of raising.
        if not result.success:
            failed += 1
            print(f"⚠️ Cell {index} of {path} raised an error; continuing with the next cell.")

    if failed:
        print(f"⚠️ {failed} cell(s) failed while running {path}.")


# Execute each listed notebook in order so that whatever its code cells define
# ends up in this kernel's namespace.
for nb_file in notebooks_to_import:
    print(f"📥 Importing {nb_file}")
    nb_path = os.path.join(notebook_dir, nb_file)
    run_notebook_cells(nb_path)



# **Segmentation Model Training**

In [None]:
from tensorflow.keras import mixed_precision

# Re-assert the runtime configuration so this cell does not depend on the
# init cells having applied it.
os.environ["SM_FRAMEWORK"] = "tf.keras"
mixed_precision.set_global_policy('mixed_float16')


# --- Training ---
# train_unet is not defined in this notebook; presumably it comes from the
# team notebooks executed in the init section — confirm.
train_unet(
    base_dir=base_dir,
    out_dir=out_dir,
    input_type="rgb",
    model_type="enhanced_unet",
    batch_size=32,
    epochs=150,
    train_time=240,
    tile_size=512,
    verbose=1,
    yummy=False,
)

# **Synthetic Data Generation**

In [None]:
import tensorflow as tf
import os
import glob
from tqdm import tqdm
import matplotlib.pyplot as plt

# --- Configuration ---

# Saved generator model (.keras checkpoint on Drive) used to synthesize images.
# Point this at whichever checkpoint of the generator training run should be used;
# the file name suggests this one is from epoch 80 — confirm.
GENERATOR_MODEL_PATH = '/content/drive/MyDrive/Aerial Segmentation Machine Learning/data_gen/checkpoints/gen_final_20250625-085333_epoch80.keras'

# Root of the real chipped dataset; its train/labels folder supplies the label maps.
CHIPPED_DATA_DIR = '/content/chipped_data/chipped_data'

# Destination folder on Drive for the generated (synthetic) dataset.
SYNTHETIC_DATASET_DIR = '/content/drive/MyDrive/Aerial Segmentation Machine Learning/synthetic_dataset_v1'

# --- Setup Directories ---
# Layout of the synthetic dataset: <root>/train/images and <root>/train/labels.
SYNTHETIC_IMAGES_DIR = os.path.join(SYNTHETIC_DATASET_DIR, 'train', 'images')
SYNTHETIC_LABELS_DIR = os.path.join(SYNTHETIC_DATASET_DIR, 'train', 'labels')

# Create the root first, then both leaf folders; existing folders are left alone.
os.makedirs(SYNTHETIC_DATASET_DIR, exist_ok=True)
os.makedirs(SYNTHETIC_IMAGES_DIR, exist_ok=True)
os.makedirs(SYNTHETIC_LABELS_DIR, exist_ok=True)


# --- Main Generation Logic ---

print("--- Starting Synthetic Data Generation ---")

# 1. Restore the trained generator from its saved model file.
print(f"Loading generator model from: {GENERATOR_MODEL_PATH}")
generator = tf.keras.models.load_model(GENERATOR_MODEL_PATH)
print("✅ Generator loaded successfully.")

# 2. Collect every PNG label map of the real training set, in sorted order;
#    each one serves as the input ("blueprint") for one synthetic image.
real_label_dir = os.path.join(CHIPPED_DATA_DIR, 'train', 'labels')
label_paths = glob.glob(os.path.join(real_label_dir, '*.png'))
label_paths.sort()
print(f"Found {len(label_paths)} label maps to use as blueprints.")

# 3. Loop, Generate, and Save
# For every real label map: run it through the generator, then store the
# generated image together with a copy of the label map in the synthetic dataset.
import cv2  # imported here so this cell does not rely on an earlier cell's import

failed_label_paths = []  # label maps that could not be turned into a pair

for label_path in tqdm(label_paths, desc="Generating synthetic pairs"):
    try:
        # Load the original label map as a 3-channel uint8 image.
        label_img_raw = tf.io.read_file(label_path)
        label_img = tf.image.decode_png(label_img_raw, channels=3)

        # Preprocess it for the GAN: map [0, 255] to [-1, 1].
        label_tensor = tf.cast(label_img, tf.float32)
        label_tensor = (label_tensor / 127.5) - 1

        # The generator is called on a batch, so add a leading batch dimension.
        input_tensor = tf.expand_dims(label_tensor, 0)

        # Generate the synthetic image and take the single item of the batch.
        generated_image_tensor = generator(input_tensor, training=False)[0]

        # Denormalize for saving (assumes generator output in [-1, 1] — confirm).
        # Clip before the uint8 cast so an out-of-range value cannot wrap around.
        generated_unit = tf.clip_by_value(generated_image_tensor * 0.5 + 0.5, 0.0, 1.0)
        generated_image_np = generated_unit.numpy() * 255
        generated_image_np = generated_image_np.astype('uint8')

        # --- Save the new pair ---
        base_filename = os.path.basename(label_path)

        # Output paths: the label keeps its name, the image swaps the
        # '-label.png' suffix for '-ortho.png'.
        new_label_path = os.path.join(SYNTHETIC_LABELS_DIR, base_filename)
        new_image_filename = base_filename.replace('-label.png', '-ortho.png')
        new_image_path = os.path.join(SYNTHETIC_IMAGES_DIR, new_image_filename)

        # Save the generated image first so a failed write leaves no label
        # without its image. cv2 expects BGR channel order, and imwrite signals
        # failure through its return value instead of raising.
        generated_image_bgr = cv2.cvtColor(generated_image_np, cv2.COLOR_RGB2BGR)
        if not cv2.imwrite(new_image_path, generated_image_bgr):
            raise IOError(f"cv2.imwrite failed for {new_image_path}")

        # Save the original label map (the blueprint). scale=False is required:
        # by default save_img min-max rescales pixel values to [0, 255], which
        # would alter the label values.
        tf.keras.utils.save_img(
            new_label_path,
            tf.cast(label_img, tf.uint8).numpy(),
            scale=False,
        )

    except Exception as e:
        # Best-effort: record the failure and continue with the next label map.
        failed_label_paths.append(label_path)
        print(f"Could not process {label_path}. Error: {e}")

print("\n--- Synthetic Data Generation Complete! ---")
print(f"✅ New dataset saved in: {SYNTHETIC_DATASET_DIR}")
if failed_label_paths:
    print(f"⚠️ {len(failed_label_paths)} of {len(label_paths)} label maps could not be processed.")

In [None]:
import sys
import os
# Put the current working directory at the front of the module search path
# (only once), so modules located there can be imported.
if os.getcwd() not in sys.path:
     sys.path.insert(0, os.getcwd())

# params
# Root of the chipped dataset handed to the dataset builder below.
CHIPPED_DATA_DIR = '/content/chipped_data/chipped_data'

# Build the GAN training dataset with augmentation and shuffling enabled.
# NOTE(review): get_gan_dataset is not defined in this notebook; presumably it
# comes from the team notebooks executed in the init section (data_gen.ipynb?) — confirm.
print("Preparing the dataset for the GAN...")
train_dataset = get_gan_dataset(CHIPPED_DATA_DIR, augment=True, shuffle=True)
print("Dataset ready.")

# Run GAN training on that dataset.
# NOTE(review): train and EPOCHS are not defined in this notebook either;
# presumably they come from train_generator.ipynb — confirm before running on a fresh kernel.
print(f"Starting GAN training for {EPOCHS} epochs...")
train(train_dataset, EPOCHS)
print("--- Training Finished ---")

# **Other**

## **Dataset Distribution**

In [None]:
# Build the full dataset table and plot it under the title "Class Distribution".
# NOTE(review): csv_to_full_df and plot_class_distribution_from_df are not
# defined in this notebook; presumably they come from the team notebooks
# executed in the init section — confirm.
df_full = csv_to_full_df()
plot_class_distribution_from_df(df_full, "Class Distribution")