In [1]:
!pip install gdown



In [2]:
# 确定下载链接和文件名
DIRECT_LINK = "https://prod-dcd-datasets-public-files-eu-west-1.s3.eu-west-1.amazonaws.com/0d67e451-6388-462f-bb51-36d506b4865c"
OUTPUT_FILENAME = "sen2_lulc_data.zip" # 假设是 zip 格式

print(f"开始使用 wget 下载数据集 ({OUTPUT_FILENAME}) 到 /content/ ...")
# 使用 wget 下载文件，-O 指定输出文件名
!wget -O {OUTPUT_FILENAME} {DIRECT_LINK}

print("--- 下载完成 ---")

开始使用 wget 下载数据集 (sen2_lulc_data.zip) 到 /content/ ...
--2025-11-27 10:49:12--  https://prod-dcd-datasets-public-files-eu-west-1.s3.eu-west-1.amazonaws.com/0d67e451-6388-462f-bb51-36d506b4865c
Resolving prod-dcd-datasets-public-files-eu-west-1.s3.eu-west-1.amazonaws.com (prod-dcd-datasets-public-files-eu-west-1.s3.eu-west-1.amazonaws.com)... 52.92.19.146, 3.5.67.254, 52.92.32.178, ...
Connecting to prod-dcd-datasets-public-files-eu-west-1.s3.eu-west-1.amazonaws.com (prod-dcd-datasets-public-files-eu-west-1.s3.eu-west-1.amazonaws.com)|52.92.19.146|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2445753004 (2.3G) [application/zip]
Saving to: ‘sen2_lulc_data.zip’


2025-11-27 10:50:46 (25.1 MB/s) - ‘sen2_lulc_data.zip’ saved [2445753004/2445753004]

--- 下载完成 ---


In [3]:
OUTPUT_FILENAME = "sen2_lulc_data.zip"
UNZIPPED_FOLDER = "/content/sen2_lulc_raw/"

print(f"开始解压 {OUTPUT_FILENAME} 到 {UNZIPPED_FOLDER} ...")
# 创建目标文件夹并解压
!mkdir -p {UNZIPPED_FOLDER}
!unzip {OUTPUT_FILENAME} -d {UNZIPPED_FOLDER}

print("--- 解压完成 ---")
# 打印解压后文件夹中的前几个文件（可选，用于验证）
# !ls {UNZIPPED_FOLDER} | head -n 5

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/11303.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/113060.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/113219.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/113317.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/113344.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/114018.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/114097.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/114196.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/1143.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/114649.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/114777.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/115130.tif  
  inflating: /content/sen2_lulc_raw/SEN-2 LULC/val_masks/val/115462.tif  


In [11]:
# --- 0. 挂载 Google Drive 并安装依赖 ---
from google.colab import drive
import os

# 挂载您的 Google Drive
print("请在弹出的窗口中授权 Google Drive 挂载...")
drive.mount('/content/drive')
# 确保目标保存路径存在
WEIGHTS_SAVE_DIR = '/content/drive/MyDrive/LULC_Pretrain_Weights'
if not os.path.exists(WEIGHTS_SAVE_DIR):
    os.makedirs(WEIGHTS_SAVE_DIR)
print(f"权重将保存到 Drive 路径: {WEIGHTS_SAVE_DIR}")

# 必须安装的库
!pip install rasterio opencv-python

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import glob
import rasterio
import cv2

# 启用 AUTOTUNE
AUTOTUNE = tf.data.AUTOTUNE

# --- 1. 全局配置与参数 (关键修改在这里) ---
IMG_HEIGHT = 64
IMG_WIDTH = 64
NUM_BANDS = 3
NUM_CLASSES = 7
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, NUM_BANDS)

GLOBAL_MIN = 0.0
GLOBAL_MAX = 255.0

# 🚨 关键修改：减少到 8 个 Epoch
EPOCHS = 8
BATCH_SIZE = 16
LEARNING_RATE = 1e-4

# 权重保存到 Drive
MODEL_WEIGHTS_PATH = os.path.join(WEIGHTS_SAVE_DIR, 'best_lulc_pretrained_weights.h5')

# --- 数据集目录配置 (路径保持不变) ---
BASE_RAW_DIR = "/content/sen2_lulc_raw/SEN-2 LULC/"
TRAIN_IMG_DIR = os.path.join(BASE_RAW_DIR, 'train_images', 'train')
TRAIN_MASK_DIR = os.path.join(BASE_RAW_DIR, 'train_masks', 'train')
VAL_IMG_DIR = os.path.join(BASE_RAW_DIR, 'val_images', 'val')
VAL_MASK_DIR = os.path.join(BASE_RAW_DIR, 'val_masks', 'val')


# --- 2. 辅助函数 (保持不变) ---

def global_normalize(img: np.ndarray, global_min: float, global_max: float) -> np.ndarray:
    if global_max == global_min:
        return img.astype(np.float32)
    img_norm = (img - global_min) / (global_max - global_min)
    return np.clip(img_norm, 0.0, 1.0).astype(np.float32)

def create_lulc_paths(img_dir, mask_dir):
    img_files = glob.glob(os.path.join(img_dir, '*.png'))
    patch_paths = []
    for img_path in img_files:
        file_id = os.path.basename(img_path).split('.')[0]
        mask_path = os.path.join(mask_dir, f"{file_id}.tif")
        if os.path.exists(mask_path):
            patch_paths.append((img_path, mask_path))
    return patch_paths


# --- 3. 模型组件 (U-Net) (已修正) ---

def conv_block(input_tensor, num_filters, kernel_size=(3, 3)):
    x = layers.Conv2D(num_filters, kernel_size, activation='relu', kernel_initializer='he_normal', padding='same')(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(num_filters, kernel_size, activation='relu', kernel_initializer='he_normal', padding='same')(x)
    x = layers.BatchNormalization()(x)
    return x

def encoder_block(input_tensor, num_filters):
    x = conv_block(input_tensor, num_filters)
    p = layers.MaxPooling2D((2, 2))(x)
    return x, p

def decoder_block(input_tensor, skip_tensor, num_filters):
    x = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
    x = layers.concatenate([x, skip_tensor])
    x = conv_block(x, num_filters)
    return x

def build_lulc_unet(input_shape, num_classes):
    inputs = layers.Input(input_shape)

    c1, p1 = encoder_block(inputs, 32)
    c2, p2 = encoder_block(p1, 64)
    c3, p3 = encoder_block(p2, 128)
    c4, p4 = encoder_block(p3, 256)

    b = conv_block(p4, 512)

    u4 = decoder_block(b, c4, 256)
    u3 = decoder_block(u4, c3, 128)
    u2 = decoder_block(u3, c2, 64)
    u1 = decoder_block(u2, c1, 32) # 修正：输入是 u2

    outputs = layers.Conv2D(num_classes, (1, 1), activation='softmax')(u1)

    model = Model(inputs=[inputs], outputs=outputs)
    return model


# --- 4. tf.data 管道 (保持不变) ---

@tf.function(input_signature=[
    tf.TensorSpec(None, tf.string),
    tf.TensorSpec(None, tf.string)
])
def load_patch_data_wrapper(img_fp_tensor, mask_fp_tensor):
    def load_numpy_arrays(img_fp, mask_fp):
        img_fp_str = img_fp.numpy().decode('utf-8')
        mask_fp_str = mask_fp.numpy().decode('utf-8')

        img = cv2.imread(img_fp_str, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        with rasterio.open(mask_fp_str) as src:
            label_int = src.read(1)

        X = global_normalize(img.astype(np.float32), GLOBAL_MIN, GLOBAL_MAX)
        label_clipped = np.clip(label_int, 0, NUM_CLASSES - 1).astype(np.int32)
        Y = tf.keras.utils.to_categorical(label_clipped, num_classes=NUM_CLASSES)

        return X.astype(np.float32), Y.astype(np.float32)

    return tf.py_function(
        load_numpy_arrays,
        [img_fp_tensor, mask_fp_tensor],
        [tf.float32, tf.float32]
    )

def create_dataset(patch_files, batch_size, is_training):
    img_paths = [p[0] for p in patch_files]
    mask_paths = [p[1] for p in patch_files]

    dataset = tf.data.Dataset.from_tensor_slices((img_paths, mask_paths))

    if is_training:
        dataset = dataset.shuffle(buffer_size=min(len(patch_files), 10000))

    dataset = dataset.map(
        load_patch_data_wrapper,
        num_parallel_calls=AUTOTUNE
    )

    dataset = dataset.map(lambda x, y: (
        tf.ensure_shape(x, INPUT_SHAPE),
        tf.ensure_shape(y, (IMG_HEIGHT, IMG_WIDTH, NUM_CLASSES))
    ), num_parallel_calls=AUTOTUNE)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(AUTOTUNE)

    return dataset


# --- 5. 主训练函数 ---

def run_lulc_pretraining():
    print("--- 阶段 I: 数据加载器准备 (tf.data Pipeline) ---")

    train_patch_files = create_lulc_paths(TRAIN_IMG_DIR, TRAIN_MASK_DIR)
    val_patch_files = create_lulc_paths(VAL_IMG_DIR, VAL_MASK_DIR)

    if not train_patch_files or not val_patch_files:
        print("❌ 错误：无法找到足够的训练或验证切片。")
        return

    train_dataset = create_dataset(train_patch_files, BATCH_SIZE, is_training=True)
    val_dataset = create_dataset(val_patch_files, BATCH_SIZE, is_training=False)

    print(f"训练样本总数: {len(train_patch_files)}, 验证样本总数: {len(val_patch_files)}")

    print("\n--- 阶段 II: 模型构建与编译 ---")

    model = build_lulc_unet(INPUT_SHAPE, NUM_CLASSES)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss='categorical_crossentropy',
        metrics=['accuracy', tf.keras.metrics.MeanIoU(num_classes=NUM_CLASSES)]
    )

    model_checkpoint_callback = ModelCheckpoint(
        filepath=MODEL_WEIGHTS_PATH,
        monitor='val_mean_io_u',
        mode='max',
        save_best_only=True,
        save_freq='epoch',
        verbose=1
    )
    # 8个Epoch太少，不再设置 Early Stopping

    print(f"模型权重将保存到: {MODEL_WEIGHTS_PATH}")

    print(f"\n--- 阶段 III: 开始 LULC 预训练 (目标: {EPOCHS} 个 Epoch, 使用 GPU 加速抢跑) ---")

    # 检查是否有之前保存的权重，如果有，从那里继续
    if os.path.exists(MODEL_WEIGHTS_PATH):
        model.load_weights(MODEL_WEIGHTS_PATH)
        print(f"✅ 从 Drive 加载了上次的最佳权重，将从这里继续训练。")

    model.fit(
        train_dataset,
        epochs=EPOCHS,
        validation_data=val_dataset,
        callbacks=[model_checkpoint_callback],
        verbose=1
    )

    print("\n--- LULC 预训练完成 ---")

    model.load_weights(MODEL_WEIGHTS_PATH)
    print(f"最佳模型权重已从 Drive 加载: {MODEL_WEIGHTS_PATH}")

    return model

# --- 6. 启动执行 ---
if __name__ == '__main__':
    model = run_lulc_pretraining()

请在弹出的窗口中授权 Google Drive 挂载...
Mounted at /content/drive
权重将保存到 Drive 路径: /content/drive/MyDrive/LULC_Pretrain_Weights
--- 阶段 I: 数据加载器准备 (tf.data Pipeline) ---
训练样本总数: 149600, 验证样本总数: 32079

--- 阶段 II: 模型构建与编译 ---
模型权重将保存到: /content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5

--- 阶段 III: 开始 LULC 预训练 (目标: 8 个 Epoch, 使用 GPU 加速抢跑) ---
Epoch 1/8
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.6346 - loss: 1.0673 - mean_io_u: 0.4286
Epoch 1: val_mean_io_u improved from -inf to 0.42857, saving model to /content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5




[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1280s[0m 134ms/step - accuracy: 0.6346 - loss: 1.0673 - mean_io_u: 0.4286 - val_accuracy: 0.7331 - val_loss: 0.6420 - val_mean_io_u: 0.4286
Epoch 2/8
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step - accuracy: 0.7296 - loss: 0.6442 - mean_io_u: 0.4286
Epoch 2: val_mean_io_u did not improve from 0.42857
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1200s[0m 128ms/step - accuracy: 0.7296 - loss: 0.6442 - mean_io_u: 0.4286 - val_accuracy: 0.7704 - val_loss: 0.5460 - val_mean_io_u: 0.4286
Epoch 3/8
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step - accuracy: 0.7611 - loss: 0.5676 - mean_io_u: 0.4286
Epoch 3: val_mean_io_u improved from 0.42857 to 0.42857, saving model to /content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5




[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1183s[0m 127ms/step - accuracy: 0.7611 - loss: 0.5676 - mean_io_u: 0.4286 - val_accuracy: 0.7923 - val_loss: 0.4915 - val_mean_io_u: 0.4286
Epoch 4/8
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.7804 - loss: 0.5221 - mean_io_u: 0.4289
Epoch 4: val_mean_io_u improved from 0.42857 to 0.42858, saving model to /content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5




[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1219s[0m 130ms/step - accuracy: 0.7804 - loss: 0.5221 - mean_io_u: 0.4289 - val_accuracy: 0.8031 - val_loss: 0.4713 - val_mean_io_u: 0.4286
Epoch 5/8
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step - accuracy: 0.7947 - loss: 0.4883 - mean_io_u: 0.4293
Epoch 5: val_mean_io_u improved from 0.42858 to 0.42867, saving model to /content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5




[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1243s[0m 133ms/step - accuracy: 0.7947 - loss: 0.4883 - mean_io_u: 0.4293 - val_accuracy: 0.8112 - val_loss: 0.4496 - val_mean_io_u: 0.4287
Epoch 6/8
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - accuracy: 0.8052 - loss: 0.4645 - mean_io_u: 0.4296
Epoch 6: val_mean_io_u did not improve from 0.42867
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1180s[0m 126ms/step - accuracy: 0.8052 - loss: 0.4645 - mean_io_u: 0.4296 - val_accuracy: 0.7984 - val_loss: 0.4814 - val_mean_io_u: 0.4286
Epoch 7/8
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - accuracy: 0.8149 - loss: 0.4418 - mean_io_u: 0.4300
Epoch 7: val_mean_io_u improved from 0.42867 to 0.42874, saving model to /content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5




[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1182s[0m 126ms/step - accuracy: 0.8150 - loss: 0.4418 - mean_io_u: 0.4300 - val_accuracy: 0.8210 - val_loss: 0.4285 - val_mean_io_u: 0.4287
Epoch 8/8
[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step - accuracy: 0.8229 - loss: 0.4224 - mean_io_u: 0.4305
Epoch 8: val_mean_io_u improved from 0.42874 to 0.42882, saving model to /content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5




[1m9350/9350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1173s[0m 125ms/step - accuracy: 0.8229 - loss: 0.4224 - mean_io_u: 0.4305 - val_accuracy: 0.8233 - val_loss: 0.4220 - val_mean_io_u: 0.4288

--- LULC 预训练完成 ---
最佳模型权重已从 Drive 加载: /content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5


In [12]:
# 1. 确保 gdown 已安装
!pip install gdown

# 2. 定义下载参数
FILE_ID = '1QlAdzrHpfBIOZ6SK78yHF2i1u6tikmBc'
OUTPUT_FILENAME = 'SECOND_dataset.zip'
DOWNLOAD_DIR = '/content/SECOND_data'

# 3. 创建目标文件夹
import os
if not os.path.exists(DOWNLOAD_DIR):
    os.makedirs(DOWNLOAD_DIR)

OUTPUT_PATH = os.path.join(DOWNLOAD_DIR, OUTPUT_FILENAME)

print(f"开始下载 SECOND 数据集到: {OUTPUT_PATH}")

# 4. 执行下载
!gdown --id $FILE_ID -O $OUTPUT_PATH

# 5. 解压文件 (假设它是一个 zip 文件)
print("\n开始解压文件...")
!unzip -q $OUTPUT_PATH -d $DOWNLOAD_DIR

print("\n🎉 SECOND 数据集下载和解压完成！")
print(f"数据现在位于 {DOWNLOAD_DIR} 目录下，请检查其内部结构。")

# 可选：检查解压后的文件列表
# print("目录内容预览:")
# !ls -F $DOWNLOAD_DIR

开始下载 SECOND 数据集到: /content/SECOND_data/SECOND_dataset.zip
Downloading...
From (original): https://drive.google.com/uc?id=1QlAdzrHpfBIOZ6SK78yHF2i1u6tikmBc
From (redirected): https://drive.google.com/uc?id=1QlAdzrHpfBIOZ6SK78yHF2i1u6tikmBc&confirm=t&uuid=bfc75e1f-f9e8-43b1-a02f-8493b628a36a
To: /content/SECOND_data/SECOND_dataset.zip
100% 2.41G/2.41G [00:23<00:00, 101MB/s]

开始解压文件...
[/content/SECOND_data/SECOND_dataset.zip]
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of /content/SECOND_data/SECOND_dataset.zip or
        /content/SECOND_data/SECOND_dataset.zip.zip, and cannot find /content/SECOND_data/SECOND_dataset.zip.ZIP, period.

🎉 SECOND 数据集下载和解压完成！
数据现在位于 /content/SECOND_data 目录下，请检查其内部结构。


In [15]:
# 安装 unrar 工具
print("安装 unrar 工具...")
!apt-get install unrar

安装 unrar 工具...
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unrar is already the newest version (1:6.1.5-1ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.


In [16]:
# 使用 unrar 工具解压文件
RAR_FILE = '/content/SECOND_data/SECOND_dataset.zip'
OUTPUT_DIR = '/content/SECOND_data/'

print(f"开始解压 RAR 文件: {RAR_FILE}...")
# -x: 提取文件，-o+: 覆盖现有文件（如果存在），-d: 目标目录
!unrar x $RAR_FILE $OUTPUT_DIR

print("\n🎉 SECOND 数据集已成功解压！")
print("现在数据已准备好用于变化检测模型的微调。")

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
Extracting  /content/SECOND_data/im1/02100.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02101.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02102.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02104.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02105.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02106.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02108.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02117.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02118.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02119.png                            17%  OK 
Extracting  /content/SECOND_data/im1/02

In [21]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.callbacks import ModelCheckpoint
import glob
import os
import cv2
import sys

# 启用 AUTOTUNE
AUTOTUNE = tf.data.AUTOTUNE

# --- 0. 挂载 Google Drive 和路径配置 ---
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("✅ Google Drive 挂载成功。")
except ImportError:
    print("❌ 无法在当前环境中挂载 Google Drive。请确保在 Colab 中运行。")

# LULC 预训练的权重路径 (请检查您的 Drive 路径)
LULC_WEIGHTS_PATH = '/content/drive/MyDrive/LULC_Pretrain_Weights/best_lulc_pretrained_weights.h5'
# SECOND 数据集的根目录 (由前面的解压步骤创建)
SECOND_DATA_ROOT = '/content/SECOND_data/'

# --- 1. 全局配置与参数 ---
IMG_HEIGHT = 512
IMG_WIDTH = 512
NUM_BANDS = 3
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, NUM_BANDS)

# 最终变化检测的输出类别数： 4 类
NUM_CHANGE_CLASSES = 4

# 🚨 Epochs 安全策略：10 个 Epoch
EPOCHS = 25
BATCH_SIZE = 4
LEARNING_RATE = 1e-5

# T1 和 T2 图像/标签路径
IM1_DIR = os.path.join(SECOND_DATA_ROOT, 'im1')
IM2_DIR = os.path.join(SECOND_DATA_ROOT, 'im2')
LABEL1_DIR = os.path.join(SECOND_DATA_ROOT, 'label1')
LABEL2_DIR = os.path.join(SECOND_DATA_ROOT, 'label2')

CD_WEIGHTS_PATH = os.path.join(os.path.dirname(LULC_WEIGHTS_PATH), 'best_cd_finetune_weights.h5')


# --- 2. U-Net 辅助函数 (保持不变) ---

def conv_block(input_tensor, num_filters, kernel_size=(3, 3), name_suffix=''):
    x = layers.Conv2D(num_filters, kernel_size, activation='relu', kernel_initializer='he_normal', padding='same', name=f'conv_{num_filters}_a{name_suffix}')(input_tensor)
    x = layers.BatchNormalization(name=f'bn_{num_filters}_a{name_suffix}')(x)
    x = layers.Conv2D(num_filters, kernel_size, activation='relu', kernel_initializer='he_normal', padding='same', name=f'conv_{num_filters}_b{name_suffix}')(x)
    x = layers.BatchNormalization(name=f'bn_{num_filters}_b{name_suffix}')(x)
    return x

def encoder_block(input_tensor, num_filters, name_prefix):
    x = conv_block(input_tensor, num_filters, name_suffix=f'_{name_prefix}')
    p = layers.MaxPooling2D((2, 2), name=f'pool_{num_filters}_{name_prefix}')(x)
    return x, p

def decoder_block(input_tensor, skip_tensor, num_filters):
    x = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
    x = layers.concatenate([x, skip_tensor])
    x = conv_block(x, num_filters)
    return x

# --- 3. Pseudo-Siamese U-Net 模型构建 (已修复命名冲突) ---

def build_pseudo_siamese_unet(input_shape, num_change_classes):
    input_t1 = layers.Input(input_shape, name='input_t1')
    input_t2 = layers.Input(input_shape, name='input_t2')

    c1_t1, p1_t1 = encoder_block(input_t1, 32, 't1')
    c2_t1, p2_t1 = encoder_block(p1_t1, 64, 't1')
    c3_t1, p3_t1 = encoder_block(p2_t1, 128, 't1')
    c4_t1, p4_t1 = encoder_block(p3_t1, 256, 't1')

    c1_t2, p1_t2 = encoder_block(input_t2, 32, 't2')
    c2_t2, p2_t2 = encoder_block(p1_t2, 64, 't2')
    c3_t2, p3_t2 = encoder_block(p2_t2, 128, 't2')
    c4_t2, p4_t2 = encoder_block(p3_t2, 256, 't2')

    # 🚨 命名修正：确保瓶颈层名称唯一
    b_t1 = conv_block(p4_t1, 512, name_suffix='_bottleneck_t1')
    b_t2 = conv_block(p4_t2, 512, name_suffix='_bottleneck_t2')

    bottleneck_diff = layers.Subtract(name='bottleneck_diff')([b_t1, b_t2])

    diff_c4 = layers.Subtract(name='skip_diff_c4')([c4_t1, c4_t2])
    diff_c3 = layers.Subtract(name='skip_diff_c3')([c3_t1, c3_t2])
    diff_c2 = layers.Subtract(name='skip_diff_c2')([c2_t1, c2_t2])
    diff_c1 = layers.Subtract(name='skip_diff_c1')([c1_t1, c1_t2])

    u4 = decoder_block(bottleneck_diff, diff_c4, 256)
    u3 = decoder_block(u4, diff_c3, 128)
    u2 = decoder_block(u3, diff_c2, 64)
    u1 = decoder_block(u2, diff_c1, 32)

    outputs = layers.Conv2D(num_change_classes, (1, 1), activation='softmax', name='change_output')(u1)
    model = Model(inputs=[input_t1, input_t2], outputs=outputs, name='Pseudo_Siamese_CD')
    return model


# --- 4. 权重迁移 (保持不变) ---

def transfer_weights(cd_model, lulc_weights_path):
    if not os.path.exists(lulc_weights_path):
        print(f"❌ 警告：LULC 权重文件不存在于 {lulc_weights_path}")
        return cd_model

    try:
        temp_lulc_model = tf.keras.models.load_model(lulc_weights_path, compile=False)
    except Exception as e:
        print(f"❌ 错误：加载 LULC 权重失败：{e}")
        return cd_model

    print("✅ LULC 预训练权重已加载到临时模型。")

    for lulc_layer in temp_lulc_model.layers:
        weights = lulc_layer.get_weights()
        if not weights: continue

        if lulc_layer.name.startswith(('conv', 'bn', 'pool')) and 't1' not in lulc_layer.name:
            t1_layer_name = lulc_layer.name
            if 'pool' not in lulc_layer.name:
                t1_layer_name = lulc_layer.name.replace('_a', '_a_t1').replace('_b', '_b_t1')
            else:
                 t1_layer_name = lulc_layer.name + '_t1'

            try:
                cd_model.get_layer(t1_layer_name).set_weights(weights)
                cd_model.get_layer(t1_layer_name.replace('_t1', '_t2')).set_weights(weights)
            except (ValueError, KeyError):
                pass

    print("✅ 权重迁移完成：LULC 编码器权重已成功赋给 Pseudo-Siamese 模型的 T1/T2 编码器。")
    del temp_lulc_model
    return cd_model


# --- 5. 变化检测数据加载器 (核心逻辑：已修复 PNG 和路径解码) ---

def create_change_map_label(label1_path, label2_path):
    """
    读取 LULC 标签，计算变化，并将其映射到我们的 4 个目标类别。
    参数 label1_path, label2_path 必须是标准 Python 字符串。
    """
    WATER_CLASS_ID = 4
    VEG_CLASS_IDS = [3, 5]

    # 🚨 使用 cv2 读取 PNG 标签（灰度图）
    L1 = cv2.imread(label1_path, cv2.IMREAD_GRAYSCALE)
    L2 = cv2.imread(label2_path, cv2.IMREAD_GRAYSCALE)

    if L1 is None or L2 is None or L1.shape != L2.shape:
        # 抛出错误以中断 tf.py_function
        raise ValueError(f"LULC 标签加载失败或尺寸不匹配。路径: {label1_path}, {label2_path}")

    CM = np.full_like(L1, 3, dtype=np.int32)

    CM[L1 == L2] = 0
    flood_mask = (L2 == WATER_CLASS_ID) & (L1 != WATER_CLASS_ID)
    CM[flood_mask] = 1
    veg_mask = np.isin(L1, VEG_CLASS_IDS)
    not_veg_mask_l2 = ~np.isin(L2, VEG_CLASS_IDS)
    destruct_mask = veg_mask & not_veg_mask_l2 & (L1 != L2)
    CM[destruct_mask] = 2

    return tf.keras.utils.to_categorical(CM, num_classes=NUM_CHANGE_CLASSES).astype(np.float32)


def load_numpy_arrays(im1_fp_str, im2_fp_str, label1_fp_str, label2_fp_str):
    # 🚨 路径解码修复：在传入 cv2 和 create_change_map_label 之前，显式解码所有 Tensor
    im1_fp_str_decoded = im1_fp_str.numpy().decode('utf-8')
    im2_fp_str_decoded = im2_fp_str.numpy().decode('utf-8')
    label1_fp_str_decoded = label1_fp_str.numpy().decode('utf-8')
    label2_fp_str_decoded = label2_fp_str.numpy().decode('utf-8')

    # 图像加载和归一化 (使用解码后的路径)
    im1 = cv2.cvtColor(cv2.imread(im1_fp_str_decoded, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    im2 = cv2.cvtColor(cv2.imread(im2_fp_str_decoded, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    # 生成变化标签 (使用解码后的路径)
    Y = create_change_map_label(label1_fp_str_decoded, label2_fp_str_decoded)

    return im1, im2, Y

@tf.function(input_signature=[tf.TensorSpec(None, tf.string), tf.TensorSpec(None, tf.string), tf.TensorSpec(None, tf.string), tf.TensorSpec(None, tf.string)])
def load_cd_data_wrapper(im1_fp, im2_fp, label1_fp, label2_fp):

    X1, X2, Y = tf.py_function(
        load_numpy_arrays, [im1_fp, im2_fp, label1_fp, label2_fp], [tf.float32, tf.float32, tf.float32]
    )
    # 确保输出的形状是固定的，否则 tf.data.Dataset 会报错
    X1.set_shape(INPUT_SHAPE)
    X2.set_shape(INPUT_SHAPE)
    Y.set_shape((IMG_HEIGHT, IMG_WIDTH, NUM_CHANGE_CLASSES))
    return (X1, X2), Y


def create_cd_dataset(batch_size, is_training):
    # 收集文件路径，逻辑不变
    img_files = glob.glob(os.path.join(IM1_DIR, '*.png'))
    patch_paths = []

    for img_path_t1 in img_files:
        file_id = os.path.basename(img_path_t1)
        path_t2 = os.path.join(IM2_DIR, file_id)
        path_l1 = os.path.join(LABEL1_DIR, file_id) # .png
        path_l2 = os.path.join(LABEL2_DIR, file_id) # .png

        if os.path.exists(path_t2) and os.path.exists(path_l1) and os.path.exists(path_l2):
             patch_paths.append((img_path_t1, path_t2, path_l1, path_l2))

    if not patch_paths:
        raise ValueError("未能找到匹配的 T1/T2/L1/L2 图像对，请检查路径和文件格式。")

    np.random.shuffle(patch_paths)
    split_idx = int(len(patch_paths) * 0.95)

    current_paths = patch_paths[:split_idx] if is_training else patch_paths[split_idx:]

    paths_tuple = tuple([p[i] for p in current_paths] for i in range(4))
    dataset = tf.data.Dataset.from_tensor_slices(paths_tuple)

    if is_training:
        dataset = dataset.shuffle(buffer_size=min(len(current_paths), 5000))

    dataset = dataset.map(load_cd_data_wrapper, num_parallel_calls=tf.data.AUTOTUNE)

    # 确保 map 和 batch 正常工作
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset, len(current_paths)


# --- 6. 主运行函数 ---

def run_change_detection_finetuning():
    print("\n--- 阶段 I: 数据集准备 (SECOND Changes) ---")

    try:
        train_dataset, num_train = create_cd_dataset(BATCH_SIZE, is_training=True)
        val_dataset, num_val = create_cd_dataset(BATCH_SIZE, is_training=False)

        print(f"训练样本总数: {num_train}, 验证样本总数: {num_val}")
        if num_train == 0 or num_val == 0:
            print("❌ 错误：训练集或验证集样本数为零。请检查文件是否存在。")
            return

    except ValueError as e:
        print(f"❌ 数据集加载失败: {e}")
        return

    print("\n--- 阶段 II: 模型构建与权重迁移 ---")

    cd_model = build_pseudo_siamese_unet(INPUT_SHAPE, NUM_CHANGE_CLASSES)

    cd_model = transfer_weights(cd_model, LULC_WEIGHTS_PATH)

    # 冻结编码器层
    for layer in cd_model.layers:
        if 't1' in layer.name or 't2' in layer.name or 'bottleneck' in layer.name:
            layer.trainable = False

    cd_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss='categorical_crossentropy',
        metrics=['accuracy', tf.keras.metrics.MeanIoU(num_classes=NUM_CHANGE_CLASSES)]
    )

    # 模型检查点保存到 Drive
    cd_checkpoint_callback = ModelCheckpoint(
        filepath=CD_WEIGHTS_PATH, monitor='val_mean_io_u', mode='max', save_best_only=True, verbose=1
    )
    print(f"变化检测权重将保存到 Drive: {CD_WEIGHTS_PATH}")

    print(f"\n--- 阶段 III: 开始变化检测微调 (目标: {EPOCHS} 个 Epoch, 只训练解码器) ---")

    cd_model.fit(
        train_dataset,
        epochs=EPOCHS,
        validation_data=val_dataset,
        callbacks=[cd_checkpoint_callback],
        verbose=1
    )

    print("\n🎉 变化检测微调完成！")

if __name__ == '__main__':
    run_change_detection_finetuning()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Google Drive 挂载成功。

--- 阶段 I: 数据集准备 (SECOND Changes) ---
训练样本总数: 2819, 验证样本总数: 149

--- 阶段 II: 模型构建与权重迁移 ---
✅ LULC 预训练权重已加载到临时模型。
✅ 权重迁移完成：LULC 编码器权重已成功赋给 Pseudo-Siamese 模型的 T1/T2 编码器。
变化检测权重将保存到 Drive: /content/drive/MyDrive/LULC_Pretrain_Weights/best_cd_finetune_weights.h5

--- 阶段 III: 开始变化检测微调 (目标: 10 个 Epoch, 只训练解码器) ---
Epoch 1/10
[1m 82/705[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m3:55[0m 378ms/step - accuracy: 0.2872 - loss: 1.7967 - mean_io_u_2: 0.3750

KeyboardInterrupt: 