# Incremental Training
We used an additional 50 days of data for incremental learning. Due to hardware limitations,   
we split the 50 days of data into 50 datasets and trained on them sequentially, producing 50+1 models (the base model plus one fine-tuned model per dataset).  


In [1]:
import os
import shutil
from datetime import datetime, timedelta

#  Set folder paths
data_folder = './weekData'
output_folder = './weekData'
max_files_per_folder = 288
max_gap_minutes = 25  # 25 minutes, equivalent to 5 files


def parse_file_time(file_name):
    """Return the minute-resolution timestamp encoded in a file name.

    The third '_'-separated field of the name must start with 12 digits in
    YYYYmmddHHMM form; anything after those 12 characters is ignored.

    Raises:
        IndexError: if the name has fewer than three '_'-separated fields.
        ValueError: if the 12 characters are not a valid timestamp.
    """
    return datetime.strptime(file_name.split('_')[2][:12], '%Y%m%d%H%M')


def move_batch(batch, folder_index):
    """Move every file in `batch` from data_folder to output_folder/<folder_index>."""
    target_folder = os.path.join(output_folder, str(folder_index))
    os.makedirs(target_folder, exist_ok=True)
    for f in batch:
        shutil.move(os.path.join(data_folder, f), os.path.join(target_folder, f))
    print(f"Created folder {target_folder} with {len(batch)} files.")


#  Parse each PNG's timestamp once, then order the files chronologically
file_times = {f: parse_file_time(f) for f in os.listdir(data_folder) if f.endswith('.png')}
file_list = sorted(file_times, key=file_times.get)

#  Initialize variables
current_folder_index = 5  # Number of the first folder to create (hard-coded; presumably lower numbers are already in use - confirm)
current_batch = []
previous_time = None

#  Iterate through files and group them
for file_name in file_list:
    current_time = file_times[file_name]

    #  A batch is closed when it is full or when the gap to the previous file is too long
    batch_full = len(current_batch) >= max_files_per_folder
    gap_too_long = (
        previous_time is not None
        and current_time - previous_time > timedelta(minutes=max_gap_minutes)
    )

    #  Nothing to flush for the very first file (the batch is still empty)
    if current_batch and (batch_full or gap_too_long):
        move_batch(current_batch, current_folder_index)
        current_folder_index += 1
        current_batch = []

    #  Add the current file to the batch
    current_batch.append(file_name)
    previous_time = current_time

#  Save the last batch of files to a new folder
if current_batch:
    move_batch(current_batch, current_folder_index)


# Start

In [3]:
import os
import re
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

def get_next_model_step(folder_path='./Models/Add/'):
    """Return the index the next fine-tuned model should be saved under.

    Scans `folder_path` for files named exactly ``SP480_<number>.h5`` and
    returns the largest number found plus one, or 1 when no such file exists.

    Args:
        folder_path: Directory that holds the saved models.

    Returns:
        int: next model index (>= 1).

    Raises:
        FileNotFoundError: if `folder_path` does not exist.
    """
    model_pattern = re.compile(r'SP480_(\d+)\.h5')

    #  fullmatch (not match) so that names with a trailing suffix, e.g.
    #  'SP480_7.h5.bak', are not mistaken for saved models
    matches = (model_pattern.fullmatch(name) for name in os.listdir(folder_path))
    model_numbers = [int(m.group(1)) for m in matches if m]

    return max(model_numbers, default=0) + 1


In [4]:
import os
import cv2
import numpy as np
from skimage import morphology
import matplotlib.pyplot as plt

def readPicture(times, model):
    """Load and preprocess every PNG frame stored in ``./weekData/<times>``.

    Each readable frame is converted to grayscale, pixels with a value of 30
    or less are masked out, small connected regions are dropped using
    ``min_rain_area`` as the size limit, and the masked grayscale frame is
    scaled to the 0-1 range.

    Args:
        times: Name (number) of the sub-folder of ``./weekData`` to read.
        model: Unused; kept so existing callers keep working.

    Returns:
        list of 2-D float32 arrays, one per readable PNG, in sorted file-name
        order. Files that fail to load are reported and skipped.
    """
    #  Input folder path
    input_folder = './weekData/' + str(times)

    #  Set minimum rain area threshold
    min_rain_area = 10  # Adjust as needed

    processed_images = []

    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.png'):
            continue

        #  Step 1: Read the image
        img = cv2.imread(os.path.join(input_folder, filename))
        if img is None:
            print(f"Error: Image {filename} failed to load, skipping this file.")
            continue

        #  Step 2: Convert to grayscale
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        #  Step 3: Binary threshold - pixels brighter than 30 become foreground
        _, binary_img = cv2.threshold(gray_img, 30, 255, cv2.THRESH_BINARY)

        #  Step 4: Remove small foreground regions, then turn the mask back into 0/255
        rain_mask = morphology.remove_small_objects(binary_img.astype(bool), min_rain_area)
        rain_mask = (rain_mask * 255).astype(np.uint8)

        #  Step 5: Keep the grayscale values only where the mask is set
        processed_img = cv2.bitwise_and(gray_img, gray_img, mask=rain_mask)

        #  Step 6: Normalize to 0-1 and store as float32; the frames are only used as
        #  model input, and float32 halves the memory of every array built from them
        processed_images.append((processed_img / 255.0).astype(np.float32))

    return processed_images


In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, ConvLSTM2D, TimeDistributed, Dropout, Reshape
from tensorflow.keras.optimizers import Adam

def setDataShape(processed_images, time_steps=12, pred_steps=12):
    """Build sliding-window input/target arrays from a sequence of frames.

    Sample ``i`` uses frames ``i .. i+time_steps-1`` as input and the
    following ``pred_steps`` frames as target. A trailing channel axis of
    size 1 is added to every frame.

    Args:
        processed_images: Sequence of equally shaped frames (e.g. a list of
            (H, W) arrays).
        time_steps: Number of input frames per sample.
        pred_steps: Number of target frames per sample.

    Returns:
        (X_train, y_train) with shapes ``(num_samples, time_steps, ..., 1)``
        and ``(num_samples, pred_steps, ..., 1)``. When there are fewer than
        ``time_steps + pred_steps`` frames both arrays have zero samples but
        keep the remaining dimensions, so callers can test ``len(X_train) == 0``.
    """
    frames = np.asarray(processed_images)[..., np.newaxis]  # (N, H, W) -> (N, H, W, 1)
    num_samples = len(frames) - time_steps - pred_steps + 1

    if num_samples <= 0:
        #  Not enough frames for a single window: return well-formed empty arrays
        frame_shape = frames.shape[1:]
        X_train = np.empty((0, time_steps) + frame_shape, dtype=frames.dtype)
        y_train = np.empty((0, pred_steps) + frame_shape, dtype=frames.dtype)
        return X_train, y_train

    #  Build input and output data
    X_train = np.array([frames[i:i + time_steps] for i in range(num_samples)])
    y_train = np.array([frames[i + time_steps:i + time_steps + pred_steps] for i in range(num_samples)])

    return X_train, y_train

def data_generator(processed_images, time_steps, pred_steps, batch_size):
    """Endlessly yield (X, y) float32 batches of sliding windows over the frames.

    Window ``j`` uses frames ``j .. j+time_steps-1`` as input and the next
    ``pred_steps`` frames as target. Batches are produced in order; the last
    batch of a pass may be smaller than `batch_size`, after which the
    generator starts over from the first window.

    Args:
        processed_images: Sliceable sequence of equally shaped frames.
        time_steps: Number of input frames per window.
        pred_steps: Number of target frames per window.
        batch_size: Maximum number of windows per yielded batch (>= 1).

    Raises:
        ValueError: on first iteration, if `batch_size` is smaller than 1 or
            there are too few frames for a single window. Without this check
            the endless loop would spin forever without yielding anything.
    """
    num_samples = len(processed_images) - time_steps - pred_steps + 1
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if num_samples <= 0:
        raise ValueError(
            f"Need at least {time_steps + pred_steps} frames to build one sample, "
            f"got {len(processed_images)}"
        )

    while True:
        for start in range(0, num_samples, batch_size):
            #  Window start indices of this batch (shared by inputs and targets)
            window_starts = range(start, min(start + batch_size, num_samples))
            X = np.array([processed_images[j:j + time_steps] for j in window_starts], dtype=np.float32)
            y = np.array([processed_images[j + time_steps:j + time_steps + pred_steps] for j in window_starts], dtype=np.float32)
            yield X, y

In [6]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from skimage.metrics import structural_similarity as ssim

def test(model):
    """Evaluate `model` on the PNG frames in ``./dataOneDayTest`` and plot the result.

    Frames are read as grayscale in sorted file-name order, resized to
    480x480 and scaled to 0-1. Sliding windows of 12 input frames followed by
    12 target frames are built and passed to ``model.predict``.

    Args:
        model: Object whose ``predict(X)`` returns an array indexable as
            ``[sample, step, row, col, 0]`` for input ``X`` of shape
            (num_samples, 12, 480, 480, 1).

    Returns:
        (mse, mae, mean_ssim). MSE and MAE are computed over all samples;
        the SSIM is the mean over the 12 predicted frames of the FIRST sample
        only, which is also the sample that is plotted.

    Raises:
        ValueError: if fewer than 24 readable frames are available.
    """
    #  Folder path
    test_folder = './dataOneDayTest'
    time_steps = 12
    pred_steps = 12

    #  Load and preprocess test image data
    test_images = []
    image_files = sorted([f for f in os.listdir(test_folder) if f.endswith('.png')])
    for file in image_files:
        img = cv2.imread(os.path.join(test_folder, file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            #  cv2.imread returns None for unreadable files; skip them as readPicture does
            print(f"Error: Image {file} failed to load, skipping this file.")
            continue
        resized_img = cv2.resize(img, (480, 480), interpolation=cv2.INTER_AREA)
        test_images.append(resized_img / 255.0)  # Normalize

    num_samples = len(test_images) - time_steps - pred_steps + 1
    if num_samples <= 0:
        raise ValueError(
            f"Need at least {time_steps + pred_steps} readable images in {test_folder}, "
            f"found {len(test_images)}"
        )

    #  Convert to NumPy array and add channel dimension: (N, 480, 480) -> (N, 480, 480, 1)
    test_images = np.array(test_images)[..., np.newaxis]

    #  Prepare test input data
    X_test = np.array([test_images[i:i + time_steps] for i in range(num_samples)])
    y_actual = np.array([test_images[i + time_steps:i + time_steps + pred_steps] for i in range(num_samples)])

    #  Predict using the model
    y_pred = model.predict(X_test)

    #  Calculate MSE and MAE over every pixel of every sample
    mse = mean_squared_error(y_actual.flatten(), y_pred.flatten())
    mae = mean_absolute_error(y_actual.flatten(), y_pred.flatten())

    #  Calculate Structural Similarity (SSIM) frame by frame for the first sample
    ssim_scores = [
        ssim(y_actual[0, i, :, :, 0], y_pred[0, i, :, :, 0], data_range=1.0)
        for i in range(pred_steps)
    ]
    mean_ssim = np.mean(ssim_scores)

    #  Output evaluation metrics
    print(f"Mean Squared Error (MSE) on Test Set: {mse}")
    print(f"Mean Absolute Error (MAE) on Test Set: {mae}")
    print(f"Mean Structural Similarity (SSIM) on Test Set: {mean_ssim}")

    #  Visualize predicted vs. actual frames of the first sample, one row per step
    fig, axs = plt.subplots(pred_steps, 2, figsize=(10, 40))
    for i in range(pred_steps):
        #  Display predicted result
        axs[i, 0].imshow(y_pred[0, i, :, :, 0], cmap='gray')
        axs[i, 0].set_title(f"Predicted Frame {i+1}")
        axs[i, 0].axis('off')

        #  Display actual result
        axs[i, 1].imshow(y_actual[0, i, :, :, 0], cmap='gray')
        axs[i, 1].set_title(f"Actual Frame {i+1}")
        axs[i, 1].axis('off')

    plt.tight_layout()
    plt.show()
    return mse, mae, mean_ssim


In [7]:
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

import gc

start_from = get_next_model_step()
print(f"The next model number is: {start_from}")
max_times = 56
time_steps = 12
pred_steps = 12

#  Incremental training parameters (the same for every round)
learning_rate = 1e-5  # Set a low learning rate
max_epochs = 9        # Upper bound; early stopping may end a round sooner
batch_size = 1        # One sample per step

#  Rounds that are released before the next one starts
model = history = processed_images = X_train = y_train = None

#  Fine-tune one round per data folder, from the next unsaved model up to max_times
for times in range(start_from, max_times+1):

    #  Release the previous round's model and data before allocating the new ones,
    #  then clear the Keras global state. This must happen BEFORE the new model is
    #  loaded so that only the previous round's state is discarded.
    model = history = processed_images = X_train = y_train = None
    gc.collect()
    tf.keras.backend.clear_session()

    #  Load the model produced by the previous round
    model_path = './Models/Add/SP480_' + str(times-1) + '.h5'
    model = load_model(model_path)

    #  Compile the model with a fresh optimizer at the low learning rate
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    print(f"\nStarting fine-tuning and testing round {times}...")

    #  Read the current batch of images
    processed_images = readPicture(times, model)

    #  Call setDataShape function to get training data
    X_train, y_train = setDataShape(processed_images)
    if len(X_train) == 0:
        raise ValueError(
            f"Round {times}: only {len(processed_images)} usable images, "
            f"need at least {time_steps + pred_steps} to build one training sample"
        )

    #  A fresh callback per round, so it holds no reference to the previous model
    early_stopping = EarlyStopping(
        monitor='loss',
        patience=3,
        min_delta=0.0001,
        restore_best_weights=True
    )

    history = model.fit(
        X_train, y_train,
        epochs=max_epochs,
        batch_size=batch_size,
        callbacks=[early_stopping]  # Use EarlyStopping
    )

    #  Save the fine-tuned model
    fine_tuned_model_path = f'./Models/Add/SP480_{times}.h5'
    model.save(fine_tuned_model_path)

    #  Evaluation is currently disabled. To re-enable it, create test_results = {}
    #  before the loop and uncomment:
    # mse, mae, mean_ssim = test(model)
    # test_results[times] = [mse, mae, mean_ssim]
    # print(f"Round {times} test results - MSE: {mse}, MAE: {mae}, SSIM: {mean_ssim}")

# # Print all test results
# print("\nAll test results:")
# for i, (mse, mae, mean_ssim) in sorted(test_results.items()):
#     print(f"Model {i} after fine-tuning: MSE = {mse}, MAE = {mae}, SSIM = {mean_ssim}")


下一个模型编号为：51

正在进行第 51 次微调和测试...
Epoch 1/9
Epoch 2/9

ResourceExhaustedError: Graph execution error:

Detected at node 'gradient_tape/sequential/time_distributed_3/conv2d_2/Conv2D/Conv2DBackpropInput' defined at (most recent call last):
    File "E:\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "E:\Python\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "E:\Python\Python310\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "E:\Python\Python310\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "E:\Python\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request
      await super().execute_request(stream, ident, parent)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute
      res = shell.run_cell(
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell
      result = self._run_cell(
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell
      result = runner(coro)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
      coro.send(None)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Reol\AppData\Local\Temp\ipykernel_10332\2433934260.py", line 47, in <module>
      history = model.fit(
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\engine\training.py", line 997, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 576, in minimize
      grads_and_vars = self._compute_gradients(
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 634, in _compute_gradients
      grads_and_vars = self._get_gradients(
    File "e:\study-master\NUSCampus\sem1\IND5003\code\venv\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 510, in _get_gradients
      grads = tape.gradient(loss, var_list, grad_loss)
Node: 'gradient_tape/sequential/time_distributed_3/conv2d_2/Conv2D/Conv2DBackpropInput'
OOM when allocating tensor with shape[12,480,480,8] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node gradient_tape/sequential/time_distributed_3/conv2d_2/Conv2D/Conv2DBackpropInput}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_5207]