In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import (
    Input, Dense, Conv2D, Conv2DTranspose, MaxPooling2D, UpSampling2D, Flatten, Reshape, Dropout, LSTM,
    RepeatVector
)
from keras.models import Model
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr

In [None]:
# Base directory for the raw NOAA .npy files and every cached intermediate array
# written by this notebook (looks like a Google Drive mount in Colab — adjust when
# running elsewhere).
out_dir = '/content/drive/MyDrive/Physics/Dataset10'

## Data Preprocessing

In [None]:
# Open the raw NOAA train/test arrays as read-only memory maps, so data is only
# read from disk when it is actually indexed.
x_train, y_train, x_test, y_test = (
    np.load(f'{out_dir}/{stem}.npy', mmap_mode='r')
    for stem in ('x_NOAA_train', 'y_NOAA_train', 'x_NOAA_test', 'y_NOAA_test')
)

In [None]:
def scale_data(data):
    """Min-max scale every sample of ``data`` independently to ``[0, 1]``.

    Samples are processed one at a time along axis 0, so a memory-mapped input
    is never loaded as a whole.

    Args:
        data: Array-like with the sample index on axis 0 (an ``np.ndarray`` or
            ``np.memmap``).

    Returns:
        A tuple ``(scaled_data, min_vals, max_vals)``:
        ``scaled_data`` is float32 with the same shape as ``data``;
        ``min_vals`` and ``max_vals`` are float64 arrays of shape
        ``(n_samples, 1, 1, 1)`` holding each sample's minimum and maximum.

    Note:
        A constant sample (max == min) has no range to scale by. It is mapped
        to all zeros instead of dividing by zero and producing NaN/inf.
    """
    # Initialize the scaled data array with the same shape as the input data
    scaled_data = np.zeros_like(data, dtype=np.float32)
    # One min and one max per sample, shaped so they broadcast against 4-D frames
    min_vals = np.zeros((data.shape[0], 1, 1, 1))
    max_vals = np.zeros_like(min_vals)

    for i in range(data.shape[0]):
        sample = data[i]
        min_vals[i] = np.min(sample)
        max_vals[i] = np.max(sample)

        value_range = max_vals[i] - min_vals[i]
        if value_range.item() == 0:
            # Constant sample: avoid 0/0, keep the output finite.
            scaled_data[i] = 0
        else:
            scaled_data[i] = (sample - min_vals[i]) / value_range

    return scaled_data, min_vals, max_vals

# Scale every split sample-by-sample. Only the per-sample min/max of y_test are
# kept: they are saved below and used later to map test targets and predictions
# back to the original scale.
x_train_scaled,_,_ = scale_data(x_train)
x_test_scaled,_,_ = scale_data(x_test)
y_train_scaled,_,_ = scale_data(y_train)
y_test_scaled,min_vals,max_vals = scale_data(y_test)


In [None]:
# Cache the scaled splits and the y_test min/max on disk so that later cells can
# memory-map them instead of recomputing the scaling.
for _stem, _array in (
    ('x_train_scaled', x_train_scaled),
    ('x_test_scaled', x_test_scaled),
    ('y_train_scaled', y_train_scaled),
    ('y_test_scaled', y_test_scaled),
    ('y_test_min_vals', min_vals),
    ('y_test_max_vals', max_vals),
):
    np.save(f'{out_dir}/{_stem}.npy', _array)

In [None]:
# Re-open the cached scaled splits as read/write ('r+') memory maps.
x_train_scaled, x_test_scaled, y_train_scaled, y_test_scaled = (
    np.load(f'{out_dir}/{stem}.npy', mmap_mode='r+')
    for stem in ('x_train_scaled', 'x_test_scaled', 'y_train_scaled', 'y_test_scaled')
)

# Give the inputs an explicit trailing channel axis: (samples, 180, 360, 1).
x_train_scaled = x_train_scaled.reshape((len(x_train_scaled), 180, 360, 1))
x_test_scaled = x_test_scaled.reshape((len(x_test_scaled), 180, 360, 1))

# Per-sample min/max of y_test; small enough to load fully into memory.
min_vals, max_vals = (
    np.load(f'{out_dir}/{stem}.npy')
    for stem in ('y_test_min_vals', 'y_test_max_vals')
)

In [None]:
def prepare_convLSTM_data(x_data, y_data, num_of_sources, input_length=3, forecast_horizon=3,
                          frame_shape=(180, 360, 1)):
    """Cut per-source time series into (input window, forecast window) pairs.

    ``x_data`` and ``y_data`` are treated as ``num_of_sources`` equally long
    blocks laid out back-to-back along axis 0. Inside each block a window slides
    one step at a time: ``input_length`` consecutive frames of ``x_data`` form
    the input, and the ``forecast_horizon`` frames of ``y_data`` that directly
    follow form the target. Windows never cross a block boundary.

    Args:
        x_data: Array with samples on axis 0, used for the input windows.
        y_data: Array with samples on axis 0, used for the target windows.
        num_of_sources: Number of equally sized blocks along axis 0. Samples
            left over when ``len(x_data)`` is not a multiple of it are ignored.
        input_length: Number of frames per input window.
        forecast_horizon: Number of frames per target window.
        frame_shape: Shape every single sample is reshaped to. The default
            ``(180, 360, 1)`` matches the previously hard-coded grid; pass e.g.
            ``(1, 1, 1)`` to window per-sample scalars such as min/max values.

    Returns:
        ``(X, y)`` with shapes ``(n_windows, input_length, *frame_shape)`` and
        ``(n_windows, forecast_horizon, *frame_shape)``. Both are empty arrays
        when a block is too short to hold a single window.
    """
    frame_shape = tuple(frame_shape)

    # Calculate the number of samples per source
    samples_per_source = len(x_data) // num_of_sources
    # Number of valid window start positions inside one source
    windows_per_source = samples_per_source - input_length - forecast_horizon + 1

    X, y = [], []
    for i in range(num_of_sources):
        start_idx = i * samples_per_source
        end_idx = start_idx + samples_per_source

        # Isolate this source and bring every sample to the requested frame shape
        x_sample = x_data[start_idx:end_idx].reshape((samples_per_source,) + frame_shape)
        y_sample = y_data[start_idx:end_idx].reshape((samples_per_source,) + frame_shape)

        for j in range(windows_per_source):
            target_start = j + input_length
            X.append(x_sample[j:target_start])
            y.append(y_sample[target_start:target_start + forecast_horizon])

    return np.array(X), np.array(y)


def restore_original_scale(scaled_data, min_vals, max_vals):
    """Undo min-max scaling.

    Maps values from the scaled range back to ``[min_vals, max_vals]``;
    ``min_vals`` and ``max_vals`` are broadcast against ``scaled_data``.
    """
    value_range = max_vals - min_vals
    return scaled_data * value_range + min_vals


# Define input and output lengths
input_length = 3
forecast_horizon = 3

# Calculate the number of sources
# NOTE(review): assumes every training source contributes exactly 1040 samples — confirm.
num_of_sources = len(x_train_scaled) // 1040

# Prepare ConvLSTM training dataset
x_convLSTM_train, y_convLSTM_train = prepare_convLSTM_data(x_train_scaled, y_train_scaled, num_of_sources, input_length, forecast_horizon)
np.save(f'{out_dir}/x_convLSTM_train.npy', x_convLSTM_train)
np.save(f'{out_dir}/y_convLSTM_train.npy', y_convLSTM_train)


# Prepare ConvLSTM testing dataset (the test set is split into 6 sources)
x_convLSTM_test, y_convLSTM_test = prepare_convLSTM_data(x_test_scaled, y_test_scaled, 6, input_length, forecast_horizon)
np.save(f'{out_dir}/x_convLSTM_test.npy', x_convLSTM_test)
np.save(f'{out_dir}/y_convLSTM_test.npy', y_convLSTM_test)


# Unscaled test targets, windowed exactly like the scaled ones
_, y_convLSTM_test_noscale = prepare_convLSTM_data(x_test, y_test, 6, input_length, forecast_horizon)
np.save(f'{out_dir}/y_convLSTM_test_3D.npy', y_convLSTM_test_noscale)


# Per-frame min/max for every forecast frame, shape (n_windows, forecast_horizon, 1, 1, 1).
# They are taken directly from the unscaled forecast windows, so each value is
# guaranteed to belong to the frame it rescales. Windowing the (N, 1, 1, 1)
# min/max arrays through prepare_convLSTM_data(min_vals, max_vals, ...) does not
# work: its reshape expects full 180x360 frames, and it would pair minima of the
# *input* steps with maxima of the *forecast* steps.
min_vals_convlstm = y_convLSTM_test_noscale.min(axis=(2, 3, 4), keepdims=True).astype(np.float64)
max_vals_convlstm = y_convLSTM_test_noscale.max(axis=(2, 3, 4), keepdims=True).astype(np.float64)
np.save(f'{out_dir}/min_vals_convlstm.npy', min_vals_convlstm)
np.save(f'{out_dir}/max_vals_convlstm.npy', max_vals_convlstm)


# Sanity copy: scaled test targets mapped back to the original scale
y_convLSTM_test_restored = restore_original_scale(y_convLSTM_test, min_vals_convlstm, max_vals_convlstm)
np.save(f'{out_dir}/y_convLSTM_test_restored.npy', y_convLSTM_test_restored)


In [None]:
# Memory-map (read-only) everything the preparation cell wrote to disk.
# Note that `y_convLSTM_test` is bound to the *unscaled* targets here (file
# "y_convLSTM_test_3D.npy"); the scaled ones live in `y_convLSTM_test_scaled`.
(
    x_convLSTM_train_scaled,
    y_convLSTM_train_scaled,
    x_convLSTM_test_scaled,
    y_convLSTM_test_scaled,
    y_convLSTM_test,
    min_vals_convlstm,
    max_vals_convlstm,
    y_convLSTM_test_restored,
) = (
    np.load(f'{out_dir}/{stem}.npy', mmap_mode='r')
    for stem in (
        'x_convLSTM_train',
        'y_convLSTM_train',
        'x_convLSTM_test',
        'y_convLSTM_test',
        'y_convLSTM_test_3D',
        'min_vals_convlstm',
        'max_vals_convlstm',
        'y_convLSTM_test_restored',
    )
)




## Model Training

In [None]:
# Define input shape and model architecture.
# The leading None is the batch axis and is dropped when building the Input layer,
# leaving (3, 180, 360, 1) — presumably (time steps, height, width, channels),
# matching the windows produced during data preparation.
input_shape = (None, 3, 180, 360, 1)
inputs = layers.Input(shape=input_shape[1:])

# Two stacked ConvLSTM layers. return_sequences=True makes each layer emit one
# feature map per time step, and padding="same" keeps the spatial size, so the
# tensor stays 5-D for the Conv3D layer below.
x = layers.ConvLSTM2D(32, (7, 7), padding="same", return_sequences=True, activation="relu")(inputs)
x = layers.ConvLSTM2D(32, (7, 7), padding="same", return_sequences=True, activation="relu")(x)

# Output layer: a single-filter 3-D convolution with linear activation collapses
# the 32 feature channels into one output channel per time step.
outputs = layers.Conv3D(1, (7, 7, 7), activation="linear", padding="same")(x)

# Model setup: mean-squared-error regression trained with Adam (default settings)
model = models.Model(inputs, outputs)
model.compile(loss='mean_squared_error', optimizer='adam')

# Display model architecture
model.summary()


In [None]:
save_dir = '/content/drive/MyDrive/Physics/NOAA/'  # Directory for saving model

# Setup callbacks for saving model, reducing learning rate, and early stopping.
# The checkpoint keeps only the model with the lowest validation loss. The HDF5
# format is selected by the ".h5" suffix of `filepath`; `save_format` is not a
# ModelCheckpoint argument, so it is not passed.
model_checkpoint = ModelCheckpoint(
    filepath=f'{save_dir}ConvLSTM-Origin-NOAA.h5', monitor='val_loss',
    save_best_only=True, verbose=1, mode='min'
)
# Cut the learning rate by 10x after 30 epochs without improvement (floor 1e-5).
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=30, min_lr=1e-5
)
# Stop after 45 epochs without improvement and roll back to the best weights.
early_stop = EarlyStopping(
    monitor='val_loss', patience=45, verbose=1, restore_best_weights=True
)

# Train the model.
# NOTE(review): the test set doubles as validation data, so checkpoint selection
# and early stopping are tuned on the same samples that are evaluated later.
# Consider holding out part of the training data instead.
history = model.fit(
    x_convLSTM_train_scaled, y_convLSTM_train_scaled, epochs=100, batch_size=4,
    callbacks=[reduce_lr, early_stop, model_checkpoint],
    validation_data=(x_convLSTM_test_scaled, y_convLSTM_test_scaled), shuffle=True
)


## Model Testing

In [None]:
# Load the best checkpoint written by ModelCheckpoint during training. The file
# name must match the checkpoint `filepath` ("ConvLSTM-Origin-NOAA.h5"); the
# previously referenced "best_model_convlstm.h5" is never created by this notebook.
model = models.load_model(f'{save_dir}ConvLSTM-Origin-NOAA.h5')

# Predict in scaled space, then map the forecasts back to the original scale
# using the per-frame min/max of the test targets.
res_lstm = model.predict(x_convLSTM_test_scaled)
res_lstm_restored = restore_original_scale(res_lstm, min_vals_convlstm, max_vals_convlstm)

In [None]:
# Evaluate only the first forecast step (index 0 on the horizon axis).
res_lstm_conv = res_lstm_restored[:, 0]
true_state = y_convLSTM_test[:, 0]

# Per-case averages, filled in by the evaluation loop.
ssim_scores, psnr_scores, inference_times = [], [], []

# The predictions are split into `num_cases` consecutive, equally sized cases.
num_cases = 6
case_size = len(res_lstm) // num_cases
def apply_mask(original_images, predicted_images):
    """Zero out predictions wherever the matching reference image is exactly 0.

    Images are paired positionally; pairing stops at the shorter sequence.

    Args:
        original_images: Iterable of reference arrays; entries equal to 0 mark
            the positions to blank out.
        predicted_images: Iterable of prediction arrays, one per reference.

    Returns:
        A list with one masked prediction array per image pair.
    """
    return [
        np.where(reference == 0, 0, prediction)
        for reference, prediction in zip(original_images, predicted_images)
    ]
# Loop over each case
for i in range(num_cases):
    # Get the subset of data for the current case
    case_slice = slice(i * case_size, (i + 1) * case_size)
    res_subset = res_lstm_conv[case_slice]
    true_subset = true_state[case_slice]
    # Blank out the prediction wherever the reference is exactly 0
    res_subset = apply_mask(true_subset, res_subset)

    # Initialize lists to store metrics for the current case
    case_ssim_scores = []
    case_psnr_scores = []
    case_inference_times = []

    # Calculate metrics for each sample in the current case
    for res, true in zip(res_subset, true_subset):
        # Time the metric computation for this sample. This is NOT model
        # inference time: model.predict() already ran in an earlier cell.
        start_time = time.perf_counter()

        # Same value range for both metrics, so they are computed on one scale
        data_range = res.max() - res.min()

        # Calculate SSIM. `channel_axis=-1` replaces the removed `multichannel=True`
        # flag, and float images need an explicit `data_range`.
        ssim_score = ssim(res, true, channel_axis=-1, data_range=data_range)
        case_ssim_scores.append(ssim_score)

        # Calculate PSNR
        psnr_score = psnr(res, true, data_range=data_range)
        case_psnr_scores.append(psnr_score)

        end_time = time.perf_counter()
        inference_time = end_time - start_time
        case_inference_times.append(inference_time)

    # Calculate the average SSIM, PSNR, and metric computation time for the current case
    avg_ssim_score = sum(case_ssim_scores) / len(case_ssim_scores)
    avg_psnr_score = sum(case_psnr_scores) / len(case_psnr_scores)
    avg_inference_time = sum(case_inference_times) / len(case_inference_times)

    # Append the results to the lists
    ssim_scores.append(avg_ssim_score)
    psnr_scores.append(avg_psnr_score)
    inference_times.append(avg_inference_time)

# Print the results for each case
for i in range(num_cases):
    print(f"Case {i+1}:")
    print(f"Average SSIM: {ssim_scores[i]}")
    print(f"Average PSNR: {psnr_scores[i]}")
    print(f"Average Metric Computation Time: {inference_times[i]} seconds")
    print()

In [None]:
# Overall averages across all cases. These must aggregate the per-case lists
# (`ssim_scores`, `psnr_scores`); `avg_ssim_score` / `avg_psnr_score` are single
# floats left over from the last loop iteration and cannot be summed.
average_ssim = sum(ssim_scores) / len(ssim_scores)

# Calculate average PSNR
average_psnr = sum(psnr_scores) / len(psnr_scores)

# Print average values
print("Average SSIM:", average_ssim)
print("Average PSNR:", average_psnr)

### Plot: SSIM and PSNR vs. Number of Sensors

In [None]:
import matplotlib.pyplot as plt

# Data
# NOTE(review): these numbers are hard-coded — presumably copied from the
# per-case output of the evaluation cell — and are not refreshed when the model
# is re-evaluated.
# The lists are deliberately NOT called `ssim` / `psnr`: those names are the
# scikit-image metric functions imported at the top, and rebinding them would
# break the evaluation cell on the next run.
num_sensors = [200, 240, 280, 300, 320, 340]
ssim_values = [
    0.7237781062141283,
    0.7539283448007513,
    0.7524165386144361,
    0.7638430242366332,
    0.7577072242956256,
    0.7650417976687304
]

# PSNR values from each case
psnr_values = [
    27.63017235612781,
    29.54727918068748,
    29.667194725218977,
    30.49954460895071,
    29.44647984272573,
    30.73008736135083
]

# Left axis: SSIM
fig, ax1 = plt.subplots(figsize=(10, 5))
ax1.plot(num_sensors, ssim_values, color='blue', marker='o', label='SSIM')
ax1.set_xlabel('Number of Sensors', fontsize=14)
ax1.set_ylabel('SSIM', color='blue', fontsize=14)
ax1.tick_params(axis='y', labelcolor='blue')
ax1.tick_params(axis='both', labelsize=12)
ax1.grid(True)

# Right axis (shared x): PSNR
ax2 = ax1.twinx()
ax2.plot(num_sensors, psnr_values, color='orange', marker='o', label='PSNR')
ax2.set_ylabel('PSNR', color='orange', fontsize=14)
ax2.tick_params(axis='y', labelcolor='orange')
ax2.tick_params(axis='both', labelsize=12)

plt.title('SSIM and PSNR vs. Number of Sensors on Test Datasets', fontsize=16)

fig.tight_layout()
plt.show()