In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, ConvLSTM2D, BatchNormalization, Conv3D,
    Dense, Multiply
)
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
import joblib, os
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import TimeDistributed



2025-10-25 15:41:57.611297: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:

# Weather variables that get scaled here and are used as channels downstream.
features = ["T2M", "QV2M", "PS", "WS10M", "PRECTOTCORR", "CLRSKY_SFC_SW_DWN"]

# Load the hourly table, drop rows missing any feature, parse the timestamp
# column and put the rows in chronological order.
df = (
    pd.read_csv("/mnt/e/pbl7/clean_weather_data_hourly.csv")
    .dropna(subset=features)
    .assign(datetime=lambda frame: pd.to_datetime(frame["datetime"]))
    .sort_values("datetime")
)

# One independent MinMaxScaler per feature; each is fitted on every remaining
# row and the column is overwritten with its scaled values.
scalers = {name: MinMaxScaler() for name in features}
for name, scaler in scalers.items():
    df[name] = scaler.fit_transform(df[[name]])

# Persist each fitted scaler so predictions can be mapped back to raw units.
os.makedirs("saved_scaler", exist_ok=True)
for f, scaler in scalers.items():
    joblib.dump(scaler, f"saved_scaler/{f}_scaler.pkl")
print(" /ssave in saved_scaler/")


: 

# Check the saved scalers

In [3]:
import os, joblib
from pathlib import Path

# Sanity check of the scalers persisted under saved_scaler/: print the fitted
# range of each one and run a transform -> inverse_transform round trip.
scaler_dir = Path("saved_scaler")
pkl_files = sorted(scaler_dir.glob('*.pkl')) if scaler_dir.exists() else []
if not pkl_files:
    print('Không tìm thấy file scaler trong', scaler_dir)
else:
    for p in pkl_files:
        print('---', p.name, '---')
        # NOTE(review): joblib.load unpickles arbitrary objects; only point this
        # at scaler files produced by this notebook.
        scaler = joblib.load(p)
        # getattr with a None default keeps the report working for objects that
        # lack one of these fitted attributes.
        for k in ('data_min_', 'data_max_', 'data_range_', 'feature_range'):
            print(f'{k}: {getattr(scaler, k, None)}')
        try:
            # The probe value is passed to transform() as a RAW value, so the
            # transformed result can fall outside feature_range whenever the
            # probe lies outside [data_min_, data_max_]; only the round trip
            # back to the probe value is being checked here.
            is_unit_range = getattr(scaler, 'feature_range', None) == (0, 1)
            sample = [[0.5]] if is_unit_range else [[0.0]]
            transformed = scaler.transform(sample)
            inverted = scaler.inverse_transform(transformed)
            print('sample:', sample, '-> transformed:', transformed, '-> inverse:', inverted)
        except Exception as e:
            # Best-effort diagnostic: report the failure and move on to the next file.
            print('Không thể chạy transform/inverse_transform:', e)
        print()


--- CLRSKY_SFC_SW_DWN_scaler.pkl ---
data_min_: [0.]
data_max_: [3.89]
data_range_: [3.89]
feature_range: (0, 1)
sample: [[0.5]] -> transformed: [[0.1285347]] -> inverse: [[0.5]]

--- PRECTOTCORR_scaler.pkl ---
data_min_: [0.]
data_max_: [2178.57]
data_range_: [2178.57]
feature_range: (0, 1)
sample: [[0.5]] -> transformed: [[0.00022951]] -> inverse: [[0.5]]

--- PS_scaler.pkl ---
data_min_: [82.65]
data_max_: [103.33]
data_range_: [20.68]
feature_range: (0, 1)
sample: [[0.5]] -> transformed: [[-3.97243714]] -> inverse: [[0.5]]

--- QV2M_scaler.pkl ---
data_min_: [0.7]
data_max_: [28.05]
data_range_: [27.35]
feature_range: (0, 1)
sample: [[0.5]] -> transformed: [[-0.00731261]] -> inverse: [[0.5]]

--- T2M_scaler.pkl ---
data_min_: [-4.84]
data_max_: [47.2]
data_range_: [52.04]
feature_range: (0, 1)
sample: [[0.5]] -> transformed: [[0.10261337]] -> inverse: [[0.5]]

--- WS10M_scaler.pkl ---
data_min_: [0.]
data_max_: [32.9]
data_range_: [32.9]
feature_range: (0, 1)
sample: [[0.5]] -> tra



# Build the 4D grid (time, latitude, longitude, feature)

In [4]:
# Axes of the dense grid: every distinct latitude, longitude and timestamp in df.
lats = sorted(df["Latitude"].unique())
lons = sorted(df["Longitude"].unique())
times = sorted(df["datetime"].unique())

# Complete (lat, lon) column layout. Reindexing against it creates every cell
# that is absent from the pivot as an all-NaN column in a single step, so no
# column-by-column insertion (which fragments and repeatedly copies the frame)
# is needed.
full_columns = pd.MultiIndex.from_product([lats, lons], names=["Latitude", "Longitude"])

# Preallocate the result as float32 and fill it one feature at a time. This
# avoids holding one float64 array per feature plus the extra full-size copy
# made by np.stack; the batch generator casts to float32 anyway.
grid = np.empty((len(times), len(lats), len(lons), len(features)), dtype=np.float32)
for k, f in enumerate(features):
    pivot = df.pivot_table(index="datetime", columns=["Latitude", "Longitude"], values=f, aggfunc="mean")
    pivot = pivot.reindex(index=times, columns=full_columns)
    # Gaps are filled with the mean of the per-cell means of this feature
    # (all-NaN cells are skipped by pandas when averaging).
    pivot = pivot.fillna(pivot.mean().mean())
    grid[..., k] = pivot.to_numpy().reshape(len(times), len(lats), len(lons))
    del pivot  # release the wide float64 table before building the next one

print("Grid shape:", grid.shape)


MemoryError: Unable to allocate 299. MiB for an array with shape (42192, 928) and data type float64

# Data generator

In [5]:

def data_generator(data, input_steps=168, output_steps=48, batch_size=2, rng=None):
    """Yield random (input, target) window batches from ``data``, forever.

    Every sample is a contiguous slice along the first axis of ``data``:
    ``input_steps`` frames form the input and the ``output_steps`` frames
    that immediately follow form the target.

    Args:
        data: Array-like that supports ``len()`` and slicing along axis 0.
        input_steps: Number of frames in each input window.
        output_steps: Number of frames in each target window.
        batch_size: Number of windows drawn (with replacement) per batch.
        rng: Optional ``numpy.random.Generator`` for reproducible sampling.
            When ``None`` the global ``np.random`` state is used.

    Yields:
        ``(X, y)`` float32 arrays shaped ``(batch_size, input_steps, ...)``
        and ``(batch_size, output_steps, ...)``.

    Raises:
        ValueError: When ``data`` holds fewer than
            ``input_steps + output_steps`` frames. Because this is a
            generator, the error surfaces when the first batch is requested.
    """
    window = input_steps + output_steps
    # Valid start indices are 0 .. len(data) - window inclusive. The upper
    # bound of randint/integers is exclusive, hence the "+ 1".
    n_starts = len(data) - window + 1
    if n_starts < 1:
        raise ValueError(
            f"data has {len(data)} frames but input_steps + output_steps = {window}"
        )

    while True:
        if rng is None:
            idxs = np.random.randint(0, n_starts, batch_size)
        else:
            idxs = rng.integers(0, n_starts, batch_size)
        X_batch = [data[i:i + input_steps] for i in idxs]
        y_batch = [data[i + input_steps:i + window] for i in idxs]
        yield np.array(X_batch, dtype=np.float32), np.array(y_batch, dtype=np.float32)
# Endless batch source over the grid: 168 input frames -> 48 target frames,
# two windows per batch.
gen = data_generator(grid, input_steps=168, output_steps=48, batch_size=2)

# Draw one batch up front; its shapes are reused when the model is defined.
sample_batch = next(gen)
X_sample, y_sample = sample_batch
print("Batch sample:", X_sample.shape, y_sample.shape)


NameError: name 'grid' is not defined

In [None]:
# Model dimensions come from the sample batch: everything after the batch axis
# of X_sample is (time_steps, h, w, c); axis 1 of y_sample is the horizon.
time_steps, h, w, c = X_sample.shape[1:]
output_steps = y_sample.shape[1]

N_FILTERS = 64
KERNEL_2D = (3, 3)
KERNEL_3D = (3, 3, 3)

inputs = Input(shape=(time_steps, h, w, c))

# Encoder: two ConvLSTM2D + BatchNormalization stages that keep the whole sequence.
encoded = inputs
for _stage in range(2):
    encoded = ConvLSTM2D(
        N_FILTERS, KERNEL_2D, padding='same', return_sequences=True, activation='relu'
    )(encoded)
    encoded = BatchNormalization()(encoded)

# Temporal attention: reduce every frame to one scalar (mean over axes 2, 3, 4),
# map those time_steps scalars to softmax weights, then reshape the weights so
# they broadcast over the spatial and channel axes.
frame_means = Lambda(
    lambda t: tf.reduce_mean(t, axis=[2, 3, 4]),
    output_shape=(time_steps,),
    name="lambda_temporal_mean"
)(encoded)
attn_weights = Dense(time_steps, activation='softmax', name="dense_attention")(frame_means)
attn_weights = Lambda(
    lambda t: tf.reshape(t, (-1, time_steps, 1, 1, 1)),
    output_shape=(time_steps, 1, 1, 1),
    name="lambda_temporal_reshape"
)(attn_weights)
weighted = Multiply(name="multiply_attention")([encoded, attn_weights])

# Collapse the time axis to a single frame.
# NOTE(review): this is a mean (not a sum) over softmax-weighted frames, i.e.
# the weighted sum divided by time_steps -- confirm that is intended.
pooled = Lambda(
    lambda t: tf.reduce_mean(t, axis=1, keepdims=True),
    output_shape=(1, h, w, N_FILTERS),
    name="lambda_temporal_pooling"
)(weighted)

# Decoder: two Conv3D layers bring the pooled frame back to c channels.
decoded = Conv3D(N_FILTERS, KERNEL_3D, padding='same', activation='relu', name="conv3d_1")(pooled)
decoded = BatchNormalization()(decoded)
decoded = Conv3D(c, KERNEL_3D, padding='same', activation='linear', name="conv3d_out")(decoded)

# The single decoded frame is tiled along the time axis, so all output_steps
# frames of a prediction are identical.
forecast = Lambda(
    lambda t: tf.repeat(t, output_steps, axis=1),
    output_shape=(output_steps, h, w, c),
    name="lambda_temporal_repeat"
)(decoded)

model = Model(inputs, forecast, name="ConvLSTM_Hybrid_7day_2day")
model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse')
model.summary()

In [None]:

# Short training run. `gen` never terminates, so steps_per_epoch determines
# how many batches make up one epoch.
history = model.fit(
    gen,
    steps_per_epoch=10,
    epochs=3,
    verbose=1
)

# Persist the trained model in HDF5 format.
model.save("ConvLSTM_Hybrid_7day_2day_generator.h5")
print("model ConvLSTM hybrid: ConvLSTM_Hybrid_7day_2day_generator.h5")

# Shape check on one more batch. It comes from the same generator used for
# training, so it is not held-out data. Only the inputs are needed; indexing
# avoids rebinding `_`, which IPython uses for the last cell output.
X_test = next(gen)[0]
pred = model.predict(X_test)
print("🔮 Output shape:", pred.shape)