In [33]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import os
from io import open
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset, DataLoader
import time

In [5]:
from dataset import Step2_DataSet
from time_series_models import GRUSeq2SeqWithAttention, TimeSeriesModel, SequenceDataset

In [2]:
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [3]:
import kgml_lib
# define functions from kgml_lib
Z_norm = kgml_lib.Z_norm 
Z_norm_reverse = kgml_lib.Z_norm_reverse
# get_gpu_memory = kgml_lib.get_gpu_memory

In [6]:
root_dir = 'E:/PyKGML/deposit_code_v2/'
data_path = root_dir +  'processed_data/'
output_path = root_dir + 'test_results/'

input_data = 'recotest_data_scaled_v4_100sample.sav'
sample_index_file = "traindataset_split_year_v1.sav"

pretrained_model = "recotest_v11_exp4.sav_step1"
output_model = "recotest_v11_exp4_sample.sav_step2"
synthetic_data = "sys_data2.sav"

dataset = Step2_DataSet(data_path, input_data, output_path, sample_index_file)
dataset.load_step2_data()

dataset.prepare_step2_data()

torch.Size([6570, 100, 19]) torch.Size([6570, 100, 3]) torch.Size([18, 100, 1])
['RADN', 'TMAX_AIR', 'TDIF_AIR', 'HMAX_AIR', 'HDIF_AIR', 'WIND', 'PRECN', 'Crop_Type', 'GPP', 'Year', 'TBKDS', 'TSAND', 'TSILT', 'TFC', 'TWP', 'TKSat', 'TSOC', 'TPH', 'TCEC']


In [7]:
X = dataset.X  #[365*18, 100, 19]
X = torch.transpose(X,1, 0) #[100, 365*18, 19]

Y1 = dataset.Y1 #[365*18, 100, 3]
Y1 = torch.transpose(Y1,1, 0) #[100, 365*18, 3]

Y2 = dataset.Y2 #[18, 100, 1]
Y2 = torch.transpose(Y2,1, 0)

y_scaler = dataset.Y1_scaler

In [8]:
X.shape, Y1.shape, y_scaler.shape

(torch.Size([100, 6570, 19]), torch.Size([100, 6570, 3]), (3, 2))

In [22]:
6570/365*100

1800.0

In [10]:
test_size = 365*2
X_train = X[:, :-test_size, :]
X_test  = X[:, -test_size:, :]
Y_train = Y1[:, :-test_size, :]
Y_test  = Y1[:, -test_size:, :]

In [11]:
X_train.shape, Y_test.shape

(torch.Size([100, 5840, 19]), torch.Size([100, 730, 3]))

In [12]:
# Create Pytorch dataloader
sequence_length = 365  # Use 365 consecutive days as a sample

# Create Dataset objects for training and testing.
train_dataset = SequenceDataset(X_train, Y_train, sequence_length)
test_dataset = SequenceDataset(X_test, Y_test, sequence_length)

In [23]:
len(train_dataset)

1600

In [42]:
# Create DataLoaders.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [45]:
for batch_x, batch_y in train_loader:
    batch_x = batch_x.permute(0, 2, 1)
    batch_y = batch_y.permute(0, 2, 1)
    break

In [46]:
batch_x.shape, batch_y.shape

(torch.Size([32, 19, 365]), torch.Size([32, 3, 365]))

### Change to another method to check if we get same shape of batch X

In [31]:
_X = X.reshape(100*18, 365,19)
_Y1 = Y1.reshape(100*18, 365, 3)
_X.shape, _Y1.shape

(torch.Size([1800, 365, 19]), torch.Size([1800, 365, 3]))

In [30]:
X[1,:365,:] == _X[18,:,:]

tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])

In [34]:
class TimeSeriesDataset(Dataset):
    def __init__(self, inputs, outputs):
        """
        inputs:  (num_samples=1800, seq_len=365, input_features=16)
        outputs: (num_samples=1800, seq_len=365, output_features=3)
        """
        self.inputs = torch.FloatTensor(inputs)  
        self.outputs = torch.FloatTensor(outputs) 

    def __len__(self):
        return len(self.inputs)
    
    def __getitem__(self, idx):
        # 输入形状调整: (16, 365) -> Conv1d要求的(channels, seq_len)
        x = self.inputs[idx].permute(1, 0)  # (16, 365)
        y = self.outputs[idx]                # (365, 3)
        return x, y

In [35]:
# 创建数据集和DataLoader
dataset = TimeSeriesDataset(_X, _Y1)
train_size = int(0.8 * len(dataset))
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, len(dataset)-train_size])

batch_size = 32
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=1, shuffle=False)

In [36]:
for batch_x, batch_y in train_loader:
    break

In [37]:
batch_x.shape, batch_y.shape

(torch.Size([32, 19, 365]), torch.Size([32, 365, 3]))

## CNN model

In [55]:
# ==============================
# 1D CNN Model, Time series regression
# ==============================
class TemporalCNN(nn.Module):
    def __init__(self, input_channels=19, output_channels=3):
        super().__init__()
        # Assume seq_len is 365
        self.cnn = nn.Sequential(
            # Block 1: Output is (batch_size, 32, seq_len)
            nn.Conv1d(input_channels, 32, kernel_size=5, padding='same'),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            
            # Block 2: Output is (batch_size, 64, seq_len)
            nn.Conv1d(32, 64, kernel_size=3, padding='same'),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            
            # Block 3: Output is (batch_size, 3, seq_len)
            nn.Conv1d(64, output_channels, kernel_size=1)
        )

    def forward(self, x):
        # CNN requires dimmesion as: (batch, channels, seq)
        x = x.permute(0, 2, 1)  # -> (batch, input_features, 365)
        out = self.cnn(x)          # (batch, 3, 365)
        return out.permute(0, 2, 1)  # change back to (batch, 365, 3)

In [68]:
class CNNLSTM(nn.Module):
    def __init__(self, input_channels=19, seq_len=365, output_size=3):
        super().__init__()
        
        # 1D CNN Block: Extract local time series features
        self.cnn = nn.Sequential(
            nn.Conv1d(input_channels, 64, kernel_size=5, padding='same'),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            nn.Conv1d(64, 128, kernel_size=3, padding='same'),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
        )
        
        # LSTM Block: Extract long time features
        self.lstm = nn.LSTM(
            input_size=128,   # input dim = CNN output channels
            hidden_size=256,
            num_layers=2,
            bidirectional=True,
            batch_first=True  # output is (batch, seq, features)
        )
        
        # FC layer
        self.fc = nn.Sequential(
            nn.Linear(256*2, 128),  # bi-direction LSTM *2
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, output_size)
        )

    def forward(self, x):
        # Origin x shape is: (batch_size, 365, input_features)
        # Change shape for Conv1d: (batch, channels, seq=365)
        x = x.permute(0, 2, 1)  # -> (batch, input_features, 365)
        
        # 1D CNN
        cnn_out = self.cnn(x)  # (batch, 128, 365)
        
        # change shape for LSTM: (batch, seq, features)
        lstm_input = cnn_out.permute(0, 2, 1)  # -> (batch, 365, 128)
        
        # LSTM
        lstm_out, _ = self.lstm(lstm_input)  # (batch, 365, 512)  (bi-direction, hidden_size*2)
        
        # FC layer
        output = self.fc(lstm_out)  # (batch, 365, 3)
        return output

In [83]:
class CNN_LSTM_Attension(CNNLSTM):
    def __init__(self, input_channels=19, seq_len=365, output_size=3,attn_heads=8, attn_dropout=0.1):
        super().__init__(input_channels, seq_len, output_size)
        # Attension block after LSTM
        embed_size = self.lstm.hidden_size*2
        self.attention = nn.MultiheadAttention(embed_dim=embed_size, num_heads=8)

        # LayerNorm after the Attension
        self.norm = nn.LayerNorm(embed_size)
        # print('embed_size: ', embed_size)

    def forward(self, x):
        # Input x shape: (batch_size, 365, 16)
        # Change shape for Conv1d: (batch, channels, seq)
        x = x.permute(0, 2, 1)  # -> (batch, 16, 365)
        
        # 1D CNN
        cnn_out = self.cnn(x)  # (batch, 128, 365)
        
        # Change shape for LSTM: (batch, seq, features)
        lstm_input = cnn_out.permute(0, 2, 1)  # -> (batch, 365, 128)
        
        # LSTM
        lstm_out, _ = self.lstm(lstm_input)  # (batch, 365, 512)  (bi-direction hidden_size*2)

        # Attension
        attn_out, _ = self.attention(
            query=lstm_out,
            key=lstm_out,
            value=lstm_out,
            need_weights=False
        )
        # print('attn_out + lstm_out: ', (attn_out + lstm_out).shape)
        attn_out = self.norm(attn_out + lstm_out)  # Residual connection

        # FC layer
        output = self.fc(attn_out)  # (batch, 365, 3)
        return output

In [81]:
model.lstm.hidden_size

256

In [84]:
# 初始化模型
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = TemporalCNN().to(device)
# model = CNNLSTM().to(device)

model = CNN_LSTM_Attension().to(device)
criterion = nn.MSELoss()  # 逐时间步计算损失
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练循环
num_epochs = 60
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)  # (batch,input_features,365)
        batch_y = batch_y.to(device)  # (batch,output_features,365)
        
        optimizer.zero_grad()
        pred = model(batch_x)                    # pred: (batch,365,3)
        loss = criterion(pred, batch_y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * batch_x.size(0)
    
    # 验证阶段
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)  # (batch,input_features,365)
            batch_y = batch_y.to(device)  # (batch,output_features,365)
            pred = model(batch_x)
            val_loss += criterion(pred, batch_y).item() * batch_x.size(0)
    
    train_loss /= len(train_loader.dataset)
    val_loss /= len(val_loader.dataset)
    
    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}')

Epoch 1/60
Train Loss: 0.2869 | Val Loss: 0.0796
Epoch 2/60
Train Loss: 0.1364 | Val Loss: 0.0654
Epoch 3/60
Train Loss: 0.1188 | Val Loss: 0.0564
Epoch 4/60
Train Loss: 0.1060 | Val Loss: 0.0603
Epoch 5/60
Train Loss: 0.0966 | Val Loss: 0.0538
Epoch 6/60
Train Loss: 0.0926 | Val Loss: 0.0502
Epoch 7/60
Train Loss: 0.0848 | Val Loss: 0.0433
Epoch 8/60
Train Loss: 0.0797 | Val Loss: 0.0442
Epoch 9/60
Train Loss: 0.0778 | Val Loss: 0.0505
Epoch 10/60
Train Loss: 0.0773 | Val Loss: 0.0383
Epoch 11/60
Train Loss: 0.0712 | Val Loss: 0.0389
Epoch 12/60
Train Loss: 0.0701 | Val Loss: 0.0412
Epoch 13/60
Train Loss: 0.0683 | Val Loss: 0.0434
Epoch 14/60
Train Loss: 0.0660 | Val Loss: 0.0368
Epoch 15/60
Train Loss: 0.0706 | Val Loss: 0.0454
Epoch 16/60
Train Loss: 0.0677 | Val Loss: 0.0373
Epoch 17/60
Train Loss: 0.0644 | Val Loss: 0.0463
Epoch 18/60
Train Loss: 0.0611 | Val Loss: 0.0387
Epoch 19/60
Train Loss: 0.0628 | Val Loss: 0.0412
Epoch 20/60
Train Loss: 0.0608 | Val Loss: 0.0422
Epoch 21/