In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import copy

In [2]:
# Load and prepare data
df = pd.read_csv('../data/mucnuoc_gio_preprocess.csv')
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date').reset_index(drop=True)

# Setup features and target
features = ['q64']  # Use all features for input
target = 'q64'  # Target feature for prediction

In [3]:
# Normalize data
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# Scale all features for X
scaled_data = scaler_X.fit_transform(df[features])

# Scale only target feature for y (important for inverse transform)
target_data = scaler_y.fit_transform(df[[target]])


In [54]:
# Prepare sequences
X, y = [], []
target_idx = features.index(target)
past_window = 180
future_window = 180

for i in range(len(scaled_data) - past_window - future_window + 1):
    # Input: past_window timesteps with all features
    X_window = scaled_data[i:i+past_window]
    
    # Output: future_window timesteps with only target feature
    # Use target_data (scaled separately) instead of scaled_data
    y_sequence = target_data[i+past_window:i+past_window+future_window].flatten()
    
    X.append(X_window)
    y.append(y_sequence)

X = np.array(X, dtype=np.float32)  # Shape: (samples, past_window, n_features)
y = np.array(y, dtype=np.float32)  # Shape: (samples, future_window)

print(f"📊 Data shape: X={X.shape}, y={y.shape}")

📊 Data shape: X=(47144, 180, 1), y=(47144, 180)


In [55]:
# Train/test split
split_idx = int(len(X) * 0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

print(f"📊 Train shape: X_train={X_train.shape}, y_train={y_train.shape}")
print(f"📊 Test shape: X_test={X_test.shape}, y_test={y_test.shape}")

📊 Train shape: X_train=(37715, 180, 1), y_train=(37715, 180)
📊 Test shape: X_test=(9429, 180, 1), y_test=(9429, 180)


In [56]:
param_grid = {
    'batch_size': [32, 64],
    'epochs': [20, 30],
    'learning_rate': [0.001, 0.01],
    'num_units': [32, 64],
    'dropout_rate': [0.2, 0.3]
}

In [57]:
# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden1=64, hidden2=32, output_size=1, dropout_rate=0.1):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size, hidden1, batch_first=True, dropout=dropout_rate if hidden1 > 1 else 0)
        self.lstm2 = nn.LSTM(hidden1, hidden2, batch_first=True, dropout=dropout_rate if hidden2 > 1 else 0)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden2, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, output_size)
        
    def forward(self, x):
        # First LSTM layer
        out, (h1, c1) = self.lstm1(x)
        
        # Second LSTM layer
        out, (h2, c2) = self.lstm2(out)
        
        # Take the last timestep
        out = out[:, -1, :]
        
        # Apply dropout
        out = self.dropout(out)
        
        # Fully connected layers
        out = self.relu(self.fc1(out))
        out = self.fc2(out)
        
        return out

In [58]:
# Early stopping class for PyTorch
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0, restore_best_weights=True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.best_loss = None
        self.counter = 0
        self.best_weights = None
        
    def __call__(self, val_loss, model):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.save_checkpoint(model)
        elif val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
            self.save_checkpoint(model)
        else:
            self.counter += 1
            
        if self.counter >= self.patience:
            if self.restore_best_weights:
                model.load_state_dict(self.best_weights)
            return True
        return False
    
    def save_checkpoint(self, model):
        self.best_weights = copy.deepcopy(model.state_dict())

# Training function
def train_model(model, train_loader, val_loader, epochs, learning_rate, early_stopping=None):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    train_losses = []
    val_losses = []
    
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            
            # Gradient clipping to prevent exploding gradients (common with LSTMs)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            train_loss += loss.item()
        
        train_loss /= len(train_loader)
        train_losses.append(train_loss)
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
        
        val_loss /= len(val_loader)
        val_losses.append(val_loss)
        
        # Early stopping check
        if early_stopping is not None:
            if early_stopping(val_loss, model):
                print(f"   Early stopping at epoch {epoch+1}")
                break
    
    return train_losses, val_losses

# Model creation function
def create_lstm_model(num_units, learning_rate, dropout_rate, input_size, output_dim):
    model = LSTMModel(
        input_size=input_size,
        hidden1=num_units,
        hidden2=num_units//2,  # You can adjust this ratio
        output_size=output_dim,
        dropout_rate=dropout_rate
    )
    return model

# Convert data to PyTorch tensors
def prepare_data(X_train, y_train, X_test, y_test, batch_size, validation_split=0.2):
    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.FloatTensor(y_train)
    X_test_tensor = torch.FloatTensor(X_test)
    y_test_tensor = torch.FloatTensor(y_test)
    
    # Split training data for validation
    val_size = int(len(X_train_tensor) * validation_split)
    train_size = len(X_train_tensor) - val_size
    
    # Create validation split
    X_train_split = X_train_tensor[:train_size]
    y_train_split = y_train_tensor[:train_size]
    X_val_split = X_train_tensor[train_size:]
    y_val_split = y_train_tensor[train_size:]
    
    # Create datasets and dataloaders
    train_dataset = TensorDataset(X_train_split, y_train_split)
    val_dataset = TensorDataset(X_val_split, y_val_split)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader, X_test_tensor, y_test_tensor

# Grid search with proper model recreation
def lstm_grid_search(param_grid, X_train, y_train, X_test, y_test, input_size, future_window):
    best_params = None
    best_score = float('inf')
    best_model = None
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    print("🔍 Starting LSTM Grid Search...")
    total_combinations = len(list(ParameterGrid(param_grid)))
    current_combination = 0
    
    for params in ParameterGrid(param_grid):
        current_combination += 1
        print(f"\n📈 Testing combination {current_combination}/{total_combinations}: {params}")
        
        # Create new LSTM model for each parameter combination
        model = create_lstm_model(
            num_units=params['num_units'],
            learning_rate=params['learning_rate'],
            dropout_rate=params['dropout_rate'],
            input_size=input_size,
            output_dim=future_window
        )
        
        # Prepare data loaders
        train_loader, val_loader, test_loader, X_test_tensor, y_test_tensor = prepare_data(
            X_train, y_train, X_test, y_test, 
            batch_size=params['batch_size'], 
            validation_split=0.2
        )
        
        # Early stopping to prevent overfitting
        early_stop = EarlyStopping(patience=5, restore_best_weights=True)
        
        # Train model
        try:
            train_losses, val_losses = train_model(
                model=model,
                train_loader=train_loader,
                val_loader=val_loader,
                epochs=params['epochs'],
                learning_rate=params['learning_rate'],
                early_stopping=early_stop
            )
            
            # Evaluate on test set
            model.eval()
            model = model.to(device)
            y_pred_list = []
            
            with torch.no_grad():
                for batch_x, _ in test_loader:
                    batch_x = batch_x.to(device)
                    outputs = model(batch_x)
                    y_pred_list.append(outputs.cpu().numpy())
            
            y_pred = np.concatenate(y_pred_list, axis=0)
            mse = mean_squared_error(y_test, y_pred)
            print(f"   MSE: {mse:.6f}")
            
            # Update best parameters
            if mse < best_score:
                best_score = mse
                best_params = params.copy()
                best_model = copy.deepcopy(model.state_dict())
                print(f"   🎯 New best score!")
                
        except Exception as e:
            print(f"   ❌ Error training model: {e}")
            continue
    
    print(f"\n🏆 Best Parameters: {best_params}")
    print(f"🏆 Best MSE: {best_score:.6f}")
    
    return best_params, best_score, best_model


In [59]:
# Run LSTM grid search
best_params, best_score, best_model_state = lstm_grid_search(
    param_grid=param_grid,
    X_train=X_train,  # Your training features (samples, timesteps, features)
    y_train=y_train,  # Your training targets
    X_test=X_test,    # Your test features
    y_test=y_test,    # Your test targets
    input_size=len(features),  # Number of input features
    future_window=future_window  # Output dimension
)

🔍 Starting LSTM Grid Search...

📈 Testing combination 1/32: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 20, 'learning_rate': 0.001, 'num_units': 32}




   Early stopping at epoch 10
   MSE: 0.016578
   🎯 New best score!

📈 Testing combination 2/32: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 20, 'learning_rate': 0.001, 'num_units': 64}




   Early stopping at epoch 6
   MSE: 0.002257
   🎯 New best score!

📈 Testing combination 3/32: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 20, 'learning_rate': 0.01, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.001388
   🎯 New best score!

📈 Testing combination 4/32: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 20, 'learning_rate': 0.01, 'num_units': 64}




   Early stopping at epoch 8
   MSE: 0.004994

📈 Testing combination 5/32: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.001, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.008890

📈 Testing combination 6/32: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.001, 'num_units': 64}




   Early stopping at epoch 6
   MSE: 0.004647

📈 Testing combination 7/32: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 32}




   Early stopping at epoch 7
   MSE: 0.001060
   🎯 New best score!

📈 Testing combination 8/32: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 64}




   Early stopping at epoch 13
   MSE: 0.017602

📈 Testing combination 9/32: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 20, 'learning_rate': 0.001, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.014630

📈 Testing combination 10/32: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 20, 'learning_rate': 0.001, 'num_units': 64}




   Early stopping at epoch 7
   MSE: 0.003649

📈 Testing combination 11/32: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 20, 'learning_rate': 0.01, 'num_units': 32}




   Early stopping at epoch 7
   MSE: 0.008963

📈 Testing combination 12/32: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 20, 'learning_rate': 0.01, 'num_units': 64}




   Early stopping at epoch 10
   MSE: 0.001069

📈 Testing combination 13/32: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 30, 'learning_rate': 0.001, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.025814

📈 Testing combination 14/32: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 30, 'learning_rate': 0.001, 'num_units': 64}




   Early stopping at epoch 7
   MSE: 0.001600

📈 Testing combination 15/32: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 32}




   Early stopping at epoch 10
   MSE: 0.014323

📈 Testing combination 16/32: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 64}




   Early stopping at epoch 7
   MSE: 0.025922

📈 Testing combination 17/32: {'batch_size': 64, 'dropout_rate': 0.2, 'epochs': 20, 'learning_rate': 0.001, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.010417

📈 Testing combination 18/32: {'batch_size': 64, 'dropout_rate': 0.2, 'epochs': 20, 'learning_rate': 0.001, 'num_units': 64}




   Early stopping at epoch 7
   MSE: 0.002349

📈 Testing combination 19/32: {'batch_size': 64, 'dropout_rate': 0.2, 'epochs': 20, 'learning_rate': 0.01, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.001970

📈 Testing combination 20/32: {'batch_size': 64, 'dropout_rate': 0.2, 'epochs': 20, 'learning_rate': 0.01, 'num_units': 64}




   Early stopping at epoch 6
   MSE: 0.001122

📈 Testing combination 21/32: {'batch_size': 64, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.001, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.013924

📈 Testing combination 22/32: {'batch_size': 64, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.001, 'num_units': 64}




   Early stopping at epoch 9
   MSE: 0.004580

📈 Testing combination 23/32: {'batch_size': 64, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.001829

📈 Testing combination 24/32: {'batch_size': 64, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 64}




   Early stopping at epoch 7
   MSE: 0.001082

📈 Testing combination 25/32: {'batch_size': 64, 'dropout_rate': 0.3, 'epochs': 20, 'learning_rate': 0.001, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.038373

📈 Testing combination 26/32: {'batch_size': 64, 'dropout_rate': 0.3, 'epochs': 20, 'learning_rate': 0.001, 'num_units': 64}




   Early stopping at epoch 7
   MSE: 0.001153

📈 Testing combination 27/32: {'batch_size': 64, 'dropout_rate': 0.3, 'epochs': 20, 'learning_rate': 0.01, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.001256

📈 Testing combination 28/32: {'batch_size': 64, 'dropout_rate': 0.3, 'epochs': 20, 'learning_rate': 0.01, 'num_units': 64}




   Early stopping at epoch 7
   MSE: 0.002143

📈 Testing combination 29/32: {'batch_size': 64, 'dropout_rate': 0.3, 'epochs': 30, 'learning_rate': 0.001, 'num_units': 32}




   Early stopping at epoch 6
   MSE: 0.010715

📈 Testing combination 30/32: {'batch_size': 64, 'dropout_rate': 0.3, 'epochs': 30, 'learning_rate': 0.001, 'num_units': 64}




   Early stopping at epoch 7
   MSE: 0.002148

📈 Testing combination 31/32: {'batch_size': 64, 'dropout_rate': 0.3, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 32}




   Early stopping at epoch 10
   MSE: 0.005505

📈 Testing combination 32/32: {'batch_size': 64, 'dropout_rate': 0.3, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 64}




   Early stopping at epoch 6
   MSE: 0.001153

🏆 Best Parameters: {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 30, 'learning_rate': 0.01, 'num_units': 32}
🏆 Best MSE: 0.001060
