In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import ParameterGrid
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import gc
import warnings
import time
warnings.filterwarnings('ignore')

2025-09-04 16:21:30.639460: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-04 16:21:30.663369: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
def setup_gpu():
    """Enable on-demand ("growth") memory allocation on every visible GPU.

    Without memory growth TensorFlow reserves GPU memory up front; enabling it
    lets the process allocate incrementally, which matters when many models
    are built and discarded in one session.

    The function is hardware-agnostic: it reports how many GPUs were actually
    configured instead of assuming a particular card, and it says so explicitly
    when no GPU is visible (TensorFlow then runs on CPU).

    Returns:
        int: Number of GPUs for which memory growth was enabled (0 when no
        GPU is visible or configuration failed on the first device).
    """
    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        print("No GPU detected; TensorFlow will run on CPU")
        return 0

    configured = 0
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            configured += 1
    except RuntimeError as e:
        # Per the TensorFlow GPU guide, memory growth must be set before the
        # GPUs have been initialised; a late call ends up here.
        print(f"GPU setup error: {e}")
    else:
        print(f"GPU memory growth enabled on {configured} GPU(s)")
    return configured


GPU memory growth enabled for RTX 5060 Ti 16GB


2025-09-04 16:22:04.042090: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-09-04 16:22:04.049243: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-09-04 16:22:04.052007: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [3]:
def clear_gpu_memory():
    """Release state left over from the previous training run.

    Resets the Keras backend session first, then runs a garbage-collection
    pass so Python objects that are no longer referenced can be reclaimed.
    """
    keras_backend = tf.keras.backend
    keras_backend.clear_session()
    gc.collect()

In [None]:
class Q64_GridSearch:
    """Grid search over SimpleRNN configurations for univariate multi-step forecasting.

    A single column of a CSV file is min-max scaled, cut into sliding
    (past_window -> future_window) samples, and used to train and score one
    Keras model per hyper-parameter combination. Results accumulate in
    ``self.results`` as a list of dicts (one per evaluated configuration).
    """

    # Share of the training windows (taken from their chronological tail) that
    # is held out for early stopping / LR scheduling, so the test split is
    # never used for model selection.
    VALIDATION_FRACTION = 0.1

    # Configurations whose estimated footprint exceeds this many MB are skipped.
    VRAM_LIMIT_MB = 14000

    # (layer-size key, dropout key) pairs: the dropout key has no effect on the
    # built model unless the layer-size key is > 0.
    _CONDITIONAL_DROPOUTS = (
        ('units_2', 'dropout_layer'),
        ('dense_units_1', 'dropout_dense_1'),
        ('dense_units_2', 'dropout_dense_2'),
    )

    def __init__(self, data_path, target_feature='q64', seed=42):
        """Load the CSV, validate the target column and scale it to [0, 1].

        Args:
            data_path: Path of the CSV file passed to ``pd.read_csv``.
            target_feature: Name of the column to forecast.
            seed: Seed applied before each model is built so runs are
                comparable; pass ``None`` to leave the RNGs untouched.

        Raises:
            ValueError: If the column is missing or contains NaN values
                (any NaN would otherwise make every configuration fail).
        """
        setup_gpu()

        self.df = pd.read_csv(data_path)
        self.target_feature = target_feature
        self.seed = seed
        self.scaler = MinMaxScaler()
        self.results = []

        if self.target_feature not in self.df.columns:
            raise ValueError(
                f"Column '{self.target_feature}' not found in {data_path}; "
                f"available columns: {list(self.df.columns)}"
            )
        series = self.df[self.target_feature]
        n_missing = int(series.isna().sum())
        if n_missing:
            raise ValueError(
                f"Column '{self.target_feature}' contains {n_missing} NaN value(s); "
                "clean or impute the series before running the grid search"
            )

        # NOTE(review): the scaler is fitted on the whole series, i.e. it also
        # sees the rows that later form the test split. Fit on the training
        # portion only if strict leakage-free evaluation is required.
        q64_data = series.values.reshape(-1, 1)
        self.scaled_data = self.scaler.fit_transform(q64_data).flatten()

        print(f"Data loaded: {len(self.df):,} samples")
        print(f"Using UNIVARIATE time series: {self.target_feature} only")
        print(f"Data range: {series.min():.2f} to {series.max():.2f}")

    @staticmethod
    def _architecture_label(params):
        """Return a compact 'units_1[-units_2][-dense_1][-dense_2]' label."""
        parts = [str(params['units_1'])]
        for key in ('units_2', 'dense_units_1', 'dense_units_2'):
            if params.get(key, 0) > 0:
                parts.append(str(params[key]))
        return "-".join(parts)

    @staticmethod
    def _failure_result(params, status, vram_mb=None):
        """Build a result dict for a skipped/failed run with the same metric keys as a success."""
        return {
            **params,
            'mae': float('inf'), 'mse': float('inf'), 'rmse': float('inf'),
            'r2': -float('inf'),
            'vram_mb': vram_mb, 'status': status
        }

    @staticmethod
    def _canonical_params(params):
        """Return a copy of ``params`` with dropout rates of absent layers set to 0.0.

        A dropout rate attached to a layer that is not built does not change
        the model, so combinations differing only in such rates are duplicates.
        """
        canonical = dict(params)
        for units_key, dropout_key in Q64_GridSearch._CONDITIONAL_DROPOUTS:
            if dropout_key in canonical and not canonical.get(units_key, 0) > 0:
                canonical[dropout_key] = 0.0
        return canonical

    def create_univariate_dataset(self, past_window, future_window):
        """Cut the scaled series into supervised samples and split them 80/20.

        Sample ``i`` uses ``scaled_data[i : i + past_window]`` as input and the
        following ``future_window`` values as target. Every window that fits in
        the series is produced, including the last one.

        Args:
            past_window: Number of past values per input sample.
            future_window: Number of future values per target.

        Returns:
            Tuple ``(X_train, X_test, y_train, y_test)`` of float32 arrays;
            X has shape (samples, past_window, 1), y has shape
            (samples, future_window). The split is chronological (first 80%
            of the windows are training data).

        Raises:
            ValueError: If a window size is not positive or the series is
                shorter than ``past_window + future_window``.
        """
        if past_window < 1 or future_window < 1:
            raise ValueError("past_window and future_window must both be >= 1")

        series = np.asarray(self.scaled_data)
        span = past_window + future_window
        if len(series) < span:
            raise ValueError(
                f"Series of length {len(series)} is too short for "
                f"past_window={past_window} + future_window={future_window}"
            )

        # One row per start index; len(series) - span + 1 rows in total.
        windows = np.lib.stride_tricks.sliding_window_view(series, span)
        X = np.ascontiguousarray(windows[:, :past_window], dtype=np.float32)
        y = np.ascontiguousarray(windows[:, past_window:], dtype=np.float32)

        # Reshape X for RNN: (samples, timesteps, features)
        X = X.reshape(X.shape[0], past_window, 1)

        # 80/20 chronological split
        split_idx = int(len(X) * 0.8)
        X_train, X_test = X[:split_idx], X[split_idx:]
        y_train, y_test = y[:split_idx], y[split_idx:]

        print(f"Dataset created: X_train={X_train.shape}, y_train={y_train.shape}")
        return X_train, X_test, y_train, y_test

    def create_univariate_model(self, params, input_shape, output_shape):
        """Build and compile the SimpleRNN model described by ``params``.

        Args:
            params: Hyper-parameter dict. ``units_1`` and ``learning_rate`` are
                required; ``units_2``, ``dense_units_1``, ``dense_units_2`` add
                the corresponding layer when > 0; the dropout keys default to 0.
            input_shape: ``(timesteps, features)`` of one input sample.
            output_shape: Width of the final Dense layer (forecast horizon).

        Returns:
            A compiled Keras ``Sequential`` model (Adam, MSE loss, MAE metric).
        """
        stacked = params.get('units_2', 0) > 0
        rnn_dropout = dict(
            dropout=params.get('dropout', 0.0),
            recurrent_dropout=params.get('recurrent_dropout', 0.0),
        )

        model = Sequential()

        # The first recurrent layer only emits the full sequence when a second
        # recurrent layer consumes it.
        model.add(SimpleRNN(
            params['units_1'],
            return_sequences=stacked,
            input_shape=input_shape,
            **rnn_dropout
        ))

        if stacked:
            if params.get('dropout_layer', 0.0) > 0:
                model.add(Dropout(params['dropout_layer']))
            model.add(SimpleRNN(
                params['units_2'],
                return_sequences=False,
                **rnn_dropout
            ))

        # Optional dense layers, each followed by its own optional dropout.
        for units_key, dropout_key in (('dense_units_1', 'dropout_dense_1'),
                                       ('dense_units_2', 'dropout_dense_2')):
            if params.get(units_key, 0) > 0:
                model.add(Dense(params[units_key], activation='relu'))
                if params.get(dropout_key, 0.0) > 0:
                    model.add(Dropout(params[dropout_key]))

        # Output layer: one unit per forecast step
        model.add(Dense(output_shape))

        optimizer = Adam(learning_rate=params['learning_rate'])
        model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

        return model

    def estimate_vram_usage(self, params):
        """Return a rough memory estimate in MB for one configuration.

        The estimate sums float32 (4-byte) sizes of the input batch, layer
        weights and per-batch layer outputs, then multiplies by 3.5 as an
        allowance for optimizer state. Both dense layers are counted whenever
        their size is > 0, matching what ``create_univariate_model`` builds.

        Args:
            params: Hyper-parameter dict; requires ``past_window``,
                ``future_window``, ``batch_size`` and ``units_1``.

        Returns:
            float: Estimated usage in MB.
        """
        mb = 1024 ** 2
        n_features = 1  # Only the target column
        past_window = params['past_window']
        future_window = params['future_window']
        batch_size = params['batch_size']
        units_1 = params['units_1']
        units_2 = params.get('units_2', 0)
        dense_1 = params.get('dense_units_1', 0)
        dense_2 = params.get('dense_units_2', 0)

        input_mem = batch_size * past_window * n_features * 4 / mb

        # NOTE(review): the factor 3 is kept from the original heuristic. A
        # Keras SimpleRNN holds a single (inputs + units + 1) * units weight
        # set, so this over-estimates; confirm whether the margin is intended.
        rnn1_params = (n_features + units_1 + 1) * units_1 * 3 * 4 / mb
        rnn1_states = batch_size * units_1 * 4 / mb

        rnn2_params = rnn2_states = 0
        if units_2 > 0:
            rnn2_params = (units_1 + units_2 + 1) * units_2 * 3 * 4 / mb
            rnn2_states = batch_size * units_2 * 4 / mb

        # Dense layers: each present layer consumes the width of whatever
        # precedes it, independently of whether the other dense layer exists.
        last_size = units_2 if units_2 > 0 else units_1
        dense_params = dense_states = 0
        for width in (dense_1, dense_2):
            if width > 0:
                dense_params += (last_size + 1) * width * 4 / mb
                dense_states += batch_size * width * 4 / mb
                last_size = width

        # Output layer
        output_params = (last_size + 1) * future_window * 4 / mb
        output_mem = batch_size * future_window * 4 / mb

        base_memory = (input_mem + rnn1_params + rnn1_states + rnn2_params +
                       rnn2_states + dense_params + dense_states + output_params + output_mem)
        return base_memory * 3.5  # Adam optimizer overhead

    def evaluate_config(self, params):
        """Train and score one configuration; never raises for ordinary errors.

        Early stopping and the learning-rate schedule monitor a validation
        slice taken from the chronological tail of the training windows
        (``VALIDATION_FRACTION``). The test split is used only for the
        reported metrics, which are computed on the original (unscaled) scale.

        Args:
            params: Hyper-parameter dict (see ``create_univariate_model`` and
                ``estimate_vram_usage`` for the keys; ``epochs`` is required,
                ``patience`` defaults to 15).

        Returns:
            dict: ``params`` plus ``mae``, ``mse``, ``rmse``, ``r2``,
            ``vram_mb`` and ``status``. Successful runs (``status ==
            'success'``) also carry ``epochs_trained``, ``training_time``,
            ``final_train_loss``, ``final_val_loss`` and
            ``timestep_performance`` (metrics for up to the first 5 forecast
            steps). Skipped or failed runs have infinite error metrics and a
            status describing the reason.
        """
        vram_mb = None
        model = None
        try:
            vram_mb = self.estimate_vram_usage(params)
            if vram_mb > self.VRAM_LIMIT_MB:
                return self._failure_result(
                    params, f'vram_exceeded_{vram_mb:.0f}MB', vram_mb
                )

            arch_str = self._architecture_label(params)
            print(f"Testing: W={params['past_window']}→{params['future_window']}, "
                  f"Arch={arch_str}, B={params['batch_size']}, "
                  f"VRAM~{vram_mb:.0f}MB")

            clear_gpu_memory()
            if self.seed is not None:
                # Same starting RNG state for every configuration.
                tf.keras.utils.set_random_seed(self.seed)
            start_time = time.time()

            X_train, X_test, y_train, y_test = self.create_univariate_dataset(
                params['past_window'], params['future_window']
            )

            # Hold out the most recent training windows for validation so the
            # test split does not influence early stopping or weight selection.
            n_val = max(1, int(len(X_train) * self.VALIDATION_FRACTION))
            if len(X_train) - n_val < 1:
                raise ValueError(
                    f"Only {len(X_train)} training window(s); not enough to "
                    "form separate fit and validation sets"
                )
            X_fit, y_fit = X_train[:-n_val], y_train[:-n_val]
            X_val, y_val = X_train[-n_val:], y_train[-n_val:]

            model = self.create_univariate_model(
                params,
                (params['past_window'], 1),  # 1 feature (target column only)
                params['future_window']
            )

            callbacks = [
                EarlyStopping(
                    monitor='val_loss', patience=params.get('patience', 15),
                    restore_best_weights=True, verbose=0
                ),
                ReduceLROnPlateau(
                    monitor='val_loss', factor=0.7, patience=5,
                    min_lr=1e-6, verbose=0
                )
            ]

            history = model.fit(
                X_fit, y_fit,
                epochs=params['epochs'],
                batch_size=params['batch_size'],
                validation_data=(X_val, y_val),
                callbacks=callbacks,
                verbose=0
            )

            y_pred = model.predict(X_test, verbose=0)

            # Back to the original scale, keeping the (samples, horizon) layout
            # so per-timestep columns can be sliced without re-transforming.
            y_pred_inv = self.scaler.inverse_transform(
                y_pred.reshape(-1, 1)).reshape(y_pred.shape)
            y_test_inv = self.scaler.inverse_transform(
                y_test.reshape(-1, 1)).reshape(y_test.shape)

            # Overall metrics pool all forecast steps into one flat vector.
            mae = mean_absolute_error(y_test_inv.ravel(), y_pred_inv.ravel())
            mse = mean_squared_error(y_test_inv.ravel(), y_pred_inv.ravel())
            rmse = np.sqrt(mse)
            r2 = r2_score(y_test_inv.ravel(), y_pred_inv.ravel())

            # Per-timestep performance for the first few forecast steps
            timestep_metrics = [
                {
                    'timestep': t + 1,
                    'mae': mean_absolute_error(y_test_inv[:, t], y_pred_inv[:, t]),
                    'r2': r2_score(y_test_inv[:, t], y_pred_inv[:, t]),
                }
                for t in range(min(5, params['future_window']))
            ]

            training_time = time.time() - start_time
            epochs_trained = len(history.history['loss'])

            result = {
                **params,
                'mae': mae, 'mse': mse, 'rmse': rmse, 'r2': r2,
                'epochs_trained': epochs_trained, 'training_time': training_time,
                'vram_mb': vram_mb,
                'final_train_loss': history.history['loss'][-1],
                'final_val_loss': history.history['val_loss'][-1],
                'timestep_performance': timestep_metrics,
                'status': 'success'
            }

            print(f"  Results: MAE={mae:.4f}, RMSE={rmse:.4f}, R²={r2:.4f}, "
                  f"Time={training_time:.1f}s, Epochs={epochs_trained}")

            del model
            clear_gpu_memory()

        except Exception as e:
            # Deliberately broad: one failing configuration must not abort the
            # whole search. The error text is preserved in the result status.
            print(f"  ERROR: {str(e)}")
            model = None
            clear_gpu_memory()
            result = self._failure_result(params, f'error: {str(e)}', vram_mb)

        print()
        return result

    def run_grid_search(self, param_grid, max_evals=None, random_state=None):
        """Evaluate the configurations of ``param_grid`` and collect the results.

        Combinations that would build the same model (they differ only in a
        dropout rate of a layer that is not present) are evaluated once; the
        stored parameters have such rates normalised to 0.0.

        Args:
            param_grid: Grid accepted by ``sklearn.model_selection.ParameterGrid``.
            max_evals: Optional cap on the number of evaluations.
            random_state: Controls which configurations survive the cap.
                ``None`` (default) keeps the ``max_evals`` configurations with
                the smallest estimated VRAM usage; an int seed draws a uniform
                random sample instead, which covers the grid without a bias
                towards small models.

        Returns:
            list: ``self.results`` (results of earlier calls are retained).
        """
        print("Starting Univariate q64 Grid Search")
        print("=" * 50)

        grid = []
        seen = set()
        n_raw = 0
        for candidate in ParameterGrid(param_grid):
            n_raw += 1
            canonical = self._canonical_params(candidate)
            fingerprint = repr(sorted(canonical.items()))
            if fingerprint not in seen:
                seen.add(fingerprint)
                grid.append(canonical)

        if len(grid) < n_raw:
            print(f"Removed {n_raw - len(grid)} redundant combinations "
                  f"(dropout settings for layers that are not built)")

        if max_evals and len(grid) > max_evals:
            print(f"Limiting to {max_evals} evaluations out of {len(grid)} combinations")
            if random_state is None:
                # Sort by VRAM usage (smallest first)
                grid.sort(key=self.estimate_vram_usage)
                grid = grid[:max_evals]
            else:
                rng = np.random.default_rng(random_state)
                picks = np.sort(rng.choice(len(grid), size=max_evals, replace=False))
                grid = [grid[i] for i in picks]

        print(f"Total configurations: {len(grid)}")
        print(f"Using only: {self.target_feature}")
        print()

        for i, params in enumerate(grid, 1):
            print(f"Configuration {i}/{len(grid)}:")
            result = self.evaluate_config(params)
            self.results.append(result)

        return self.results

    def print_summary(self):
        """Print counts, the best R²/MAE, average VRAM and the top 5 models by R²."""
        successful = [r for r in self.results if r['status'] == 'success']
        failed = [r for r in self.results if r['status'] != 'success']

        print("=" * 60)
        print("UNIVARIATE q64 GRID SEARCH RESULTS")
        print("=" * 60)
        print(f"Total: {len(self.results)}, Success: {len(successful)}, Failed: {len(failed)}")

        if not successful:
            return

        best_r2 = max(r['r2'] for r in successful)
        best_mae = min(r['mae'] for r in successful)
        avg_vram = np.mean([r.get('vram_mb', 0) for r in successful])

        print(f"\nBest R²: {best_r2:.4f}")
        print(f"Best MAE: {best_mae:.4f}")
        print(f"Avg VRAM: {avg_vram:.0f}MB")

        print(f"\nTOP 5 MODELS:")
        print("-" * 60)
        top_models = sorted(successful, key=lambda x: x['r2'], reverse=True)[:5]

        for i, model in enumerate(top_models, 1):
            arch = self._architecture_label(model)
            print(f"{i}. R²={model['r2']:.4f} | MAE={model['mae']:.4f}")
            print(f"   W={model['past_window']}→{model['future_window']} | {arch} | "
                  f"VRAM={model.get('vram_mb', 0):.0f}MB")

        

In [None]:
def get_univariate_param_grid():
    """Return the hyper-parameter grid used for the univariate q64 search.

    Every value is a list of candidates; the search space is the cartesian
    product of all lists. A layer size of 0 means "layer absent".
    """
    # Input history length and forecast horizon (in time steps)
    windows = {
        'past_window': [24, 60, 180],
        'future_window': [24, 60, 180],
    }

    # Recurrent and dense layer widths
    architecture = {
        'units_1': [32, 64],
        'units_2': [0, 32, 64],
        'dense_units_1': [0, 32, 64],
        'dense_units_2': [0, 32, 64],
    }

    # Dropout rates: inside the RNN cells, between the RNN layers, and after
    # each dense layer
    regularization = {
        'dropout': [0.0, 0.1, 0.2],
        'recurrent_dropout': [0.0, 0.1],
        'dropout_layer': [0.0, 0.1],
        'dropout_dense_1': [0.0, 0.1],
        'dropout_dense_2': [0.0, 0.1],
    }

    # Optimizer and training-loop settings
    training = {
        'learning_rate': [0.001, 0.005],
        'batch_size': [32, 64],
        'epochs': [20, 30],
        'patience': [5, 10],
    }

    return {**windows, **architecture, **regularization, **training}


In [None]:
# USAGE
# Entry point: load the series, show the search space, run a capped grid
# search and print the ranking.
if __name__ == "__main__":
    print("Univariate q64 Time Series Prediction - RTX 5060 Ti Optimized")
    print(60 * "=")

    # Load and scale the data (also configures the GPU)
    grid_search = Q64_GridSearch('../data/mucnuoc_gio_preprocess.csv')

    # Candidate values for every hyper-parameter
    param_grid = get_univariate_param_grid()

    # List each parameter's candidates and, in the same pass, accumulate the
    # size of the full cartesian product.
    total_combinations = 1
    print("Parameter ranges:")
    for param, values in param_grid.items():
        print(f"  {param}: {values}")
        total_combinations *= len(values)
    print(f"\nTotal combinations: {total_combinations:,}")

    # Cap the number of trained models; adjust based on time budget
    max_evaluations = 75
    results = grid_search.run_grid_search(param_grid, max_evals=max_evaluations)

    # Ranking of the evaluated configurations
    grid_search.print_summary()
    