<a href="https://colab.research.google.com/github/MattJBorowski1991/AAPL_LSTM_simple/blob/main/AAPL_price_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Apple Stock Price Prediction - Data Preparation

# Install required packages
!pip install yfinance pandas matplotlib
!pip install torch

# This script downloads Apple stock data and prepares it for deep learning
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
import os
from datetime import datetime, timedelta
import pickle
from google.colab import drive
# Mount Google Drive so the notebook can read and write files under /content/drive.
drive.mount('/content/drive')
import os
# Folder on the mounted Drive that the rest of the notebook joins file names onto.
notebook_dir = '/content/drive/MyDrive/Colab Notebooks/AAPL price predictor'  # CHANGE: Update this path to your .ipynb folder
# Fail fast with an actionable message when the configured folder is missing.
if not os.path.exists(notebook_dir):
    raise ValueError(f"Directory {notebook_dir} does not exist. Please update notebook_dir to the correct path containing your .ipynb file.")
print(f"Files will be saved to: {notebook_dir}")

# CHANGE: Verify write permissions by creating a test file
# The probe file is created and removed immediately; any failure while doing so
# is re-raised as a ValueError that names the folder.
test_file = os.path.join(notebook_dir, 'test_write.txt')
try:
    with open(test_file, 'w') as f:
        f.write('Test')
    os.remove(test_file)
    print(f"Write permissions verified for {notebook_dir}")
except Exception as e:
    raise ValueError(f"Cannot write to {notebook_dir}: {e}")


# Set random seed for reproducibility
# NOTE(review): this seeds NumPy's global RNG only; code that draws from
# Python's `random` module or from torch is not affected by this call.
np.random.seed(42)


# Incremental download helper for AAPL price history
def fetch_new_data(last_date):
    """Download AAPL rows for the days following ``last_date``.

    Args:
        last_date: ``datetime.date`` of the most recent row already held.

    Returns:
        Whatever ``yf.download`` returns for the range starting the day after
        ``last_date`` and ending at today's date, or ``None`` when that start
        day is not strictly before today.
    """
    end_date = datetime.now().date()
    start_date = last_date + timedelta(days=1)

    if start_date < end_date:
        print(f"Fetching new data from {start_date} to {end_date}...")
        return yf.download("AAPL", start=start_date, end=end_date)

    # Start day is today or later: there is nothing to request.
    print("No new data to fetch.")
    return None

# Keep the cached AAPL CSV on Drive in sync with the latest available data
def _flatten_price_columns(frame):
    """Replace MultiIndex columns on ``frame`` with flat names, in place.

    Each ``(field, ticker)`` pair becomes ``field`` when the ticker level is
    empty, otherwise ``field_ticker``; the ``_AAPL`` suffix is then stripped
    because only one ticker is handled. Frames with flat columns are returned
    untouched.
    """
    if isinstance(frame.columns, pd.MultiIndex):
        joined = [col[0] if col[1] == '' else f"{col[0]}_{col[1]}" for col in frame.columns]
        frame.columns = [name.replace('_AAPL', '') for name in joined]
    return frame


def update_stock_data():
    """Load the cached AAPL data, append any newer rows and persist the result.

    The cache lives at ``<notebook_dir>/aapl_data.csv``. When it does not exist
    the full history is downloaded instead. Newer rows returned by
    ``fetch_new_data`` are appended, de-duplicated on the date index (keeping
    the newest copy) and sorted.

    The CSV is written whenever the in-memory frame differs from what is on
    disk: after appending new rows, and also after a first-time full download
    even if no additional rows were found (previously that download was
    discarded and repeated on every run).

    Returns:
        The up-to-date DataFrame indexed by date.

    Raises:
        ValueError: if neither the cache nor the download yielded any rows.
    """
    data_file = os.path.join(notebook_dir, 'aapl_data.csv')

    # Load existing data or download full history
    needs_save = False
    if os.path.exists(data_file):
        df = pd.read_csv(data_file, index_col='Date', parse_dates=True)
    else:
        print("No existing data found. Downloading full history...")
        df = yf.download("AAPL", period="max")
        needs_save = True  # nothing on disk yet, so persist even without an append

    if df.empty:
        # Without rows there is no last date to resume from.
        raise ValueError(f"No AAPL data available from {data_file} or from download.")

    _flatten_price_columns(df)
    last_date = df.index.max().date()

    # Fetch new data
    new_data = fetch_new_data(last_date)

    if new_data is not None and not new_data.empty:
        _flatten_price_columns(new_data)
        df = pd.concat([df, new_data])
        # Remove duplicates based on date index, preferring the freshly fetched row
        df = df[~df.index.duplicated(keep='last')]
        df = df.sort_index()
        needs_save = True
    else:
        print("No new data to append.")

    if needs_save:
        # Save updated data to Google Drive
        df.to_csv(data_file)
        print(f"Updated data saved to {data_file}")

    return df

# Download or update Apple's stock data
print("Checking for new Apple stock data...")
aapl_data = update_stock_data()

# Display info about the dataset
print(f"Dataset shape: {aapl_data.shape}")
print(f"Date range: {aapl_data.index.min()} to {aapl_data.index.max()}")
print(f"Total days: {aapl_data.shape[0]}")
print(f"Total datapoints: {aapl_data.shape[0] * aapl_data.shape[1]}")

# Preview the data
print("\nData Preview:")
print(aapl_data.head())

# Check for missing values
print("\nMissing values:")
print(aapl_data.isnull().sum())

# Calculate technical indicators
# Work on a copy so aapl_data keeps only the raw price/volume columns.
print("\nCalculating technical indicators...")
df = aapl_data.copy()

# 1. RSI (14-day)
# Gains and losses are averaged with a plain 14-row rolling mean; the first
# 13 rows therefore have no RSI value (NaN) and are dropped further below.
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI_14'] = 100 - (100 / (1 + rs))

# 2. Add date-based features
df['DAY_OF_WEEK'] = df.index.dayofweek
df['MONTH'] = df.index.month
df['DAY_OF_MONTH'] = df.index.day

# 3. Price patterns - gaps
# 1 when the open is above/below the previous row's close, else 0. The first
# row has no previous close, so both flags are 0 there.
df['Gap_Up'] = ((df['Open'] > df['Close'].shift(1)) * 1)
df['GAP_DOWN'] = ((df['Open'] < df['Close'].shift(1)) * 1)

# 4. Add our target variable - next day's opening price
# Row t holds the open of row t+1; the final row has no successor and is NaN.
df['Next_Day_Open'] = df['Open'].shift(-1)

# Remove rows with NaN values (from rolling calculations)
# This also removes the final row, whose Next_Day_Open is NaN.
df_clean = df.dropna()
print(f"Original features: {aapl_data.shape[1]}")
print(f"Expanded features: {df.shape[1]}")
print(f"Total datapoints after feature engineering: {df_clean.shape[0] * df_clean.shape[1]}")

# Show all features
print("\nAvailable features in the dataset:")
for i, col in enumerate(df_clean.columns):
    print(f"{i+1}. {col}")

# Prepare data for model training
print("\nPreparing data for training...")

# Define features and target
# X keeps every column except the target; y is the target column alone.
X = df_clean.drop(['Next_Day_Open'], axis=1)
y = df_clean['Next_Day_Open']

# Feature scaling
# NOTE(review): both scalers are fit on every row, before the train/test split
# that happens later in this cell, so test-period minima/maxima influence the
# scaling of the training data. Confirm whether that is acceptable.
print("Applying MinMax scaling to features...")
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
# The target scaler needs a 2-D column; flatten back to 1-D afterwards.
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten()

# Sliding-window builder for time series prediction
def create_sequences(X, y, time_steps=90):
    """Slice ``X`` into overlapping windows and pair each with a label from ``y``.

    Window ``i`` is ``X[i:i + time_steps]`` and its label is
    ``y[i + time_steps]``, i.e. the entry of ``y`` immediately after the
    window. ``len(X) - time_steps`` pairs are produced; none when that
    difference is zero or negative.

    Args:
        X: indexable sequence of feature rows.
        y: indexable sequence of labels aligned with ``X``.
        time_steps: number of consecutive rows per window.

    Returns:
        ``(windows, labels)`` as two NumPy arrays.
    """
    window_starts = range(len(X) - time_steps)
    windows = [X[start:start + time_steps] for start in window_starts]
    labels = [y[start + time_steps] for start in window_starts]
    return np.array(windows), np.array(labels)

# Define sequence length (lookback period)
sequence_length = 90  # Using 90 days of data to predict the next day
print(f"Creating sequences with lookback period of {sequence_length} days...")
# Label alignment: y_scaled[t] is already the open of the day AFTER row t,
# whereas create_sequences labels the window [i, i + T) with y[i + T]. Passing
# y_scaled unchanged would train each window against the open two days after
# its last row. Rolling the labels forward by one slot makes create_sequences
# pick y_scaled[i + T - 1], the open of the day right after the window. The
# element wrapped around to index 0 is never read because label indices start
# at T >= 1.
y_aligned = np.roll(y_scaled, 1)
X_seq, y_seq = create_sequences(X_scaled, y_aligned, sequence_length)
print(f"Sequence shape: {X_seq.shape}")
print(f"Target shape: {y_seq.shape}")

# Train-test split
# Chronological split: the first 90% of windows train, the most recent 10% test.
train_size = int(len(X_seq) * 0.9)
X_train, X_test = X_seq[:train_size], X_seq[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]
print(f"Training set: {X_train.shape}")
print(f"Testing set: {X_test.shape}")

# Save processed data
# Relative paths: files land in the current working directory.
np.save('X_train.npy', X_train)
np.save('y_train.npy', y_train)
np.save('X_test.npy', X_test)
np.save('y_test.npy', y_test)

# Save scalers for later use
import pickle
with open('scaler_X.pkl', 'wb') as f:
    pickle.dump(scaler_X, f)
with open('scaler_y.pkl', 'wb') as f:
    pickle.dump(scaler_y, f)
print("\nData preparation complete! Files saved and ready for model training.")

# Save the feature list for reference
# One column name per line, in the same order as the scaled feature matrix.
with open('feature_list.txt', 'w') as f:
    for feature in X.columns:
        f.write(f"{feature}\n")
print("Feature list saved to 'feature_list.txt'")


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

[*********************100%***********************]  1 of 1 completed


Updated data saved to /content/drive/MyDrive/Colab Notebooks/AAPL price predictor/aapl_data.csv
Dataset shape: (11183, 5)
Date range: 1980-12-12 00:00:00 to 2025-04-25 00:00:00
Total days: 11183
Total datapoints: 55915

Data Preview:
               Close      High       Low      Open     Volume
Date                                                         
1980-12-12  0.098726  0.099155  0.098726  0.098726  469033600
1980-12-15  0.093575  0.094005  0.093575  0.094005  175884800
1980-12-16  0.086707  0.087136  0.086707  0.087136  105728000
1980-12-17  0.088853  0.089282  0.088853  0.088853   86441600
1980-12-18  0.091429  0.091858  0.091429  0.091429   73449600

Missing values:
Close     0
High      0
Low       0
Open      0
Volume    0
dtype: int64

Calculating technical indicators...
Original features: 5
Expanded features: 12
Total datapoints after feature engineering: 134028

Available features in the dataset:
1. Close
2. High
3. Low
4. Open
5. Volume
6. RSI_14
7. DAY_OF_WEEK
8. MONTH

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
import random
import json  # ADDED: For saving/loading hyperparameter metadata

# Load prepared data
# Relative paths: the arrays are read from the current working directory,
# under the same file names the data-preparation cell passes to np.save.
X_train = np.load('X_train.npy')
y_train = np.load('y_train.npy')
X_test = np.load('X_test.npy')
y_test = np.load('y_test.npy')

# Dataset wrapper that serves (features, target) pairs as float32 tensors
class StockDataset(Dataset):
    """Hold features ``X`` and targets ``y`` as float32 tensors.

    Indexing returns the feature entry at that position together with the
    matching target, the target carrying one extra trailing dimension.
    """

    def __init__(self, X, y):
        as_float32 = {"dtype": torch.float32}
        self.X = torch.tensor(X, **as_float32)
        self.y = torch.tensor(y, **as_float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        features = self.X[idx]
        # unsqueeze(-1) appends a trailing axis to the selected target.
        target = self.y[idx].unsqueeze(-1)
        return features, target

# Create datasets
# Built once at module level; create_data_loaders wraps these same two
# objects every time it is called.
train_dataset = StockDataset(X_train, y_train)
test_dataset = StockDataset(X_test, y_test)

def create_data_loaders(batch_size):
    """Wrap the module-level datasets in DataLoaders of the given batch size.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    return (
        DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
        DataLoader(test_dataset, batch_size=batch_size, shuffle=False),
    )

# Define model architecture
class StockPredictor(nn.Module):
    """LSTM regressor producing one value per input sequence.

    Pipeline: LSTM -> output at the last time step -> BatchNorm1d -> Dropout
    -> Linear(hidden_size, 1).

    Args:
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        dropout_rate: dropout probability applied after batch normalisation.
        input_size: number of features per time step. When omitted it is read
            from the module-level ``X_train`` (``X_train.shape[2]``), which is
            what the class always did before this parameter existed.

    NOTE(review): BatchNorm1d cannot normalise a single-sample batch in
    training mode; confirm the training loader never yields a batch of one.
    """

    def __init__(self, hidden_size, num_layers, dropout_rate, input_size=None):
        super().__init__()
        if input_size is None:
            # Backward-compatible default: infer the feature count from the
            # training array defined at module level.
            input_size = X_train.shape[2]
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.bn = nn.BatchNorm1d(hidden_size)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, 1)."""
        # nn.LSTM starts from zero hidden and cell states when none are given,
        # so there is no need to allocate and transfer them on every call.
        out, _ = self.lstm(x)
        # Only the final time step feeds the regression head.
        out = self.bn(out[:, -1, :])
        out = self.dropout(out)
        return self.fc(out)

# Candidate values for every hyperparameter explored by the random search
hyperparam_grid = {
    'batch_size': [16, 32, 64, 128],
    'hidden_size': [32, 64, 128, 256],
    'num_layers': [1, 2, 3],
    'dropout_rate': [0.0, 0.1, 0.25, 0.5],
    'learning_rate': [0.0001, 0.001, 0.01],
    'weight_decay': [0.0, 0.0001, 0.001],
    'num_epochs': [10, 20]
}

def sample_hyperparameters():
    """Draw one value per hyperparameter, uniformly and independently, from the grid."""
    # Fixed draw order so the sequence of random.choice calls stays stable.
    draw_order = (
        'batch_size',
        'hidden_size',
        'num_layers',
        'dropout_rate',
        'learning_rate',
        'weight_decay',
        'num_epochs',
    )
    return {name: random.choice(hyperparam_grid[name]) for name in draw_order}

# Flag to enable/disable hyperparameter search
USE_HYPERPARAM_SEARCH = True  # True: train randomly sampled configs; False: train the single fixed config below
NUM_SEARCH_ITERATIONS = 100  # Number of random configurations sampled when the search is enabled

# Initialize best configuration tracking
# best_test_loss starts at infinity so the first evaluated epoch always
# becomes the initial best; best_config_path is where the winner is written.
best_config = None
best_test_loss = float('inf')
best_config_path = os.path.join(notebook_dir, 'best_config.txt')

# Conditional configuration setup
# Either way configs_to_try ends up as a list of config dicts for the training loop.
if not USE_HYPERPARAM_SEARCH:
    config = {
        'batch_size': 32,
        'hidden_size': 64,
        'num_layers': 2,
        'dropout_rate': 0.25,
        'learning_rate': 0.001,
        'weight_decay': 0.001,
        'num_epochs': 50
    }
    configs_to_try = [config]
else:
    # Configs are sampled independently, so the same combination may appear more than once.
    configs_to_try = [sample_hyperparameters() for _ in range(NUM_SEARCH_ITERATIONS)]
    print(f"Performing hyperparameter search over {NUM_SEARCH_ITERATIONS} random configurations")

# Training loop over configurations
# For every config: build loaders and a model, optionally resume from a
# compatible checkpoint stored under the same config index, train for the
# configured number of epochs, checkpoint on test-loss improvement, and track
# the globally best config.
# NOTE(review): checkpointing and best-config selection both use the test-set
# loss, so the test set doubles as a validation set here.
for config_idx, config in enumerate(configs_to_try):
    print(f"\nTraining with config {config_idx + 1}/{len(configs_to_try)}: {config}")

    # Create data loaders with current batch size
    # NOTE(review): the model applies BatchNorm1d; confirm no training batch
    # ends up with a single sample for the chosen batch size.
    train_loader, test_loader = create_data_loaders(config['batch_size'])

    # Initialize model with current parameters
    model = StockPredictor(
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        dropout_rate=config['dropout_rate']
    )

    # MODIFIED: Load saved weights if available and compatible
    # Artifacts are keyed by the config's position in configs_to_try, not by
    # its contents, so a file left by an earlier run may belong to a different
    # config; the metadata comparison below guards against that.
    checkpoint_path = os.path.join(notebook_dir, f'checkpoint_best_config_{config_idx}.pth')
    best_loss_path = os.path.join(notebook_dir, f'best_loss_config_{config_idx}.txt')
    metadata_path = os.path.join(notebook_dir, f'config_metadata_{config_idx}.json')  # ADDED: Metadata file

    current_best_loss = float('inf')
    can_load_checkpoint = False
    if os.path.exists(checkpoint_path) and os.path.exists(metadata_path):
        # ADDED: Load metadata and check compatibility
        with open(metadata_path, 'r') as f:
            saved_config = json.load(f)
        # Check if critical hyperparameters match
        # Only the three values passed to the model constructor are compared.
        if (saved_config['hidden_size'] == config['hidden_size'] and
            saved_config['num_layers'] == config['num_layers'] and
            saved_config['dropout_rate'] == config['dropout_rate']):
            can_load_checkpoint = True
            try:
                model.load_state_dict(torch.load(checkpoint_path))
                print(f"Loaded weights from {checkpoint_path}")
                # Resume the best-loss threshold too, so the checkpoint is only
                # overwritten by an epoch that beats the stored loss.
                if os.path.exists(best_loss_path):
                    with open(best_loss_path, 'r') as f:
                        current_best_loss = float(f.read())
                    print(f"Loaded best loss for config {config_idx}: {current_best_loss}")
                else:
                    print(f"No best loss file found for config {config_idx}, starting with infinity")
            except RuntimeError as e:
                print(f"Failed to load checkpoint due to architecture mismatch: {e}")
                can_load_checkpoint = False
        else:
            print(f"Checkpoint {checkpoint_path} has incompatible hyperparameters: {saved_config}")
            print(f"Current config: {config}")
    else:
        print(f"No checkpoint or metadata found for config {config_idx}, starting with random weights")

    if not can_load_checkpoint:
        print(f"Initializing model with random weights for config {config_idx}")

    # Initialize optimizer and loss function
    criterion = nn.MSELoss()
    optimizer = optim.Adam(
        model.parameters(),
        lr=config['learning_rate'],
        weight_decay=config['weight_decay']
    )

    # Training loop with progress monitoring
    print(f"Training model with {config['num_epochs']} epochs")
    train_losses = []
    test_losses = []

    for epoch in range(config['num_epochs']):
        model.train()
        total_train_loss = 0
        for batch in train_loader:
            X_batch, y_batch = batch
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()

        # Mean of per-batch losses (each batch weighted equally).
        avg_train_loss = total_train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Evaluate on test set
        model.eval()
        total_test_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                X_batch, y_batch = batch
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                total_test_loss += loss.item()

        avg_test_loss = total_test_loss / len(test_loader)
        test_losses.append(avg_test_loss)

        print(f"Config {config_idx + 1}, Epoch {epoch + 1}/{config['num_epochs']}, Train Loss: {avg_train_loss:.6f}, Test Loss: {avg_test_loss:.6f}")

        # MODIFIED: Save weights and metadata if this epoch has the best test loss
        if avg_test_loss < current_best_loss:
            current_best_loss = avg_test_loss
            torch.save(model.state_dict(), checkpoint_path)
            with open(best_loss_path, 'w') as f:
                f.write(str(current_best_loss))
            # ADDED: Save hyperparameter metadata
            # Stored next to the checkpoint so a later run can check that the
            # architecture matches before loading the weights.
            with open(metadata_path, 'w') as f:
                json.dump({
                    'hidden_size': config['hidden_size'],
                    'num_layers': config['num_layers'],
                    'dropout_rate': config['dropout_rate']
                }, f)
            # print(f"Saved new best weights for config {config_idx + 1} with test loss {current_best_loss}")

        # Update global best configuration
        # best_config references the same dict object that sits in configs_to_try.
        if avg_test_loss < best_test_loss:
            best_test_loss = avg_test_loss
            best_config = config
            with open(best_config_path, 'w') as f:
                f.write(str(best_config))

    # Save trained model for this config
    # These are the weights after the final epoch, which are not necessarily
    # the best-epoch weights stored in checkpoint_path.
    torch.save(model.state_dict(), os.path.join(notebook_dir, f'stock_predictor_model_config_{config_idx}.pth'))
    print(f"Model for config {config_idx + 1} saved to {os.path.join(notebook_dir, f'stock_predictor_model_config_{config_idx}.pth')}")

    # Save training history for this config
    history_df = pd.DataFrame({'epoch': range(1, config['num_epochs'] + 1), 'train_loss': train_losses, 'test_loss': test_losses})
    history_df.to_csv(os.path.join(notebook_dir, f'training_history_config_{config_idx}.csv'), index=False)
    print(f"Training history for config {config_idx + 1} saved to {os.path.join(notebook_dir, f'training_history_config_{config_idx}.csv')}")

# Print and load the best configuration
if USE_HYPERPARAM_SEARCH and best_config is not None:
    print(f"\nBest configuration found: {best_config} with test loss {best_test_loss}")
    # Locate the winner by identity: best_config is the very dict object taken
    # from configs_to_try, and an equality search (list.index) could return an
    # earlier, separately trained config that merely has the same values.
    best_config_idx = next(
        (idx for idx, candidate in enumerate(configs_to_try) if candidate is best_config),
        0,
    )
    # `model` still holds the architecture of the LAST config trained. Rebuild
    # it with the winning architecture so the checkpoint's tensor shapes match.
    model = StockPredictor(
        hidden_size=best_config['hidden_size'],
        num_layers=best_config['num_layers'],
        dropout_rate=best_config['dropout_rate']
    )
    model.load_state_dict(torch.load(os.path.join(notebook_dir, f'checkpoint_best_config_{best_config_idx}.pth')))
    print(f"Loaded best model weights for prediction from config {best_config_idx + 1}")
else:
    # Fixed-configuration run: only one config (index 0) was trained.
    model.load_state_dict(torch.load(os.path.join(notebook_dir, 'checkpoint_best_config_0.pth')))
    print("Using weights from the fixed configuration")

# Inference-time feature preparation
def prepare_latest_sequence(df, scaler_X, feature_columns, sequence_length=90):
    """Build the scaled model input from the most recent rows of ``df``.

    The indicator columns (``RSI_14``, ``Gap_Up``, ``GAP_DOWN``) and the date
    features are recomputed from ``Close``, ``Open`` and the dates. They are
    computed on the window plus extra warm-up rows and only then trimmed to
    ``sequence_length`` rows, so that the earliest rows of the window get
    full-window indicator values instead of partial-window/zero values.

    Args:
        df: price DataFrame whose index is named ``Date`` and which contains
            at least ``Close`` and ``Open``.
        scaler_X: fitted scaler exposing ``transform``.
        feature_columns: ordered column names expected by ``scaler_X``.
            Columns that cannot be derived are filled with 0.
        sequence_length: number of trailing rows in the returned window.

    Returns:
        float32 tensor of shape ``(1, rows, len(feature_columns))`` where
        ``rows`` is ``sequence_length`` (fewer if ``df`` is shorter).
    """
    rsi_window = 14
    # Warm-up rows give every row of the final window a complete RSI window
    # and a real previous close. If df is too short, iloc simply returns what
    # exists and min_periods=1 below keeps the indicators defined.
    history = df.iloc[-(sequence_length + rsi_window):].copy().reset_index()
    history['Date'] = pd.to_datetime(history['Date'])

    delta = history['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=rsi_window, min_periods=1).mean()
    avg_loss = loss.rolling(window=rsi_window, min_periods=1).mean()
    rs = avg_gain / avg_loss
    history['RSI_14'] = 100 - (100 / (1 + rs))

    # Comparisons against a missing previous close evaluate to False -> 0.
    previous_close = history['Close'].shift(1)
    history['Gap_Up'] = (history['Open'] > previous_close).astype(int)
    history['GAP_DOWN'] = (history['Open'] < previous_close).astype(int)

    history['DAY_OF_WEEK'] = history['Date'].dt.dayofweek
    history['MONTH'] = history['Date'].dt.month
    history['DAY_OF_MONTH'] = history['Date'].dt.day

    # Drop the warm-up rows now that the indicators are computed.
    latest_data = history.iloc[-sequence_length:].copy()
    latest_data = latest_data.drop(['Next_Day_Open'], axis=1, errors='ignore')
    for col in feature_columns:
        if col not in latest_data.columns:
            latest_data[col] = 0

    # Remaining NaNs (e.g. 0/0 in the RSI ratio) are replaced with 0.
    features = latest_data[feature_columns].fillna(0)
    scaled_features = scaler_X.transform(features)
    return torch.tensor(scaled_features, dtype=torch.float32).unsqueeze(0)

def make_and_store_prediction(model, df, scaler_X, scaler_y, feature_columns):
    """Predict the next open from the latest rows of ``df`` and log it to CSV.

    The prediction is appended to ``<notebook_dir>/predictions.csv`` unless a
    record for the latest date in ``df`` is already present.

    Args:
        model: trained model; called on the tensor from ``prepare_latest_sequence``.
        df: price DataFrame with a datetime index and ``Close`` (and normally
            ``Open``) columns.
        scaler_X: fitted feature scaler, forwarded to ``prepare_latest_sequence``.
        scaler_y: fitted target scaler used to undo the target scaling.
        feature_columns: ordered feature names expected by ``scaler_X``.

    Returns:
        ``(predicted_price, next_day)`` where ``next_day`` is the first
        weekday after the latest date in ``df`` as a ``datetime.date``
        (exchange holidays are not taken into account).
    """
    prediction_file = os.path.join(notebook_dir, 'predictions.csv')
    print(f"Attempting to save predictions to: {prediction_file}")

    latest_sequence = prepare_latest_sequence(df, scaler_X, feature_columns)
    model.eval()
    with torch.no_grad():
        scaled_prediction = model(latest_sequence).numpy()
    predicted_price = scaler_y.inverse_transform(scaled_prediction)[0][0]

    current_date = df.index[-1].date()
    prev_close = df['Close'].iloc[-1]
    # Skip weekends: a plain +1 day would turn a Friday into a Saturday.
    next_day = (pd.Timestamp(current_date) + pd.offsets.BDay(1)).date()
    predicted_change = ((predicted_price - prev_close) / prev_close) * 100

    # NOTE(review): this records the latest row's own open as the "actual"
    # price, not the open of the day being predicted (which is not known
    # yet). Confirm this is the intended meaning of Actual_Price.
    actual_price = df['Open'].iloc[-1] if 'Open' in df.columns else np.nan
    actual_change = ((actual_price - prev_close) / prev_close) * 100 if not np.isnan(actual_price) else np.nan

    prediction_record = {
        'Date': current_date,
        'Previous_Close': prev_close,
        'Predicted_Price': predicted_price,
        'Predicted_Change_Percent': predicted_change,
        'Actual_Price': actual_price,
        'Actual_Change_Percent': actual_change
    }
    dtypes = {
        'Date': 'datetime64[ns]',
        'Previous_Close': 'float64',
        'Predicted_Price': 'float64',
        'Predicted_Change_Percent': 'float64',
        'Actual_Price': 'float64',
        'Actual_Change_Percent': 'float64'
    }

    if os.path.exists(prediction_file):
        pred_df = pd.read_csv(prediction_file, parse_dates=['Date'])
    else:
        pred_df = pd.DataFrame(columns=list(dtypes)).astype(dtypes)
    new_record_df = pd.DataFrame([prediction_record]).astype(dtypes)

    # Compare as timestamps: a datetime.date never matches entries of a
    # datetime64 column, which would let duplicate rows pile up on reruns.
    already_recorded = pd.to_datetime(pred_df['Date']).eq(pd.Timestamp(current_date)).any()
    if already_recorded:
        print(f"Prediction for {current_date} already exists")
        return predicted_price, next_day

    # Avoid concatenating with an empty frame (dtype inference is unreliable there).
    if pred_df.empty:
        pred_df = new_record_df
    else:
        pred_df = pd.concat([pred_df, new_record_df], ignore_index=True)

    # Saving is best-effort: a failed write is reported, the prediction is still returned.
    try:
        pred_df.to_csv(prediction_file, index=False)
    except OSError as e:
        print(f"Error saving predictions to {prediction_file}: {e}")
    else:
        print(f"Prediction successfully saved to {prediction_file}")
    return predicted_price, next_day

# Make prediction for next day
print("\nMaking prediction for next day's opening price...")
# Same column order as the matrix the feature scaler was fitted on
# (every df_clean column except the target).
feature_columns = df_clean.drop(['Next_Day_Open'], axis=1).columns
# The raw price frame is passed in; the indicator columns are recomputed
# inside prepare_latest_sequence.
predicted_price, next_day = make_and_store_prediction(model, aapl_data, scaler_X, scaler_y, feature_columns)
print(f"Predicted next day opening price for {next_day}: ${predicted_price:.2f}")

Performing hyperparameter search over 100 random configurations

Training with config 1/100: {'batch_size': 128, 'hidden_size': 128, 'num_layers': 1, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'weight_decay': 0.001, 'num_epochs': 20}
Checkpoint /content/drive/MyDrive/Colab Notebooks/AAPL price predictor/checkpoint_best_config_0.pth has incompatible hyperparameters: {'hidden_size': 32, 'num_layers': 1, 'dropout_rate': 0.1}
Current config: {'batch_size': 128, 'hidden_size': 128, 'num_layers': 1, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'weight_decay': 0.001, 'num_epochs': 20}
Initializing model with random weights for config 0
Training model with 20 epochs
Config 1, Epoch 1/20, Train Loss: 0.055298, Test Loss: 0.009270
Config 1, Epoch 2/20, Train Loss: 0.020512, Test Loss: 0.061041
Config 1, Epoch 3/20, Train Loss: 0.013976, Test Loss: 0.004354
Config 1, Epoch 4/20, Train Loss: 0.008802, Test Loss: 0.003934
Config 1, Epoch 5/20, Train Loss: 0.005647, Test Loss: 0.002723
Config 1, E

KeyboardInterrupt: 