<a href="https://colab.research.google.com/github/MicahSee/Optimized-Timeseries-Autoencoder/blob/main/Autoencoder_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from rae import LSTM_AE
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import tempfile

ModuleNotFoundError: No module named 'rae'

In [10]:
train_arr = np.random.rand(100, 30, 2)

In [15]:
from functools import lru_cache
def train_model_ray(config):
    # allocate hardware
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"

    # set up training params
    epochs = 100
    batch_size = 10
    max_grad = 0.005
    lr = 0.001
    embedding_dim = config['embedding_dim']

    # set up training data
    train_tensor = torch.Tensor(train_arr).to(device)
    train_dataset = TensorDataset(train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    # set up model
    n_features = train_tensor.shape[2]
    seq_len = train_tensor.shape[1]
    model = LSTM_AE(seq_len, n_features, embedding_dim, max_grad)
    model = model.to(device)
    criterion = torch.nn.MSELoss(reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, epochs+1):
        running_loss = 0
        model.train()

        for batch_idx, (data, target) in enumerate(train_loader, 1):
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward --> backward --> optimize
            output = model(data)
            loss = criterion(output.flatten(), target.type_as(output))

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        running_loss /= len(train_loader)

        # no val loss for now
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {running_loss}")

    with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
        path = os.path.join(temp_checkpoint_dir, "checkpoint.pt")
        torch.save(model.state_dict(), path)
        checkpoint = Checkpoint.from_directory(temp_checkpoint_dir)
        train.report({"loss": running_loss}, checkpoint=checkpoint)

    return

In [16]:
!pip install ray[tune]



In [17]:
import os
import tempfile
from ray import tune, train
from ray.train import Checkpoint
from ray.tune.schedulers import ASHAScheduler

In [26]:
num_optim_trials = 1
seq_len = 30

search_space = {
    "embedding_dim": tune.grid_search([10, 20, 30, 40, 50, 60]),
    #"max_grad": tune.grid_search([0.005]),
}

tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(train_model_ray),
            resources={"cpu": 2, "gpu": 1}
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=ASHAScheduler(),
            num_samples=num_optim_trials,
        ),
        param_space=search_space,
)

In [27]:
results = tuner.fit()

2024-01-18 21:25:45,391	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 1. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+------------------------------------------------------------------------+
| Configuration for experiment     train_model_ray_2024-01-18_21-25-43   |
+------------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator                 |
| Scheduler                        AsyncHyperBandScheduler               |
| Number of trials                 6                                     |
+------------------------------------------------------------------------+

View detailed results here: /root/ray_results/train_model_ray_2024-01-18_21-25-43
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/train_model_ray_2024-01-18_21-25-43`

Trial status: 6 PENDING
Current time: 2024-01-18 21:25:45. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+----------------------------------------------------------+
| Trial name                    status       embedding_dim |
+-----------------

Resume experiment with: Tuner.restore(path="/root/ray_results/train_model_ray_2024-01-18_21-25-43", trainable=...)
- train_model_ray_24112_00000: FileNotFoundError('Could not fetch metrics for train_model_ray_24112_00000: both result.json and progress.csv were not found at /root/ray_results/train_model_ray_2024-01-18_21-25-43/train_model_ray_24112_00000_0_embedding_dim=10_2024-01-18_21-25-45')
- train_model_ray_24112_00001: FileNotFoundError('Could not fetch metrics for train_model_ray_24112_00001: both result.json and progress.csv were not found at /root/ray_results/train_model_ray_2024-01-18_21-25-43/train_model_ray_24112_00001_1_embedding_dim=20_2024-01-18_21-25-45')
- train_model_ray_24112_00002: FileNotFoundError('Could not fetch metrics for train_model_ray_24112_00002: both result.json and progress.csv were not found at /root/ray_results/train_model_ray_2024-01-18_21-25-43/train_model_ray_24112_00002_2_embedding_dim=30_2024-01-18_21-25-45')
- train_model_ray_24112_00003: FileNotF

Trial status: 6 PENDING
Current time: 2024-01-18 21:26:21. Total running time: 35s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+----------------------------------------------------------+
| Trial name                    status       embedding_dim |
+----------------------------------------------------------+
| train_model_ray_24112_00000   PENDING                 10 |
| train_model_ray_24112_00001   PENDING                 20 |
| train_model_ray_24112_00002   PENDING                 30 |
| train_model_ray_24112_00003   PENDING                 40 |
| train_model_ray_24112_00004   PENDING                 50 |
| train_model_ray_24112_00005   PENDING                 60 |
+----------------------------------------------------------+

