In [1]:
import sys

# sys.path.insert(0, "../..")
from pathlib import Path
from src.settings import SearchSpace


First:
- go to the terminal
- go to the ML22 folder, use `cd ~/ML22` or `z ML` if necessary
- if you are planning on closing the laptop to let this run by itself, start a tmux session by giving the command `tmux`. You can name the session with `ctrl+b $`, leave the tmux with `ctrl+b` d, access it again with `tmux a -t 0` or replace the 0 with the name you gave it. See [tmux](https://github.com/tmux/tmux/wiki/Getting-Started) for more info.
- in that folder is the file `hypertune.py`. Run it with `poetry run python hypertune.py`
- In the hypertune.py file, a tune_dir is specified: `models/ray`. We will check the contents of that folder after the hypertune finished. You can also use tensorboard to check the results. 
- this will take some time. I clocked 1901 seconds (30 minutes) on the VM

 0.0992908 |           119 |            3 |     26 |         576.744  |    0 |   0.942187

In [6]:
tune_dir = Path("models/ray/train_2023-06-07_19-37-39").resolve()
tune_dir.exists()


True

In [7]:
from ray.tune import ExperimentAnalysis
import ray
ray.init(ignore_reinit_error=True)

analysis = ExperimentAnalysis(str(tune_dir))


2023-06-11 18:17:22,804	INFO worker.py:1474 -- Calling ray.init() again after it has already been called.
2023-06-11 18:17:22,818	INFO experiment_analysis.py:966 -- No trial data passed in during `ExperimentAnalysis` initialization -- you are most likely loading the experiment after it has completed.
Loading trial data from the experiment checkpoint file. This may result in loading some stale information, since checkpointing is periodic.
- /workspaces/MLopdracht/models/ray/train_2023-06-07_19-37-39/train_153278bc_49_data_dir=ref_ph_5a725494,dropout=0.2943,hidden_size=168,input_size=13,num_layers=5,output_size=20,tune_dir=ref_p_2023-06-07_19-45-13
- /workspaces/MLopdracht/models/ray/train_2023-06-07_19-37-39/train_3084be12_48_data_dir=ref_ph_5a725494,dropout=0.3484,hidden_size=86,input_size=13,num_layers=2,output_size=20,tune_dir=ref_ph_2023-06-07_19-44-42
- /workspaces/MLopdracht/models/ray/train_2023-06-07_19-37-39/train_85eefcc1_19_data_dir=ref_ph_5a725494,dropout=0.1027,hidden_size=

So, we find some info:

In [8]:
analysis.results_df.columns


Index(['iterations', 'train_loss', 'test_loss', 'Accuracy', 'time_this_iter_s',
       'done', 'timesteps_total', 'episodes_total', 'training_iteration',
       'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip',
       'time_since_restore', 'iterations_since_restore', 'experiment_tag',
       'config/input_size', 'config/output_size', 'config/tune_dir',
       'config/data_dir', 'config/hidden_size', 'config/dropout',
       'config/num_layers'],
      dtype='object')

Let's focus on the parameters we wanted to tune.

In [12]:
import plotly.express as px

plot = analysis.results_df
select = ["Accuracy", "config/hidden_size", "config/dropout", "config/num_layers"]
p = plot[select].reset_index().dropna()


Let's sort by accuracy

In [13]:
p.sort_values("Accuracy", inplace=True)

Make a parallel plot

In [14]:
px.parallel_coordinates(p, color="Accuracy")


Get the best trial

In [None]:
analysis.get_best_trial(metric="test_loss", mode="min")


train_70880355

The top ten

In [None]:
p[-10:]


Unnamed: 0,trial_id,Accuracy,config/hidden_size,config/dropout,config/num_layers
17,5cf88093,0.928768,144.0,0.108569,2.0
48,faa16825,0.936121,243.0,0.353752,5.0
34,c8527815,0.943934,212.0,0.141341,5.0
9,34755f21,0.94761,234.0,0.119735,5.0
5,2cea3dee,0.950827,169.0,0.252974,3.0
0,0424d1e3,0.955882,246.0,0.196585,2.0
6,2f400d0a,0.960938,195.0,0.371472,4.0
49,fc448b9e,0.963235,250.0,0.268407,5.0
33,c4bb5948,0.969669,252.0,0.132355,5.0
22,70880355,0.978401,238.0,0.237369,4.0


Or the best config

In [None]:
analysis.get_best_config(metric="Accuracy", mode="max")


{'input_size': 13,
 'output_size': 20,
 'tune_dir': PosixPath('/workspaces/MLopdracht/models/ray'),
 'data_dir': PosixPath('/workspaces/MLopdracht/data/raw'),
 'hidden_size': 238,
 'dropout': 0.23736899150172622,
 'num_layers': 4}

In [6]:
tune_dir2 = Path("models/ray/train_2023-06-06_18-47-30").resolve()
tune_dir2.exists()
analysis = ExperimentAnalysis(tune_dir2)

plot = analysis.results_df
select = ["Accuracy", "config/hidden_size", "config/dropout", "config/num_layers"]
p2 = plot[select].reset_index().dropna()

p2.sort_values("Accuracy", inplace=True)

AttributeError: 'PosixPath' object has no attribute 'decode'

In [None]:
p2[-10:]

Unnamed: 0,trial_id,Accuracy,config/hidden_size,config/dropout,config/num_layers
7,3c7b51c0,0.915901,184.0,0.051661,4.0
30,8aae86a2,0.917279,200.0,0.270937,4.0
20,737244e9,0.92693,236.0,0.267558,3.0
44,d6016bfb,0.935202,212.0,0.156176,4.0
38,c0a86495,0.935662,248.0,0.084951,4.0
33,a850047f,0.939338,180.0,0.058717,2.0
10,4a911404,0.954044,220.0,0.066578,2.0
46,e9bd4c88,0.957721,208.0,0.240937,4.0
49,fe800b19,0.965993,216.0,0.115432,2.0
15,575cd54f,0.974724,208.0,0.065014,3.0


In [15]:
def test(prefix:Path,dir_1:Path):
    tune_dir = Path(prefix) / Path(dir_1).resolve()
    exists = tune_dir.exists()
    print(f'!!!!!!!!!!dir TUNE DIR {tune_dir} ?')
    return tune_dir

tune_dir = Path("models/ray").resolve()

test(tune_dir, Path("something"))

!!!!!!!!!!dir TUNE DIR /workspaces/MLopdracht/something ?


PosixPath('/workspaces/MLopdracht/something')