In [7]:
"""
This module aims to describe how to implement
your own model instead of using default ones.

Of course you shall have consistency between model creation
and ML platform configuration.
"""
import gym
import numpy as np
from pathlib import Path
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Dense, Input, Flatten
)

from sweet.agents.dqn.train import learn
from sweet.interface.tf.tf_platform import TFPlatform


def custom_model(env_name):
    # Create env to retrieve state shape and action space
    env = gym.make(env_name)
    input_shape = env.observation_space.shape
    output_shape = env.action_space.n

    # Then create TF 2.0 model
    inputs = Input(shape=input_shape)
    x = Flatten()(inputs)

    x = Dense(128, activation='tanh')(x)
    x = Dense(128, activation='tanh')(x)
    predictions = Dense(output_shape, activation='linear')(x)

    # Finally build model
    model = Model(inputs=inputs, outputs=predictions, name='custom_model')
    model.summary()

    return model


def experiment_custom_model(env_name):
    # Create custom model
    model = custom_model(env_name)

    # Train model
    learn(
        ml_platform=TFPlatform,
        env_name=env_name,
        model=model,
        total_timesteps=1e3,
        lr=0.0003,
        targets={
            'output_dir': Path('./target/notebook/'),
            'models_dir': 'models_checkpoints',
            'logs_dir': 'logs',
            'tb_dir': 'tb_events'
        }
    )

In [8]:
experiment_custom_model('CartPole-v0')

dqn-train   : INFO     {'logger': <sweet.common.logging.Logger object at 0x7fc470419438>, 'models_dir': PosixPath('target/notebook/run_18-02-2020_19_11_03/models_checkpoints'), 'run_target_dir': PosixPath('target/notebook/run_18-02-2020_19_11_03'), 'targets': {'output_dir': PosixPath('target/notebook'), 'models_dir': 'models_checkpoints', 'logs_dir': 'logs', 'tb_dir': 'tb_events'}, 'log_interval': 1, 'model_checkpoint_freq': 100000.0, 'replay_buffer': 2000, 'epsilon_decay': 0.995, 'epsilon_min': 0.01, 'epsilon': 1.0, 'gamma': 0.95, 'lr': 0.0003, 'total_timesteps': 1000.0, 'model': <tensorflow.python.keras.engine.training.Model object at 0x7fc470485d30>, 'env_name': 'CartPole-v0', 'ml_platform': <class 'sweet.interface.tf.tf_platform.TFPlatform'>}
dqn-train   : INFO     {'logger': <sweet.common.logging.Logger object at 0x7fc470419438>, 'models_dir': PosixPath('target/notebook/run_18-02-2020_19_11_03/models_checkpoints'), 'run_target_dir': PosixPath('target/notebook/run_18-02-2020_19_11_

Model: "custom_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 4)]               0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               640       
_________________________________________________________________
dense_7 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 258       
Total params: 17,410
Trainable params: 17,410
Non-trainable params: 0
_________________________________________________________________


dqn-train   : INFO     Update
dqn-train   : INFO     Update
dqn-train   : INFO     total_timesteps=45
dqn-train   : INFO     total_timesteps=45
dqn-train   : INFO     FPS=121
dqn-train   : INFO     FPS=121
dqn-train   : INFO     Mean rewards=45.0
dqn-train   : INFO     Mean rewards=45.0
dqn-train   : INFO     Mean episode length=45.0
dqn-train   : INFO     Mean episode length=45.0
dqn-train   : INFO     Time elapsed=0:00:00.369443
dqn-train   : INFO     Time elapsed=0:00:00.369443
dqn-train   : INFO     ETA=0:00:07.892562
dqn-train   : INFO     ETA=0:00:07.892562
dqn-train   : INFO     Update
dqn-train   : INFO     Update
dqn-train   : INFO     total_timesteps=60
dqn-train   : INFO     total_timesteps=60
dqn-train   : INFO     FPS=120
dqn-train   : INFO     FPS=120
dqn-train   : INFO     Mean rewards=15.0
dqn-train   : INFO     Mean rewards=15.0
dqn-train   : INFO     Mean episode length=15.0
dqn-train   : INFO     Mean episode length=15.0
dqn-train   : INFO     Time elapsed=0:00:00.49

dqn-train   : INFO     Mean rewards=95.0
dqn-train   : INFO     Mean episode length=95.0
dqn-train   : INFO     Mean episode length=95.0
dqn-train   : INFO     Time elapsed=0:00:02.666096
dqn-train   : INFO     Time elapsed=0:00:02.666096
dqn-train   : INFO     ETA=0:00:04.676471
dqn-train   : INFO     ETA=0:00:04.676471
dqn-train   : INFO     Update
dqn-train   : INFO     Update
dqn-train   : INFO     total_timesteps=408
dqn-train   : INFO     total_timesteps=408
dqn-train   : INFO     FPS=137
dqn-train   : INFO     FPS=137
dqn-train   : INFO     Mean rewards=44.0
dqn-train   : INFO     Mean rewards=44.0
dqn-train   : INFO     Mean episode length=44.0
dqn-train   : INFO     Mean episode length=44.0
dqn-train   : INFO     Time elapsed=0:00:02.961745
dqn-train   : INFO     Time elapsed=0:00:02.961745
dqn-train   : INFO     ETA=0:00:04.321168
dqn-train   : INFO     ETA=0:00:04.321168
dqn-train   : INFO     Update
dqn-train   : INFO     Update
dqn-train   : INFO     total_timesteps=427
dq

dqn-train   : INFO     ETA=0:00:03.272727
dqn-train   : INFO     ETA=0:00:03.272727
dqn-train   : INFO     Update
dqn-train   : INFO     Update
dqn-train   : INFO     total_timesteps=593
dqn-train   : INFO     total_timesteps=593
dqn-train   : INFO     FPS=131
dqn-train   : INFO     FPS=131
dqn-train   : INFO     Mean rewards=25.0
dqn-train   : INFO     Mean rewards=25.0
dqn-train   : INFO     Mean episode length=25.0
dqn-train   : INFO     Mean episode length=25.0
dqn-train   : INFO     Time elapsed=0:00:04.505574
dqn-train   : INFO     Time elapsed=0:00:04.505574
dqn-train   : INFO     ETA=0:00:03.106870
dqn-train   : INFO     ETA=0:00:03.106870
dqn-train   : INFO     Update
dqn-train   : INFO     Update
dqn-train   : INFO     total_timesteps=636
dqn-train   : INFO     total_timesteps=636
dqn-train   : INFO     FPS=132
dqn-train   : INFO     FPS=132
dqn-train   : INFO     Mean rewards=43.0
dqn-train   : INFO     Mean rewards=43.0
dqn-train   : INFO     Mean episode length=43.0
dqn-tr

In [30]:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import pandas as pd

import traceback


# Extraction function
def tflog2pandas(path: str) -> pd.DataFrame:
    """convert single tensorflow log file to pandas DataFrame
    
    Parameters
    ----------
    path : str
        path to tensorflow log file
    
    Returns
    -------
    pd.DataFrame
        converted dataframe
    """
    DEFAULT_SIZE_GUIDANCE = {
        "compressedHistograms": 1,
        "images": 1,
        "scalars": 0,  # 0 means load all
        "histograms": 1,
    }
    runlog_data = pd.DataFrame({"total_timesteps": [], "fps": [], "step": []})
    try:
        event_acc = EventAccumulator(path, DEFAULT_SIZE_GUIDANCE)
        event_acc.Reload()
        tags = event_acc.Tags()["scalars"]
        print(event_acc.Tags())
        for tag in tags:
            event_list = event_acc.Scalars(tag)
            values = list(map(lambda x: x.value, event_list))
            step = list(map(lambda x: x.step, event_list))
            r = {"metric": [tag] * len(step), "value": values, "step": step}
            r = pd.DataFrame(r)
            runlog_data = pd.concat([runlog_data, r])
    # Dirty catch of DataLossError
    except:
        print("Event file possibly corrupt: {}".format(path))
        traceback.print_exc()
    return runlog_data

In [31]:
tflog2pandas('./target/notebook/run_18-02-2020_19_11_03/tb_events/')

{'images': [], 'audio': [], 'histograms': [], 'scalars': [], 'distributions': [], 'tensors': ['tb_logs/total_timesteps', 'tb_logs/FPS', 'tb_logs/Mean rewards', 'tb_logs/Mean episode length'], 'graph': False, 'meta_graph': False, 'run_metadata': []}
Event file possibly corrupt: ./target/notebook/run_18-02-2020_19_11_03/tb_events/


Traceback (most recent call last):
  File "<ipython-input-30-4fed7c5d9ace>", line 34, in tflog2pandas
    event_list = event_acc.Scalars(tag)
  File "/home/adrien/.virtualenvs/sweet/lib/python3.6/site-packages/tensorboard/backend/event_processing/event_accumulator.py", line 438, in Scalars
    return self.scalars.Items(tag)
  File "/home/adrien/.virtualenvs/sweet/lib/python3.6/site-packages/tensorboard/backend/event_processing/reservoir.py", line 110, in Items
    raise KeyError('Key %s was not found in Reservoir' % key)
KeyError: 'Key tb_logs/total_timesteps was not found in Reservoir'


Unnamed: 0,total_timesteps,fps,step
