# MountainCar-v0
Run the MountainCar-v0 simulation to train or test a model using Q-learning (DQN and DDQN).  
Alternative method to train the model:  
1. Open a command terminal in the project root directory
2. Execute the command ``python -m run.MountainCar_v0``

In [1]:
import os
import sys
import gym
import numpy as np
import configparser
from datetime import datetime
import tensorflow as tf
import tensorflow.compat.v1 as tf_v1

## Import local modules

In [2]:
# The project root is taken to be the parent of the current working directory.
# It is placed first on the module search path so that the `run` and `src`
# imports that follow can be resolved (presumably they live in that directory).
_parent_of_cwd = os.path.join(os.getcwd(), os.pardir)
ROOT_DIR = os.path.abspath(_parent_of_cwd)
sys.path.insert(0, ROOT_DIR)
from run import MountainCar_v0
from src import ReplayBuffer
from src.Utils import get_logger, eval_dict_values

## Global Variables

In [3]:
# Gym environment id and the sub-directory names used when building paths
ENV_NAME = "MountainCar-v0"
CONFIG_DIR = "config"
SUMM_DIR = "summaries"
LOG_DIR = "log"
# The run cell trains/tests one model per entry of SEEDS. The single seed is drawn
# from [100, 1000) using NumPy's global RNG, which is not seeded in this notebook,
# so the value changes between kernel restarts.
SEEDS = np.random.randint(low=100, high=1000, size=1, dtype=np.uint16)
# TensorFlow session options: per-process GPU memory fraction of 0.5 and soft
# device placement enabled.
_GPU_OPTIONS = tf_v1.GPUOptions(per_process_gpu_memory_fraction=0.5)
TF_CONFIG = tf_v1.ConfigProto(gpu_options=_GPU_OPTIONS, allow_soft_placement=True)

## Parse arguments

In [4]:
# Run options for this notebook
plot_result = False        # Plots the result in matplotlib
test_model_chkpt = None    # Path to a trained model checkpoint; a new model is trained if none is provided
record_interval = 0        # NOTE(review): not referenced by any other visible cell - confirm it is still needed

# Timestamp shared by the summary directory name and the log file name of this run
date_time = datetime.now().strftime("%d.%m.%Y %H.%M")
run_tag = "{} {}".format(ENV_NAME, date_time)

# Root directory needed to be specified explicitly in Jupyter Notebook
summ_dir = os.path.join(ROOT_DIR, SUMM_DIR, run_tag)
log_name = "Results {} {}.log".format(ENV_NAME, date_time)
log_file = os.path.join(summ_dir, LOG_DIR, log_name)
config_file = os.path.join(ROOT_DIR, CONFIG_DIR, "MountainCar-v0.ini")


## Setup logger directory

In [5]:
# Create the folder that will contain the log file (no error if it already exists)
log_dir = os.path.dirname(log_file)
os.makedirs(log_dir, exist_ok=True)

## Load and unpack configurations
Parameters are stored in an **.ini file** under the **config** subdirectory.  
They are loaded from the given **config_file**; to change a parameter, edit that configuration file.

In [6]:
# Load the configuration file and pull out the pieces consumed by the run cell.
config_dict = MountainCar_v0.get_configuration(config_file)

# "kwargs" and "log_kwargs" each unpack into an (init, train) pair that is later
# forwarded to MountainCar_v0.run.
model_kwargs = config_dict["kwargs"]
init_kwargs, train_kwargs = model_kwargs
logging_kwargs = config_dict["log_kwargs"]
log_init_kwargs, log_train_kwargs = logging_kwargs

# Value handed to ReplayBuffer when the replay memory is created
other_settings = config_dict["others"]
mem_size = other_settings["mem_size"]

## Run MountainCar-v0

In [7]:
if __name__ == "__main__":
    # Create the environment first so it can be released in the `finally` clause
    # no matter how the run ends (normal completion, error, or a manual kernel
    # interrupt while training).
    env = gym.make(ENV_NAME)
    try:
        # Replay buffer and logger are shared by every model trained below
        mem = ReplayBuffer(mem_size)
        logger = get_logger(log_file)
        # Run the program: one model per seed, numbered from 1
        for model_i, seed in enumerate(SEEDS, start=1):
            MountainCar_v0.run(env, seed, mem, logger, summ_dir, init_kwargs, train_kwargs,
                               log_init_kwargs, log_train_kwargs, plot_result, model_i=model_i,
                               sess_config=TF_CONFIG, test_model_chkpt=test_model_chkpt)
    finally:
        # Release the environment's resources even when the run is interrupted
        env.close()

W0915 19:03:14.614444  5780 deprecation.py:506] From c:\users\raj k\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0915 19:03:14.784721  5780 deprecation.py:323] From c:\users\raj k\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\losses\losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



# Training: Model 1
Goal: Get average reward of -110.00 over 100 consecutive trials!
Training agent. Please be patient...

KeyboardInterrupt: 