

### 1. Import the Necessary Packages

In [2]:
###pip install -r requirements.txt 
import gym
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline
import traci
import timeit
import numpy as np
from utils import *
import os
import sys
import datetime
import pandas as pd
from pandas import DataFrame
from utils import import_train_configuration
from shutil import copyfile
from pathlib import Path

###  Check the config file

In [3]:
# Parse the training settings from the INI file and echo them for inspection.
settings_file = 'training_settings.ini'
config = import_train_configuration(config_file=settings_file)
print(config)

<class 'str'>
{'gui': False, 'total_episodes': 5, 'max_steps': 3600, 'n_cars_generated': 1000, 'green_duration': 10, 'yellow_duration': 3, 'num_layers': None, 'width_layers': None, 'batch_size': 256, 'learning_rate': 0.0001, 'training_epochs': 100, 'target_update': 3, 'memory_size_min': 600, 'memory_size_max': 100000, 'eps_start': 1.0, 'eps_end': 0.05, 'eps_decay': 0.999, 'num_states': 17, 'num_actions': 8, 'gamma': 0.999, 'hidden_dim': ['64', '64'], 'tau': 0.001, 'models_path_name': 'models', 'sumocfg_file_name': 'dummy\\sumo_config.sumocfg', 'generation_process': 'random', 'state_representation': 'amount_cars', 'action_representation': 'choose_light', 'agent_type': 'DQN', 'model': 'model', 'reward_definition': 'waiting_cars'}


### 2. Instantiate the Environment and Agent

In [4]:
from Environment.SUMO_train import SUMO

# Build the SUMO training environment and report the sizes of its spaces.
env = SUMO()
print('State shape: ', env.observation_space.shape[0])
print('Number of actions: ', env.action_space.n)

from dqn_agent import Agent

# Config keys whose values are handed to Agent positionally, in call order.
# NOTE(review): the printed config shows hidden_dim as a list of strings
# (['64', '64']) — confirm that Agent converts them to integers.
agent_hparam_keys = (
    'num_states', 'num_actions', 'hidden_dim',
    'memory_size_max', 'batch_size', 'gamma', 'tau',
    'learning_rate', 'target_update',
)
agent = Agent(*(config[key] for key in agent_hparam_keys), seed=0)

<class 'str'>
<class 'str'>


KeyError: 'red_duration'

### 3. Train the Agent

In [None]:
# Ask the project helper for this run's output folder and report it.
models_root = config['models_path_name']
path = set_train_path(models_root)
print('Training results will be saved in:', path)


#Define the RL training loop
def RL(n_episodes=config['total_episodes'], max_t=config['max_steps']+1000, eps_start=config['eps_start'], eps_end=config['eps_end'], eps_decay=config['eps_decay']):
    """Deep Q-Learning training loop.

    Uses the notebook-level ``env``, ``agent``, ``config`` and ``path`` objects.
    A fresh route file is generated for every episode (seeded with the episode
    number), the final network weights are written to
    ``<path>/checkpoint.pth`` and the environment is closed before returning,
    even if training is interrupted by an exception.

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon

    Returns
    =======
        scores (list): total reward collected in each episode
        training_time (list): seconds elapsed since the start of training,
            sampled at the end of each episode
    """
    # Imported once per call instead of once per episode.
    from generator import TrafficGenerator

    timestamp_start = datetime.datetime.now()
    training_time = []
    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    try:
        for i_episode in range(1, n_episodes+1):
            # NOTE(review): assumes the two positional arguments are
            # (max_steps, n_cars_generated); the previous literals 3600 and
            # 1000 equal those config entries — confirm against generator.py.
            traffic_gen = TrafficGenerator(
                    config['max_steps'],
                    config['n_cars_generated']
                )
            traffic_gen.generate_routefile(seed=i_episode)

            state = env.reset()
            score = 0
            for _ in range(max_t):
                # Only the chosen action is needed while training.
                _, action = agent.act(np.array(state), eps)
                next_state, reward, done, _ = env.step(action)
                agent.step(state, action, reward, next_state, done)
                state = next_state
                score += reward
                if done:
                    break
            scores_window.append(score)       # save most recent score
            scores.append(score)              # save most recent score
            training_time.append((datetime.datetime.now()-timestamp_start).total_seconds())
            eps = max(eps_end, eps_decay*eps) # decrease epsilon

            mean_score = np.mean(scores_window)
            print('\r                                       Episode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score), end="")
            if i_episode % 100 == 0:
                print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score))
            if mean_score >= 500:
                # Clamp at 0 so runs shorter than the 100-episode window do not
                # report a negative episode count. The weights are saved once,
                # right after the loop, into the run folder.
                print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(max(i_episode-100, 0), mean_score))
                break
        torch.save(agent.qnetwork_local.state_dict(), os.path.join(path, 'checkpoint.pth'))
    finally:
        # Always release the environment, also when an episode raises.
        env.close()
    return scores, training_time


# Run the training
scores, training_time = RL()

# Plot the score of every episode and store the figure in the run folder.
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
fig.savefig(os.path.join(path, 'training_reward.png'))
plt.show()

# Keep a copy of the settings that produced this run next to its results.
copyfile(src='training_settings.ini', dst=os.path.join(path, 'training_settings.ini'))

# Per-episode statistics collected by the environment during training.
wait_store = env._cumulative_wait_store
queue_store = env._avg_queue_length_store

reward_frame = DataFrame(data={"reward": scores})
reward_frame.to_csv(os.path.join(path, 'reward.csv'), sep=',')

stats_frame = DataFrame(data={
    "reward": scores,
    "training_time": training_time,
    "waiting_time": wait_store,
    "avg_queue_length": queue_store,
})
stats_frame.to_csv(os.path.join(path, 'training_stats.csv'), sep=',', index=False)

add_masterdata(path, config, scores, training_time, wait_store, queue_store)

### 4. Test the Trained Agent

In [None]:
# Close the current TraCI connection (presumably so the test environments
# created in the next cell can open their own — TODO confirm).
traci.close()

In [None]:
# Load the weights of a previously trained run.
# NOTE(review): the run folder is hard-coded and replaces the `path` created
# by the training cell — point it at the run you want to evaluate.
path=Path("models/model_14")
agent.qnetwork_local.load_state_dict(torch.load(os.path.join(path, 'checkpoint.pth')))

# Load the test environment
from Environment.SUMO_test import SUMO_test

scenarios=['5-6_(1380)', '8-9_(2600)', '17-18_(3100)', '23-24_(470)']
test_cases=[Path('dummy/sumo_test1.sumocfg'), Path('dummy/sumo_test2.sumocfg'),Path('dummy/sumo_test3.sumocfg'),Path('dummy/sumo_test4.sumocfg')]

q_columns=['action 1', 'action 2', 'action 3', 'action 4', 'action 5', 'action 6', 'action 7', 'action 8', 'time']
state_columns=['N2TL_0','N2TL_1','N2TL_2','N2TL_3',
               'E2TL_0', 'E2TL_1','E2TL_2','E2TL_3',
               'S2TL_0', 'S2TL_1', 'S2TL_2', 'S2TL_3',
               'W2TL_0', 'W2TL_1', 'W2TL_2', 'W2TL_3','TL']

os.makedirs(os.path.join(path, 'test'), exist_ok=True)

# Replay the greedy policy on every test scenario and export its logs.
for scenario, test_case in zip(scenarios, test_cases):
    # Shut down the previous environment before starting the next scenario.
    env.close()
    env=SUMO_test(test_case)
    state = env.reset()
    path_test=os.path.join(path,'test',scenario)
    print(path_test)

    _qvalues=[]
    for step_idx in range(config['max_steps']+1000):
        q_values, action = agent.act(np.array(state))
        # One row per decision: the Q-values followed by the simulation time.
        # Previously the time was appended to the agent's own list and nothing
        # was ever stored in _qvalues, so q_list.csv came out empty.
        # NOTE(review): assumes q_values flattens to the 8 per-action values
        # named in q_columns — confirm against Agent.act.
        _qvalues.append(np.ravel(q_values).tolist() + [traci.simulation.getTime()])
        env.render()
        state, reward, done, _ = env.step(action)
        if done:
            break

    os.makedirs(path_test, exist_ok=True)

    # (file name, recorded rows, column names) for every log of this scenario.
    exports=[
        ('position.csv', env._positions, ['x-position', 'y-position', 'step']),
        ('emission.csv', env._emissions, ['emission', 'step']),
        ('q_list.csv', _qvalues, q_columns),
        ('waiting.csv', env._waiting, ['waiting time','step']),
        ('waiting_car.csv', env._waiting_cars, ['car id','waiting time car']),
        ('action.csv', env._actions, ['action', 'step']),
        ('reward_replay.csv', env._rewards, ['reward', 'step']),
        ('state_memory.csv', env._state_memory, state_columns),
    ]
    for file_name, records, columns in exports:
        pd.DataFrame(records, columns=columns).to_csv(os.path.join(path_test, file_name), index=False)

env.close()

In [None]:
# Prepend 0 to the list through slice assignment, then show the result.
numbers = [1, 2, 3, 5, 6, 7]
numbers[:0] = [0]
print(numbers)

In [None]:
# Bind a module-level variable whose identifier is held in a string.
name = "hello"
globals().update({name: 10})

In [None]:
# Ten independent empty lists. `[[]]*10` repeats a single list object ten
# times, so appending to one entry would show up in all of them.
lists = [[] for _ in range(10)]

In [None]:
# Display the list of lists built in the previous cell.
lists