## Colab Mount Drive


**IMPORTANT**

This notebook assumes the project files have already been uploaded to your Google Drive, so the same files can be reused across sessions without re-uploading them (which saves bandwidth).

Otherwise, you can upload the files manually and adjust the paths in the code accordingly (no Drive mount needed).

In [None]:
# Mount Google Drive so the project files stored there are reachable under /content/drive.
# The import is guarded because this notebook is also run outside Colab (see the Windows
# branch of the path setup cell), where the google.colab package does not exist.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available - skipping Drive mount (not running in Colab)")
else:
    drive.mount('/content/drive')


## Install Sumo for Linux (Colab)

In [None]:
# Install SUMO with apt from the ppa:sumo/stable repository (apt-based Linux such as the Colab VM).
!sudo add-apt-repository -y ppa:sumo/stable
!sudo apt-get update
!sudo apt-get install -y sumo sumo-tools sumo-doc


In [None]:
# NOTE(review): a `!` command presumably runs in its own short-lived subshell, so sourcing
# ~/.bashrc here would not change the kernel's environment - confirm whether this cell is needed.
!source ~/.bashrc


In [None]:
!sumo --version


## Install Packages

In [None]:
!pip install traci
!pip install dotenv
!pip install ray
!pip install ray[rllib]
!pip install colorama
!pip install optuna
!pip install stable_baselines3

## Set Main Paths

In [None]:
def is_colab():
    """Report whether the notebook is executing inside Google Colab.

    Returns:
        bool: True when the `google.colab` package can be imported, False otherwise.
    """
    try:
        import google.colab  # noqa: F401 - imported only to probe availability
    except ImportError:
        return False
    else:
        return True



In [None]:
import platform

# Resolve the project root for the current machine. Two setups are configured:
# Google Colab (Drive mounted under /content/drive) and a local Windows Drive mirror.
if platform.system() == "Linux":
    if is_colab():
        print("Running in Google Colab")
        path_main_folder = "/content/drive/My Drive/study/graduation_project/final/Code/project_files/TrafficManager/TrafficManager/"
    else:
        # Without this branch path_main_folder stays undefined and the cell dies a few
        # lines further down with an unhelpful NameError; fail here with a usable message.
        raise RuntimeError(
            "Linux outside Google Colab is not configured: set path_main_folder "
            "to the TrafficManager project folder for this machine."
        )
else:
    # NOTE(review): every non-Linux OS is treated as the Windows setup.
    print("Running on Windows")
    path_main_folder = "I:/My Drive/study/graduation_project/final/Code/project_files/TrafficManager/TrafficManager/"

# Scenario folder. Known alternatives: "AIST_Cleaned/data2_mosheerIsmail/" and
# "AIST_Cleaned/data3_san_stefano/".
path_data_folder = path_main_folder + "AIST_Cleaned/data3_san_stefano/"
path_cfg = path_data_folder + "cfg.sumocfg"
# The project code sits directly in the main folder.
path_project_folder = path_main_folder + ""



In [None]:
# Files used by the rest of the notebook.
yaml_file = f"{path_project_folder}config.yaml"  # experiment and algorithm settings
keys_file = f"{path_project_folder}keys.env"     # loaded with load_dotenv before the SUMO paths are read
log_file = "sumo_log.txt"                         # passed to SUMO via --log

In [None]:
# Sanity check: report whether the project root resolved above is reachable.

import os

if not os.path.exists(path_main_folder):
  print(f"File does not exist at: {path_main_folder}")
else:
  print(f"File exists at: {path_main_folder}")


## Import Packages

In [None]:
# Add the project folder to the module search path so the project modules imported in the
# next cell (Connections, SumoEnvSingleAgent, ...) can be found - presumably they live there.
import sys
sys.path.append(path_project_folder)


In [None]:
from Connections import SumoConnection
from Connections.Connection import *
from dotenv import load_dotenv
import os
import traci
import gymnasium as gym
from numpy import inf
import numpy as np
import SumoEnvSingleAgent
from Utils_reporting import *
from Utils_running_singleAgent import *
from rewards import *
from stable_baselines3 import PPO ,DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
import torch
import time
from Callbacks import *
from models.d3qn import D3QNAgent

## Load Parameters

In [None]:
import yaml

# Parse the YAML settings file into `config`.
# After editing the YAML file, re-running this cell is enough to pick up the changes.
with open(yaml_file, "r") as file:
    config = yaml.safe_load(file)


In [None]:
# Split the loaded configuration into its top-level sections.
general_settings = config['general_settings']
# Experiment values are stored in two groups: "changable" and "const" settings.
experiment_settings_changable = config["experiment_settings"]['changable_settings']
experiment_settings_const = config["experiment_settings"]["const_settings"]
# Hyperparameters keyed by algorithm name ('DQN', 'PPO' and 'D3QN' are read further down).
algorithm_settings=config["algorithms_settings"]

In [None]:
# General run options.
is_gui = general_settings['is_gui']  # selects the GUI vs. command-line SUMO binary on Windows
see_progress_each = general_settings['see_progress_each']  # copied onto the env in create_env
enable_variation_action = general_settings["enable_variation_action"]  # passed to the env constructor


In [None]:
# Access specific parameters (Changable Settings)
n_epsiode = experiment_settings_changable["n_epsiode"]
max_sumo_steps = experiment_settings_changable["max_sumo_steps"]
ENV_NAME = experiment_settings_changable["ENV_NAME"]
REWARD_TYPE = experiment_settings_changable["REWARD_TYPE"]
EXPERIMENT_NAME = experiment_settings_changable["EXPERIMENT_NAME"]
precent_scale = experiment_settings_changable["precent_scale"]
seed = experiment_settings_changable["seed"]


In [None]:
# Access specific parameters (Const Settings)
max_steps = experiment_settings_const["max_steps"]
n_env = experiment_settings_const["n_env"]
durations = experiment_settings_const["durations"]
agent_id = experiment_settings_const["agent_id"]


In [None]:
# Traffic scale handed to the environment: ten times the configured value, truncated by int().
sumo_traffic_scale =  int(10 * precent_scale )


In [None]:
# gcd_and_reduced is a project helper; judging by its name it presumably returns the GCD of
# the durations and the durations divided by it - TODO confirm.
# step_size is later passed to SUMO as --step-length; reduced_durations goes to the env.
step_size,reduced_durations = gcd_and_reduced(durations)
step_size,reduced_durations

In [None]:
# Lookup tables that map the REWARD_TYPE / ENV_NAME strings from the config to the
# reward function and environment class they name (used by create_env).
reward_func = {
'proposed_reward':reward_proposed,
'literature':reward_liter,
'project_reward':reward_proj,
}
env_classes = {
    "HighGroupedSumoEnv": SumoEnvSingleAgent.HighGroupedSumoEnv,
    "GroupedSumoEnv": SumoEnvSingleAgent.GroupedSumoEnv ,
    "SumoEnv": SumoEnvSingleAgent.SumoEnv,
}

## Open Sumo

In [None]:
from dotenv import load_dotenv
load_dotenv(keys_file)

In [None]:
# Choose the SUMO installation for this machine from the variables loaded from keys.env.
# NOTE(review): str(os.getenv(...)) yields the string "None" when a key is missing from
# keys.env - consider validating these values before launching SUMO.
if platform.system() == "Linux":
    if is_colab():
        print("Running in Google Colab")
        sumo_home = str(os.getenv("sumo_home_lin"))
        sumo_binary = str(os.getenv("sumo_binary_lin"))
        if is_gui:
            # Only one Linux binary is configured, so the GUI flag cannot be honoured here.
            print("GUI not supported on linux")
    else:
        # Printing a message and continuing would leave sumo_home / sumo_binary undefined
        # and surface later as a NameError when the SUMO command is built.
        raise RuntimeError(
            "Linux outside Google Colab is not configured: no SUMO paths are "
            "selected for this platform."
        )
else:
    # This is the local Windows setup, so only the Windows message is printed.
    print("Running on Windows")
    sumo_home = str(os.getenv("sumo_home_win"))
    if is_gui:
        sumo_binary = str(os.getenv("sumo_binary_gui_win"))
    else:
        sumo_binary = str(os.getenv("sumo_binary_cli_win"))




In [None]:
# Command line used to launch SUMO: scenario config, log file, verbose output, and a
# simulation step length equal to step_size.
cmd = [
    sumo_binary,
    "-c", path_cfg,
    "--log", log_file,
    "--verbose", "true",
    "--step-length", str(step_size),
]


In [None]:
conn=SumoConnection.SumoConnection(cmd)

## Load Traffic Lights info

In [None]:
# Project helper; presumably returns the traffic-light entries and their policies for the
# given agent ids - TODO confirm. create_env reads traffic_lights[0][0] and [0][1], so each
# entry is expected to be an indexable pair.
traffic_lights,policies=get_traffic_lights_policies_high_group(durations=durations,agent_ids=[agent_id])


In [None]:
traffic_lights

## Make The Environment

In [None]:
# Just reload if you modified the SumoEnvSingleAgent.py file
import importlib
import SumoEnvSingleAgent
#importlib.reload(Connections.RealConnection)



In [None]:

def create_env(config_):
    """Build one environment of the class selected by ENV_NAME.

    Args:
        config_: forwarded unchanged to the environment constructor (the callers in
            this notebook pass an empty dict).

    Returns:
        The new environment, with `python_path`, `data_path` and `see_progress_each`
        set from the notebook globals.
    """
    first_light = traffic_lights[0]
    ctor_args = (
        first_light[1],
        first_light[0],
        reduced_durations,
        reward_func[REWARD_TYPE],
        max_steps,
        max_sumo_steps,
        sumo_traffic_scale,
        enable_variation_action,
        config_,
        seed,
    )
    env_cls = env_classes[ENV_NAME]
    new_env = env_cls(*ctor_args)
    # Attributes assigned after construction (note: python_path receives the SUMO home).
    new_env.python_path = sumo_home
    new_env.data_path = path_data_folder
    new_env.see_progress_each = see_progress_each
    return new_env

#from ray.tune.registry import register_env # If used later with rlib
#register_env(ENV_NAME, create_env)



In [None]:
set_global_conn(conn)


In [None]:
get_global_conn()

## DQN

### Load DQN Parameters

In [None]:
dqn_settings=algorithm_settings['DQN']


In [None]:
# Unpack the DQN hyperparameters from the config.
exploration_initial_eps = dqn_settings["exploration_initial_eps"]
exploration_final_eps = dqn_settings["exploration_final_eps"]
exploration_fraction = dqn_settings["exploration_fraction"]
# float() converts the configured value explicitly - presumably because it may be loaded
# from YAML as a string (e.g. scientific notation); verify against config.yaml.
learning_rate = float(dqn_settings["learning_rate"])
gamma = dqn_settings["gamma"]
# Network layout comes from the config; the activation is fixed to ReLU here.
policy_kwargs = dict(
    net_arch=dqn_settings["policy_kwargs"]["net_arch"],
    activation_fn=torch.nn.ReLU
)
batch_size = dqn_settings['batch_size']

### Prepare DQN

In [None]:
env = create_env({})  # Create the environment instance

In [None]:
# Build the DQN learner on the raw (unwrapped) environment.
# NOTE(review): EpsDQN and RMS_DQNPolicy are not defined in this notebook; they presumably
# come from one of the project star-imports - confirm which module provides them.
model = EpsDQN(
    RMS_DQNPolicy,
    env,
    verbose = 1,
    batch_size=batch_size,
    learning_rate=learning_rate,
    gamma=gamma,
    exploration_initial_eps=exploration_initial_eps,  # Initial epsilon value.
    exploration_final_eps=exploration_final_eps,      # Final epsilon value.
    exploration_fraction=exploration_fraction,        # Fraction of total timesteps for linear decay.
    policy_kwargs=policy_kwargs,
    seed=seed
)

In [None]:
callback = Stable_RewardCallback(max_episodes = n_epsiode)

### Train

In [None]:
# Train and time the run. The timestep budget is deliberately huge; the episode limit
# given to Stable_RewardCallback is presumably what ends training - TODO confirm.
time_before = time.time()
# total_timesteps is an integer count, so the float literal 1e9 is converted explicitly
# (the tuning code in this notebook already uses int(1e4)).
model.learn(total_timesteps=int(1e9), callback=callback)
time_after = time.time()

In [None]:
# Save the model
# model.save("models/"+EXPERIMENT_NAME)

In [None]:
# Per-episode rewards collected by the callback, plus the environment's record of its last run.
rewards = callback.episode_rewards
results_dict = env.last_run_dict # env is unwrapped here (same as PPO); the D3QN section wraps it and reads env.env instead


## PPO

### Hyperparameter Optimizing using Optuna (Only run once)

In [None]:
import optuna
from colorama import Fore, Style
import torch

n_tune_episode = 10  # max_episodes given to the callback for each tuning run
n_trials = 20        # number of Optuna trials
seeds = [0, 1, 2]  # every trial trains once per seed; extend the list to average over more runs

def objective(trial):
    """Optuna objective for PPO: mean reward improvement across seeds.

    For every seed in `seeds` a fresh environment and PPO model are built with the
    sampled hyperparameters and trained with a Stable_RewardCallback created with
    max_episodes=n_tune_episode. A run is scored as the last recorded episode reward
    minus the first; runs with fewer than two recorded episodes are not scored.

    Args:
        trial: the optuna trial used to sample hyperparameters.

    Returns:
        float: mean reward difference over the scored runs (the study maximizes it).

    Raises:
        optuna.TrialPruned: when no run recorded at least two episodes, so there is
            nothing to average (this would otherwise be a ZeroDivisionError).
    """
    # Sample hyperparameters
    learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-3)
    gamma = trial.suggest_float('gamma', 0.9, 0.9999)
    gae_lambda = trial.suggest_float('gae_lambda', 0.8, 1.0)
    ent_coef = trial.suggest_float('ent_coef', 0.0, 0.1)
    clip_range = trial.suggest_float('clip_range', 0.1, 0.4)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256, 512])
    net_arch = trial.suggest_categorical('net_arch', [32, 64, 128, 256, 512])

    # A single hidden layer whose width is the sampled value.
    policy_kwargs = dict(
        net_arch=[net_arch],
        activation_fn=torch.nn.ReLU
    )

    rewards_diffs = []

    for seed in seeds:
        # Fresh environment and normalization statistics for every run.
        env = create_env({})
        vec_env = DummyVecEnv([lambda: env])
        vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False)

        model = PPO(
            "MlpPolicy", vec_env,
            learning_rate=learning_rate,
            gamma=gamma,
            gae_lambda=gae_lambda,
            ent_coef=ent_coef,
            clip_range=clip_range,
            batch_size=batch_size,
            verbose=0,
            policy_kwargs=policy_kwargs,
            seed=seed
        )

        callback = Stable_RewardCallback(max_episodes=n_tune_episode)
        model.learn(total_timesteps=int(1e4), callback=callback)  # reduce steps for tuning

        rewards = callback.episode_rewards
        # At least two episodes are needed to measure an improvement.
        if len(rewards) >= 2:
            rewards_diff = rewards[-1] - rewards[0]
            rewards_diffs.append(rewards_diff)
            print(Fore.BLUE + f"Seed {seed} epsiodes done in trial {trial.number+1}" + Style.RESET_ALL)

    if not rewards_diffs:
        # Every run ended before two episodes were recorded: report the trial as pruned
        # instead of dividing by zero below.
        raise optuna.TrialPruned("no seed recorded at least two episodes")

    avg_reward_diff = sum(rewards_diffs) / len(rewards_diffs)

    print(Fore.GREEN + f"-------------Trial {trial.number+1} finished, avg reward delta = {avg_reward_diff:.2f}-----------" + Style.RESET_ALL)
    return avg_reward_diff

# Create Optuna study
# Maximize the objective with a TPE sampler; the fixed sampler seed makes the sequence of
# suggested hyperparameters repeatable.
study = optuna.create_study(direction="maximize",sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=n_trials)

print("Best Hyperparameters:", study.best_params)


In [None]:
study.best_params

In [None]:
study.best_value

### Initialize Environment

In [None]:
env = create_env({})  # Create the environment instance


In [None]:
# Wrap the single environment in a DummyVecEnv (VecNormalize operates on a vectorized env).
vec_env = DummyVecEnv([lambda: env])

# Wrap with VecNormalize: observations are normalized, rewards are left as they are.
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False)



In [None]:
print("Running mean:", vec_env.obs_rms.mean)
print("Running var:", vec_env.obs_rms.var)


In [None]:
# PPO hyperparameters are stored per experiment under the key "<ENV_NAME>_<REWARD_TYPE>".
ppo_Settings = algorithm_settings['PPO']
ppo_experiment_settings =ppo_Settings[str(ENV_NAME+"_"+REWARD_TYPE)]
ppo_experiment_settings

In [None]:
ppo_experiment_settings["net_arch"]

In [None]:
# The configured net_arch value is wrapped in a one-element list, mirroring the tuning
# code where the sampled value is a single hidden-layer width.
policy_kwargs = {
    "net_arch": [ppo_experiment_settings["net_arch"]],
    "activation_fn": torch.nn.ReLU
}

# Build PPO on the normalized vectorized environment with the configured hyperparameters.
model = PPO(
    "MlpPolicy", vec_env,
    learning_rate=ppo_experiment_settings['learning_rate'],
    gamma=ppo_experiment_settings['gamma'],
    gae_lambda=ppo_experiment_settings['gae_lambda'],
    ent_coef=ppo_experiment_settings['ent_coef'],
    clip_range=ppo_experiment_settings['clip_range'],
    batch_size=ppo_experiment_settings['batch_size'],
    verbose=0,
    policy_kwargs=policy_kwargs,
    seed=seed
)


In [None]:
callback = Stable_RewardCallback(max_episodes = n_epsiode)

### Begin Training

In [None]:
# Train and time the run. The timestep budget is deliberately huge; the episode limit
# given to Stable_RewardCallback is presumably what ends training - TODO confirm.
time_before = time.time()
# total_timesteps is an integer count, so the float literal 1e9 is converted explicitly
# (the tuning code in this notebook already uses int(1e4)).
model.learn(total_timesteps=int(1e9), callback=callback)
time_after = time.time()

In [None]:
# Save the model
model.save("models/"+EXPERIMENT_NAME+"_test")
# The policy was trained on observations normalized by `vec_env`; store the running
# statistics next to the model so they can be restored later with VecNormalize.load().
vec_env.save("models/"+EXPERIMENT_NAME+"_test_vecnormalize.pkl")

In [None]:
# Per-episode rewards collected by the callback, plus the environment's record of its last run.
rewards = callback.episode_rewards
# Important: read last_run_dict before the env is closed or evaluated.
# (`env` itself is used for PPO; the D3QN section reads env.env because it wraps the env.)
results_dict = env.last_run_dict # env.env for D3QN ,env for PPO ##Important should call this before closing env or evaluate


### Evaluate agent

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# The policy was trained on observations normalized by `vec_env`, so it has to be
# evaluated through the same wrapper; the raw `env` would feed it unnormalized inputs.
# Updating of the running statistics is switched off for the evaluation episodes and
# restored afterwards.
vec_env.training = False
try:
    # With return_episode_rewards=False the result is (mean_reward, std_reward); keep the mean.
    evaluate_results= evaluate_policy(model, vec_env, n_eval_episodes=10, return_episode_rewards=False)[0]
finally:
    vec_env.training = True

## D3QN

### Hyperparameter Optimizing using Optuna (Only run once)

In [None]:
import gymnasium as gym
from gymnasium.wrappers import NormalizeObservation, NormalizeReward



In [None]:
env = create_env({})

In [None]:
env = NormalizeObservation(env, epsilon=1e-8)


In [None]:
state_size = env.observation_space.shape
num_actions = env.action_space.n


In [None]:
import optuna
from colorama import Fore, Style


n_tune_epsiode = 10
n_trials = 20

def objective(trial):
    """Optuna objective for D3QN: mean reward improvement across seeds.

    For each seed a D3QNAgent is created on the shared, observation-normalized `env`
    with the sampled hyperparameters and trained for `n_tune_epsiode` episodes. A run
    is scored as the last episode reward minus the first.

    Args:
        trial: the optuna trial used to sample hyperparameters.

    Returns:
        The mean reward difference over all seeds (the study maximizes it).
    """
    # Sample hyperparameters
    learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-3)
    gamma = trial.suggest_float('gamma', 0.9, 0.9999)
    tau = trial.suggest_float('tau', 0.8, 1.0)
    l2_reg = trial.suggest_float('l2_reg', 0.001, 0.01)
    epsilon_decay = trial.suggest_float('epsilon_decay', 0.0001, 0.4)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256, 512])

    reward_diffs = []
    seeds=[0,1,2]
    for seed in seeds:
        agent = D3QNAgent(
            env=env,
            state_size=state_size,
            num_actions=num_actions,
            memory_size=100000,
            batch_size=batch_size,
            gamma=gamma,
            epsilon_start=1.0,
            epsilon_min=0.01,
            epsilon_decay=epsilon_decay,
            learning_rate=learning_rate,
            tau=tau,
            update_freq=4,
            l2_reg=l2_reg,
            random_state=seed
        )


        training_results = agent.train(
            num_episodes=n_tune_epsiode,
            max_steps_per_episode=200,
            num_points_for_average=100,
            log_interval=10
        )

        rewards_diff = training_results['rewards'][-1] - training_results['rewards'][0]
        reward_diffs.append(rewards_diff)
        print(Fore.BLUE + f"Seed {seed} epsiodes done in trial {trial.number+1}" + Style.RESET_ALL)

    avg_reward_diff = np.mean(reward_diffs)
    print(Fore.GREEN + f"Trial {trial.number+1} Finished, avg_derivative: {avg_reward_diff:.2f}" + Style.RESET_ALL)

    # Return the average over all seeds (the value printed above), not the difference of
    # the last seed only, so the study optimizes what the message reports.
    return avg_reward_diff


# Create an Optuna study. The sampler is seeded, matching the PPO study in this notebook,
# so that the sequence of suggested hyperparameters is repeatable between runs.
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=n_trials)
# Print best hyperparameters
print("Best Hyperparameters:", study.best_params)


In [None]:
study.best_params

In [None]:
study.best_value

### Algorithm

In [None]:
env = create_env({})

In [None]:
import gymnasium as gym
from gymnasium.wrappers import NormalizeObservation, NormalizeReward

env = NormalizeObservation(env, epsilon=1e-8)


In [None]:
state_size = env.observation_space.shape
num_actions = env.action_space.n

In [None]:
d3qn_Settings = algorithm_settings['D3QN']
d3qn_experiment_settings =d3qn_Settings[str(ENV_NAME+"_"+REWARD_TYPE)]
d3qn_experiment_settings

In [None]:
# Copy the D3QN hyperparameters used below out of the experiment settings.
parameters = {
    key: d3qn_experiment_settings[key]
    for key in ('learning_rate', 'gamma', 'tau', 'l2_reg', 'epsilon_decay', 'batch_size')
}


In [None]:
# Scratch cell: builds a throw-away D3QNAgent on MountainCar, independent of the SUMO
# environment; `agent_` is only referenced by the disabled cells that follow.
# NOTE(review): this needs the legacy `gym` package, which is not listed in the install
# cell, and `new_step_api` presumably exists only in some gym releases - confirm or drop.
import gymnasium as gym
from gymnasium.wrappers import NormalizeObservation, NormalizeReward
import gym as gymo
from models.d3qn import D3QNAgent
tmp_env = gymo.make('MountainCar-v0',new_step_api=True)

agent_ = D3QNAgent(
            env=tmp_env,
            state_size=tmp_env.observation_space.shape,
            num_actions=tmp_env.action_space.n,

        )

In [None]:
'''
training_results = agent_.train(
            num_episodes=2,
            max_steps_per_episode=20,
            num_points_for_average=2,
            log_interval=1)


'''

In [None]:
#path_save = 'models/tmp_unrelated.h5'
#agent_.save_model(path_save)


In [None]:
#agent_.load_model(path_save)

In [None]:
# Final D3QN agent on the observation-normalized env. Tuned values come from `parameters`;
# memory size, epsilon start/min and update frequency are fixed to the same constants as
# in the tuning objective.
agent = D3QNAgent(
            env=env,
            state_size=state_size,
            num_actions=num_actions,
            memory_size=100000,
            batch_size=parameters['batch_size'],
            gamma=parameters['gamma'],
            epsilon_start=1.0,
            epsilon_min=0.01,
            epsilon_decay=parameters['epsilon_decay'],
            learning_rate=parameters['learning_rate'],
            tau=parameters['tau'],
            update_freq=4,
            l2_reg=parameters['l2_reg'],
            random_state=seed
        )




### Begin Training

In [None]:
# Train for the configured number of episodes and time the run; the returned dict is read
# below through its 'rewards' and 'losses' entries.
time_before=time.time()
training_results = agent.train(
            num_episodes=n_epsiode,
            max_steps_per_episode=max_steps,
            num_points_for_average=100,
            log_interval=1)
time_after=time.time()

In [None]:
results_dict = env.env.last_run_dict # env.env for D3QN ,env for PPO ##Important should call this before closing env or evaluate


###  Evaluate

In [None]:
evaluate_results=agent.evaluate(num_episodes=10)

In [None]:
rewards=training_results['rewards']
losses=training_results['losses']


In [37]:
# Persist the trained agent; the file name encodes environment, reward type and experiment.
path_save = f"FINAL_{ENV_NAME}_{REWARD_TYPE}_{EXPERIMENT_NAME}"
agent.save_model(f"{path_save}.keras")



In [None]:
# Reload the model that was just saved.
# SECURITY NOTE(review): enable_unsafe_deserialization() switches off Keras' safe-mode
# protection for the whole process, which presumably permits arbitrary code embedded in a
# model file to run on load - only load files you created yourself.
from keras.config import enable_unsafe_deserialization
enable_unsafe_deserialization()
agent.load_model(path_save+".keras")


## Save and See results

In [None]:
# Wall-clock duration of whichever training cell ran last (time_before / time_after are
# overwritten by every algorithm section).
# NOTE(review): Fore and Style are imported only inside the Optuna cells - confirm they are
# otherwise available (e.g. through a project star-import) when those cells are skipped.
time_diff = time_after - time_before
print(Fore.GREEN + f"Time taken for training: {round(time_diff,3)} seconds ({round(time_diff/60,3)} Minutes)" + Style.RESET_ALL)


In [None]:
def append_to_file(filename, text):
    """Append `text` to `filename` as one line.

    Args:
        filename: path of the file to append to; it is created if it does not exist.
        text (str): content to write; a newline is added after it.
    """
    # An explicit encoding keeps the output identical on every platform instead of
    # depending on the locale default (which differs between Windows and Linux).
    with open(filename, 'a', encoding='utf-8') as file:
        file.write(text + '\n')  # the '\n' puts every appended sentence on its own line


In [None]:
last_cumulative_reward = round(rewards[-1],3)

print(Fore.MAGENTA + f"The Cumulative Reward of last Epsiode is : {last_cumulative_reward} ,Using Reward {REWARD_TYPE} " + Style.RESET_ALL)


In [None]:
if 'evaluate_results' in locals():
    print(Fore.CYAN + f"Avg. Reward for evaluated environment: {evaluate_results}" + Style.RESET_ALL)



In [None]:
# Print every entry of the last-run summary rounded to three decimals
# (round() requires the values to be numeric).
for key, value in results_dict.items():
    print(Fore.CYAN + f"{key}: {round(value,3)}" + Style.RESET_ALL)
    #append_to_file("output1.txt",f"{key}: {round(value,3)}")



In [None]:
derivative = rewards[-1] - rewards[0]
print(Fore.GREEN + f"Derivative of reward  is {derivative}" + Style.RESET_ALL)


In [None]:
# Learning curve: reward of each training episode, from whichever algorithm section
# last assigned `rewards`.
import matplotlib.pyplot as plt
plt.plot(rewards)
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.title("Reward per Episode")
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt
# `losses` is only assigned in the D3QN section, so the plot is skipped for DQN / PPO runs.
if 'losses' in locals():
    # Plot the variable that the guard checked, and label the axis as loss (not reward).
    plt.plot(losses)
    plt.xlabel("Episode")
    plt.ylabel("Loss")
    plt.title("Loss per Episode")
    plt.grid(True)
    plt.show()

The following cell works after a D3QN run but raises an error after a PPO run.

In [None]:
# Shut down the SUMO connection, then the environment.
conn.close()
env.close() # Close as the very last step: results such as last_run_dict must be read before closing

## ReLoading

In [None]:
import  Connections.RealConnection
conn2 = Connections.RealConnection.RealConnection()

In [None]:
set_global_conn(conn2)

In [None]:
# NOTE(review): the PPO section saves to "models/" + EXPERIMENT_NAME + "_test", so this path
# only matches when EXPERIMENT_NAME is "temp" - confirm. The observation-normalization
# statistics of the training VecNormalize wrapper are not restored by this call.
model_loaded = PPO.load("models/temp_test.zip")

In [None]:
# Fabricated sample for the experiments below: one random observation of 7 values in
# [0, 10), plus a single action and a single reward.
tmp_data = 10* np.random.random((1,7))
random_actions = np.array([1])
random_rewards = np.array([.5])

In [None]:
# Experiment: push one fabricated transition into the loaded model's rollout buffer.
# NOTE(review): random_rewards is reused for three different positional arguments - check
# the expected argument order and types of rollout_buffer.add before relying on this.
model_loaded.rollout_buffer.add(tmp_data,random_actions,random_rewards,random_rewards,random_rewards,np.array([1]))

In [None]:
# NOTE(review): PPO.train() in Stable-Baselines3 presumably takes no data argument, in which
# case this call raises a TypeError - confirm, or remove this experiment.
model_loaded.train(tmp_data)

In [None]:
model_loaded

In [None]:
# Hard-coded stand-in for the list loaded from SUMO earlier; create_env reads [0][1] and
# [0][0] of it. NOTE(review): this overwrites the global used by every earlier section.
traffic_lights = [(4, '1698478721')]


In [None]:
# Blank the SUMO home for the reload experiment; create_env copies this value to
# env.python_path. NOTE(review): this overwrites the value read from keys.env.
sumo_home=''

In [None]:
env = create_env({})  # Create the environment instance
# NOTE(review): this VecNormalize starts with fresh statistics, and the cells below step
# the raw `env` directly, so `vec_env` is not used by them.
vec_env = DummyVecEnv([lambda: env])
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False)


env.reset()

In [None]:
action, _states = model_loaded.predict(np.array([5]*7), deterministic=True)


In [None]:
action

In [None]:
conn2.state={agent_id:np.array([5]*7)}

In [None]:
int(action)

In [None]:
# Step the env once with the predicted action.
# NOTE(review): if the env follows the Gymnasium API the 5-tuple is
# (obs, reward, terminated, truncated, info), so the names `done, terminated` would be
# shifted by one position - confirm against the env's step().
obs, reward, done,terminated, info = env.step(int(action))

In [None]:
reward

In [None]:
model_loaded.predict(obs, deterministic=True)

In [None]:
pip install pyserial


In [None]:
import serial
import time

# Open the serial link to the Arduino at 115200 baud.
# Replace with your actual COM port. For example, '/dev/ttyUSB0' on Linux or 'COM3' on Windows.
arduino = serial.Serial('COM3', 115200 )
time.sleep(2)  # Give Arduino time to reset

# Send 'g' to turn on LED
arduino.write(b'g')

# Optional: close after use
#arduino.close()


In [None]:
arduino.write(b'g')


In [None]:
arduino.close()

In [None]:
import random
import time

# Demo: send a random command byte (b'g', b'r' or b'y') to the Arduino every 5 seconds
# until the cell is interrupted.
# An earlier cell closes the port, so reopen it when the notebook is run top to bottom.
if not arduino.is_open:
    arduino.open()

try:
    while True:
        s=random.choice([b'g',b'r',b'y'])
        print(f"action {s}")
        arduino.write(s)

        time.sleep(5)
except KeyboardInterrupt:
    # Interrupting the kernel is the only way out of the loop; treat it as a normal stop.
    print("Stopped sending actions")
finally:
    # Always release the serial port, whether the loop was interrupted or a write failed.
    arduino.close()