# Actor Critic with Summarization (ACS) (DDPG-S)

The first few cells handle dependencies; the remaining cells can be run individually to train models or generate graphs.

In [None]:
!pip install torch

!pip install "pandas==2.0.0"

!pip install submodlib

# Install MetaDrive, a lightweight driving simulator
!pip install git+https://github.com/metadriverse/metadrive

# Test whether MetaDrive is properly installed. No error means the test is passed.
!python -m metadrive.examples.profile_metadrive --num-steps 100

!pip install scipy seaborn tabulate pyyaml

# Update(2022-11-03): Fix pyglet compatability issue since it is updated to 2.0.0 recently.
!pip install "pyglet<2.0.0"

In [None]:
import os
try:
    
    from google.colab import drive
    drive.mount('/content/gdrive')

    DRIVE_PATH = '/content/gdrive/My\ Drive/cs260R'
    DRIVE_PYTHON_PATH = DRIVE_PATH.replace('\\', '')
    if not os.path.exists(DRIVE_PYTHON_PATH):
      %mkdir $DRIVE_PATH

    ## the space in `My Drive` causes some issues,
    ## make a symlink to avoid this
    SYM_PATH = '/content/cs260R'
    if not os.path.exists(SYM_PATH):
      !ln -s $DRIVE_PATH $SYM_PATH
    
    running_in_colab = True
    
    # We already mounted in our google drive.
    # Enter the foler where you put files in:
    %cd '/content/cs260R'

    # Current working directory:
    !pwd

    # What files are there:
    !ls
   
    
except ModuleNotFoundError:
    running_in_colab = False
    print(
        "I guess you are running locally. If you get this message in Colab, check the files."
    )


## 1. DDPG (AC)

In [None]:
# Debug run (disabled): uncomment the command below to check that the AC implementation works on Pendulum-v1.

# !python train_AC.py \
# --env-id Pendulum-v1 \
# --log-dir Pendulum-v1 \
# --max-steps 50000 \
# --start-steps 1000 \
# --lr 1e-3

In [None]:
# Plot for the AC Pendulum-v1 debug run (disabled): uncomment after running the debug training command.

# import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns

# progress = pd.read_csv("Pendulum-v1/AC/progress.csv")
# plt.figure(dpi=300)
# sns.set("notebook", "darkgrid")
# ax = sns.lineplot(
#     data=progress,
#     x="total_steps",
#     y="episode_reward"
# )
# ax.set_title("AC agent in Pendulum-v1")
# ax.set_ylabel("Episode Reward Mean")
# ax.set_xlabel("Sampled Steps")

# # TODO: You should remove this line in your code.
# # ax.annotate("REF-DELETE-IT", (0, ax.get_ylim()[0]), size=50, alpha=0.05)

# ax.ticklabel_format(style='sci', scilimits=(0,0), axis='x')

# plt.savefig('pendulum-AC.png', format='png', dpi=300, bbox_inches="tight")


In [None]:
# Trains DDPG (AC) on MetaDrive-Tut-Hard-v0 to generate the baseline performance.
# Runs train_AC.py as a shell command with --log-dir MetaDrive-Tut-Hard-v0 and
# --max-steps 100_000. The AC plotting cell reads
# MetaDrive-Tut-Hard-v0/AC/progress.csv, presumably written by this run
# (confirm in train_AC.py).
# Note: DDPG varies greatly run-to-run.

!python train_AC.py \
--env-id MetaDrive-Tut-Hard-v0 \
--log-dir MetaDrive-Tut-Hard-v0 \
--max-steps 100_000

In [None]:
# Plot the DDPG (AC) training curve on MetaDrive-Tut-Hard-v0:
# episode reward against sampled steps, saved as metadrive-hard-AC.png.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

progress = pd.read_csv("MetaDrive-Tut-Hard-v0/AC/progress.csv")
print("The log contains these keys: ", progress.keys())

fig = plt.figure(dpi=300)
sns.set("notebook", "darkgrid")
ax = sns.lineplot(
    data=progress,
    x="total_steps",
    y="episode_reward",
    ax=fig.gca(),
)
ax.set_title("AC agent in MetaDrive-Tut-Hard-v0")
# Human-readable axis labels instead of the raw CSV column names.
ax.set_ylabel("Episode Reward Mean")
ax.set_xlabel("Sampled Steps")

# Scientific notation keeps the x tick labels compact for large step counts.
ax.ticklabel_format(style='sci', scilimits=(0,0), axis='x')

fig.savefig('metadrive-hard-AC.png', format='png', dpi=300, bbox_inches="tight")


## 2. DDPG-S (ACS) (Ours)

In [None]:
# Debug run (disabled): uncomment the command below to train the DDPG-S (ACS) model on Pendulum-v1 as a sanity check.

# !python train_ACS.py \
# --env-id Pendulum-v1 \
# --log-dir Pendulum-v1 \
# --max-steps 50000 \
# --start-steps 10000 \
# --lr 1e-3

In [None]:
# Plot for the DDPG-S (ACS) Pendulum-v1 debug run (disabled): uncomment after running the debug training command.

# import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns

# progress = pd.read_csv("Pendulum-v1/ACS/progress.csv")
# plt.figure(dpi=300)
# sns.set("notebook", "darkgrid")
# ax = sns.lineplot(
#     data=progress,
#     x="total_steps",
#     y="episode_reward"
# )
# ax.set_title("ACS agent in Pendulum-v1")
# ax.set_ylabel("Episode Reward Mean")
# ax.set_xlabel("Sampled Steps")

# # TODO: You should remove this line in your code.
# # ax.annotate("REF-DELETE-IT", (0, ax.get_ylim()[0]), size=50, alpha=0.05)

# ax.ticklabel_format(style='sci', scilimits=(0,0), axis='x')

# plt.savefig('pendulum-ACS.png', format='png', dpi=300, bbox_inches="tight")


In [None]:
# Trains DDPG-S (ACS) on MetaDrive-Tut-Hard-v0.
# Runs train_ACS.py as a shell command with --log-dir MetaDrive-Tut-Hard-v0 and
# --max-steps 100_000. The ACS plotting cells read
# MetaDrive-Tut-Hard-v0/ACS/progress.csv, presumably written by this run
# (confirm in train_ACS.py).

!python train_ACS.py \
--env-id MetaDrive-Tut-Hard-v0 \
--log-dir MetaDrive-Tut-Hard-v0 \
--max-steps 100_000

In [None]:
# Plot the DDPG-S (ACS) training curve on MetaDrive-Tut-Hard-v0:
# episode reward against sampled steps, saved as metadrive-hard-ACS.png.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

progress = pd.read_csv("MetaDrive-Tut-Hard-v0/ACS/progress.csv")
print("The log contains these keys: ", progress.keys())

fig = plt.figure(dpi=300)
sns.set("notebook", "darkgrid")
ax = sns.lineplot(
    data=progress,
    x="total_steps",
    y="episode_reward",
    ax=fig.gca(),
)
ax.set_title("ACS agent in MetaDrive-Tut-Hard-v0")
# Human-readable axis labels instead of the raw CSV column names.
ax.set_ylabel("Episode Reward Mean")
ax.set_xlabel("Sampled Steps")

# Scientific notation keeps the x tick labels compact for large step counts.
ax.ticklabel_format(style='sci', scilimits=(0,0), axis='x')

fig.savefig('metadrive-hard-ACS.png', format='png', dpi=300, bbox_inches="tight")


In [None]:
# Plot the DDPG-S (ACS) success rate on MetaDrive-Tut-Hard-v0 against sampled
# steps, saved as metadrive-hard-ACS-success-rate.png.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

progress = pd.read_csv("MetaDrive-Tut-Hard-v0/ACS/progress.csv")
print("The log contains these keys: ", progress.keys())

fig = plt.figure(dpi=300)
sns.set("notebook", "darkgrid")
ax = sns.lineplot(
    data=progress,
    x="total_steps",
    y="success_rate",
    ax=fig.gca(),
)
ax.set_title("ACS agent in MetaDrive-Tut-Hard-v0")
# Label the axes explicitly; the title is shared with the reward plot, so the
# y label is what tells the two figures apart.
ax.set_ylabel("Success Rate")
ax.set_xlabel("Sampled Steps")

# Scientific notation keeps the x tick labels compact for large step counts.
ax.ticklabel_format(style='sci', scilimits=(0,0), axis='x')

fig.savefig('metadrive-hard-ACS-success-rate.png', format='png', dpi=300, bbox_inches="tight")

## 3. TD3

In [None]:
# Trains TD3 on MetaDrive-Tut-Hard-v0 for the comparison.
# Runs train_td3.py as a shell command with --log-dir MetaDrive-Tut-Hard-v0 and
# --max-steps 100_000. The TD3 plotting cell reads
# MetaDrive-Tut-Hard-v0/td3/progress.csv, presumably written by this run
# (confirm in train_td3.py).

!python train_td3.py \
--env-id MetaDrive-Tut-Hard-v0 \
--log-dir MetaDrive-Tut-Hard-v0 \
--max-steps 100_000

In [None]:
# Plot the TD3 training curve on MetaDrive-Tut-Hard-v0:
# episode reward against sampled steps, saved as metadrive-hard-td3.png.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

progress = pd.read_csv("MetaDrive-Tut-Hard-v0/td3/progress.csv")
print("The log contains these keys: ", progress.keys())

fig = plt.figure(dpi=300)
sns.set("notebook", "darkgrid")
ax = sns.lineplot(
    data=progress,
    x="total_steps",
    y="episode_reward",
    ax=fig.gca(),
)
ax.set_title("TD3 agent in MetaDrive-Tut-Hard-v0")
# Human-readable axis labels instead of the raw CSV column names.
ax.set_ylabel("Episode Reward Mean")
ax.set_xlabel("Sampled Steps")

# Scientific notation keeps the x tick labels compact for large step counts.
ax.ticklabel_format(style='sci', scilimits=(0,0), axis='x')

fig.savefig('metadrive-hard-td3.png', format='png', dpi=300, bbox_inches="tight")


## 4. PPO

In [None]:
# Trains PPO on MetaDrive-Tut-Hard-v0 for the comparison.
# Runs train_ppo.py as a shell command with --num-envs 10 and
# --max-steps 1_000_000 (ten times the --max-steps passed to the AC/ACS/TD3 runs).
# The PPO plotting cell reads MetaDrive-Tut-Hard-v0/ppo/progress.csv, presumably
# written by this run (confirm in train_ppo.py).

!python train_ppo.py \
  --env-id MetaDrive-Tut-Hard-v0 \
  --log-dir MetaDrive-Tut-Hard-v0 \
  --num-envs 10 \
  --max-steps 1_000_000

In [None]:
# Plot the PPO training curve on MetaDrive-Tut-Hard-v0:
# episode reward against sampled steps, saved as metadrive-hard-ppo.png.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

ppo_progress = pd.read_csv("MetaDrive-Tut-Hard-v0/ppo/progress.csv")

fig = plt.figure(dpi=300)
sns.set("notebook", "darkgrid")
ax = sns.lineplot(data=ppo_progress, x="total_steps", y="episode_reward", ax=fig.gca())

# Title and axis labels in a single call.
ax.set(
    title="PPO agent in MetaDrive-Tut-Hard-v0",
    ylabel="Episode Reward Mean",
    xlabel="Sampled Steps",
)

# Scientific notation keeps the x tick labels compact for large step counts.
ax.ticklabel_format(style='sci', scilimits=(0,0), axis='x')

fig.savefig('metadrive-hard-ppo.png', format='png', dpi=300, bbox_inches="tight")


## 5. Comparison Graphs

In [None]:
# Comparison plot of episode reward for DDPG (AC), DDPG-S (ACS), and TD3 on
# MetaDrive-Tut-Hard-v0, saved as 'algorithm comparison episode reward.png'.

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Only the three plotted runs are loaded. The PPO log is not read here, so this
# cell does not require a finished PPO run.
AC_df = pd.read_csv("MetaDrive-Tut-Hard-v0/AC/progress.csv")
TD3_df = pd.read_csv("MetaDrive-Tut-Hard-v0/td3/progress.csv")
ACS_df = pd.read_csv("MetaDrive-Tut-Hard-v0/ACS/progress.csv")

# Build one long-format frame (total_steps, value, label) without row-wise
# iteration. The AC, TD3, ACS order determines the hue/legend order.
plot_data = pd.concat(
    [
        run_df[["total_steps", "episode_reward"]]
        .rename(columns={"episode_reward": "value"})
        .assign(label=label)
        for label, run_df in (("AC", AC_df), ("TD3", TD3_df), ("ACS", ACS_df))
    ],
    ignore_index=True,
)

fig = plt.figure(dpi=300)
sns.set("paper", style="darkgrid")

# Plot one line per algorithm.
ax = sns.lineplot(
    data=plot_data,
    x='total_steps',
    y='value',
    hue='label',
    ax=fig.gca(),
)

# Set the plot title and labels
ax.set_title('ACS(Ours) vs AC(baseline) vs TD3 on MetaDrive-Tut-Hard-v0')
ax.set_xlabel('Total Steps')
ax.set_ylabel('Episode Reward')

# Set the legend
ax.legend(title='Algorithm', fontsize=8, loc='upper right')

# Save the plot
fig.savefig('algorithm comparison episode reward.png', format='png', dpi=300, bbox_inches="tight")

In [None]:
# Comparison plot of success rate for DDPG (AC), DDPG-S (ACS), and TD3 on
# MetaDrive-Tut-Hard-v0, saved as 'algorithm comparison success rate.png'.

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Only the three plotted runs are loaded. The PPO log is not read here, so this
# cell does not require a finished PPO run.
AC_df = pd.read_csv("MetaDrive-Tut-Hard-v0/AC/progress.csv")
TD3_df = pd.read_csv("MetaDrive-Tut-Hard-v0/td3/progress.csv")
ACS_df = pd.read_csv("MetaDrive-Tut-Hard-v0/ACS/progress.csv")

# Build one long-format frame (total_steps, value, label) without row-wise
# iteration. The AC, TD3, ACS order determines the hue/legend order.
plot_data = pd.concat(
    [
        run_df[["total_steps", "success_rate"]]
        .rename(columns={"success_rate": "value"})
        .assign(label=label)
        for label, run_df in (("AC", AC_df), ("TD3", TD3_df), ("ACS", ACS_df))
    ],
    ignore_index=True,
)

fig = plt.figure(dpi=300)
sns.set("paper", style="darkgrid")

# Plot one line per algorithm.
ax = sns.lineplot(
    data=plot_data,
    x='total_steps',
    y='value',
    hue='label',
    ax=fig.gca(),
)

# Set the plot title and labels
ax.set_title('ACS(Ours) vs AC(baseline) vs TD3 on MetaDrive-Tut-Hard-v0')
ax.set_xlabel('Total Steps')
ax.set_ylabel('Success Rate')

# Set the legend
ax.legend(title='Algorithm', fontsize=8, loc='upper right')

# Save the plot
fig.savefig('algorithm comparison success rate.png', format='png', dpi=300, bbox_inches="tight")