# End-to-End Learning | Practical Session Part I | Data Collection

In [None]:
import numpy as np
import pandas as pd
import gym.wrappers

from datetime import datetime
import matplotlib.pyplot as plt
from interfaces.expert import Policy
from utils import make_Dirs
import random

# Notebook-wide matplotlib defaults: figure size (matplotlib's
# "figure.figsize", width x height) and a larger font for readability.
ratio = [16, 10]
plt.rcParams.update({"figure.figsize": ratio, "font.size": 22})

### Hyperparameter
The hyperparameter N_TIME sets the number of state-action pairs sampled for the data set D.

In [None]:
# Upper bound on the number of (state, action) pairs recorded in one run.
N_TIME = 1_000

### Directories
Our sampled trajectories are stored in the `expert` folder. The `trajectory` sub-folder contains all created trajectories, which are saved as .gz archive files. In the `reward` sub-folder the reward of the respective run can be tracked.

In [None]:
# Create the output folders: the expert root itself plus one sub-folder
# each for the recorded trajectories and the per-run rewards.
SAVE_DIR = "./expert/"
for sub_dir in ("", "trajectory/", "reward/"):
    make_Dirs(SAVE_DIR + sub_dir)

## Car Racing Environment
![CarRacing](resources/Open_ai_gym.png)

State consists of 96x96x3 pixels. Reward is -0.1 every frame and +1000/N for every track tile visited, where N is the total number of tiles in track.
Some indicators are shown at the bottom of the window and the state RGB buffer. From left to right: true speed, four ABS sensors, steering wheel position, and gyroscope.
The action space is three-dimensional: steer, gas, and brake.

In [None]:
# Build the CarRacing environment and wrap it in a Monitor that writes to
# the "video/" sub-folder of the expert directory. `force=True` and a
# callable that returns True for every episode id are passed through as-is.
env = gym.wrappers.Monitor(
    gym.make("CarRacing-v0"),
    SAVE_DIR + "video/",
    force=True,
    video_callable=lambda episode_id: True,
)

## Expert Interface
We can use the 'left' and 'right' keys on the keyboard to steer the vehicle. With 'A' we accelerate and with 'S' we brake.
With the following command we import our expert keyboard interface.

In [None]:
# Instantiate the expert Policy (imported from interfaces.expert) for the
# "CarRacing-v0" environment; it is later called with an observation to get an action.
policy = Policy("CarRacing-v0")

If the expert folder already contains data, the summary CSV file is loaded and the newly sampled trajectories are appended to it.

In [None]:
# Load the rows of an existing summary file so new runs are appended to it.
# Only a missing or empty file means "start fresh". Any other failure (e.g. a
# corrupt CSV) is allowed to surface, because the summary is rewritten after
# each run and silently starting from [] would discard the existing entries.
try:
    summary = pd.read_csv(SAVE_DIR + "summary.csv").values.tolist()
except (FileNotFoundError, pd.errors.EmptyDataError):
    summary = []

The following main loop samples the trajectories. At the beginning we wait 1 second, because the first frames are invalid due to zooming.
After that we are ready to start driving!

In [None]:
# Roll out one expert-driven episode and record its (observation, action) pairs.
trajectory = []  # one (observation, action) tuple per recorded step, both float32
result = []  # reward returned by env.step for each recorded step

# try/finally guarantees that the environment and the expert interface are
# released even when the roll-out raises or is interrupted part-way through.
try:
    observation = env.reset()
    action = policy.reset()

    # Warm-up: keep applying the reset action until env.t exceeds 1.0
    # (presumably elapsed simulation seconds); these frames are not recorded.
    while env.t <= 1.0:
        env.render()
        observation, reward, done, info = env.step(action)
    print("Ready to start!")

    # main loop to update trajectory
    for t_ in range(1, N_TIME + 1):
        env.render()
        action = policy(observation)
        # The policy may return a tensor-like object (detected by its type
        # name); convert it to a flat NumPy array before storing and stepping.
        if "Tensor" in str(type(action)):
            action = action.cpu().data.numpy().flatten()

        # Store the observation together with the action chosen for it.
        trajectory.append(
            (
                np.asarray(observation, dtype=np.float32),
                np.asarray(action, dtype=np.float32),
            )
        )
        observation, reward, done, info = env.step(action)
        result.append(reward)

        # Stop early when the environment reports the episode as finished.
        if done:
            break
finally:
    # close everything
    env.close()
    policy.release()

### Data Saving
After driving we need to save the sampled trajectory. We use pandas to generate GZ files with a corresponding unique file name.
We append the path of the trajectory, together with its return, to the summary CSV file.

In [None]:
# Persist the finished run: pickle the trajectory, dump the per-step rewards,
# and add a [trajectory path, total return] row to the summary CSV.
file_name = datetime.now().strftime("%Y%m%d%H%M%S")  # timestamp used as run id
trajectory_path = SAVE_DIR + "trajectory/" + file_name + ".gz"
reward_path = SAVE_DIR + "reward/" + file_name + ".csv"

print("Finish one episode, and record it to {}".format(file_name))
pd.to_pickle(trajectory, trajectory_path)

# Rewards are written as a single column, one row per recorded step.
np.savetxt(reward_path, np.array([result]).T, delimiter=",")

tmp = [trajectory_path, np.sum(result)]
print(tmp)
summary.append(tmp)

# Rewrite the whole summary file so it includes the new row.
summary_frame = pd.DataFrame(summary, columns=["file", "return"])
summary_frame.to_csv(SAVE_DIR + "summary.csv", index=False)

In [None]:
# Report how many runs the summary holds, then dump the rows themselves.
n_sampled = len(summary)
print("Number of sampled trajectories: ", n_sampled)
print(summary)

In [None]:
# Re-read the summary from disk and load the trajectory listed last in it.
summary_on_disk = pd.read_csv(SAVE_DIR + "summary.csv")
sampled_trajectories = summary_on_disk["file"].values.tolist()
example_trajectorie = pd.DataFrame(pd.read_pickle(sampled_trajectories[-1]))

# Column 0 holds the observations, column 1 the actions of each step.
state, action = (
    example_trajectorie.iloc[:, column].values.tolist() for column in (0, 1)
)

In [None]:
# Show one randomly chosen recorded frame together with the action taken on it.
# randrange(len(state)) yields a valid index in [0, len(state) - 1]; the
# previous randint(1, len(state)) is inclusive on both ends, so it could
# return len(state) (IndexError) and could never select the first frame.
n_rand = random.randrange(len(state))
# Dividing by 255 rescales the image for imshow (presumably 0-255 pixel values).
plt.imshow(state[n_rand] / 255)
plt.show()
print(
    "The action performed at this time was \nSteering {} \nGas {} \nBrake {}".format(
        action[n_rand][0], action[n_rand][1], action[n_rand][2]
    )
)

### Histogram of sampled actions

In [None]:
# Convert the list of per-step actions into one NumPy array so the cells
# below can slice a single action component by column (0, 1, 2).
action_np = np.asarray(action)

#### Steering Angle

In [None]:
# Histogram (20 bins) of action component 0, the steering value.
plt.hist(action_np[:, 0], bins=20)
plt.xlabel("Steering Angle")
plt.ylabel("Number of Samples")
plt.title("Histogram Steering Angle")
plt.show()

#### Throttle Position

In [None]:
# Histogram (20 bins) of action component 1, the gas/throttle value.
plt.hist(action_np[:, 1], bins=20)
plt.xlabel("Throttle Position")
plt.ylabel("Number of Samples")
plt.title("Histogram Throttle Position")
plt.show()

#### Brake Pedal Position

In [None]:
# Histogram (20 bins) of action component 2, the brake value.
plt.hist(action_np[:, 2], bins=20)
plt.xlabel("Brake Pedal Position")
plt.ylabel("Number of Samples")
plt.title("Histogram Brake Pedal Position")
plt.show()

### Expert Statistics

In [None]:
# Summary statistics over the total return of every run listed in `summary`.
# Column 1 of each summary row is the run's return. The values are cast to
# float because np.asarray on mixed [path, return] rows yields strings.
# A comprehension is used so the loop variable does not rebind the
# notebook-level `reward` left over from the roll-out.
rewards = [float(value) for value in np.asarray(summary)[:, 1]]

print(
    "statistics of total expert reward:\n\t max: {}\t min: {}"
    "\n\t median: {}\t mean: {}\t std: {}".format(
        np.max(rewards),
        np.min(rewards),
        np.median(rewards),
        np.mean(rewards),
        np.std(rewards),
    )
)