### Installation for Colab
To run in Colab, uncomment and run the following cell.

In [None]:
# Colab-only setup (leave commented out when running locally):
# clone the tutorial repository and install the pinned gym release.
# !git clone https://github.com/AssistiveRoboticsUNH/bc_tutorial.git
# !pip install gym==0.26.2

In [1]:
import numpy as np 
import gym
import torch
import torch.nn as nn
import torch.nn.functional as F 
from torch.optim import Adam
from matplotlib import pyplot as plt
import pickle 
import time 
import imageio 

In [2]:
# Location of the pickled expert demonstrations. The active path points into
# the repository clone under /content; the commented-out relative path is an
# alternative file (presumably for runs outside Colab -- confirm before use).
# data_path = "expert_data/human_demos_5_435.0.pkl"
data_path = "/content/bc_tutorial/car_racing/expert_data/human_demos_2_432.0.pkl"

# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(data_path, mode='rb') as demo_file:
    trajs = pickle.load(demo_file)

print(f"Number of trajectories: {len(trajs)}")

Number of trajectories: 5


In [3]:
# Flatten all trajectories into one pool of samples. Every trajectory is
# iterated as a sequence of (state, action) pairs.
state_samples = [state for traj in trajs for state, _ in traj]
action_samples = [action for traj in trajs for _, action in traj]

states = np.array(state_samples, dtype=np.float32)
actions = np.array(action_samples, dtype=np.float32)

# Reorder axes (0, 1, 2, 3) -> (0, 3, 1, 2): the last axis moves to position 1,
# giving the channels-first layout that the Conv2d layers below consume.
states = states.transpose(0, 3, 1, 2)
states.shape, actions.shape

((5000, 3, 96, 96), (5000, 3))

In [4]:
# Mini-batch loader over every (state, action) pair, reshuffled on each pass.
data_loader = torch.utils.data.DataLoader(list(zip(states, actions)), batch_size=64, shuffle=True)

# Peek at one batch to check the collated shapes. The batch gets its own names
# on purpose: rebinding `states` / `actions` here would replace the full arrays
# with a single batch, so re-running this cell would silently rebuild the
# loader from only those few samples.
sample_states, sample_actions = next(iter(data_loader))
sample_states.shape, sample_actions.shape

(torch.Size([64, 3, 96, 96]), torch.Size([64, 3]))

In [5]:
# Per-sample sizes: everything after the leading axis of `states`, and the
# length of axis 1 of `actions`.
state_dim = states.shape[1:]
action_dim = actions.shape[1]
print(f"State dim: {state_dim}, Action dim: {action_dim}")

State dim: torch.Size([3, 96, 96]), Action dim: 3


In [6]:
class CNN_Fe(nn.Module):
    """Convolutional feature extractor for 3-channel image batches.

    Three unpadded Conv2d+ReLU stages are followed by a single Linear+ReLU
    layer that produces an ``fe_dim``-sized feature vector per sample.

    The linear layer's input size is hard-coded to ``8 * 8 * 64``, which
    matches 96x96 inputs (spatial size 96 -> 23 -> 10 -> 8 through the three
    convolutions); other resolutions will fail at the linear layer.

    Args:
        fe_dim: Width of the returned feature vector.
    """

    def __init__(self, fe_dim=512):
        super().__init__()

        conv_stages = [
            nn.Conv2d(3, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*conv_stages)
        self.fc = nn.Sequential(nn.Linear(8 * 8 * 64, fe_dim), nn.ReLU())

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` tensor to ``(batch, fe_dim)`` features."""
        feature_maps = self.conv(x)
        # Collapse channel and spatial axes into one vector per sample.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.fc(flattened)

In [7]:
class RegNet(CNN_Fe):
    """Regression head on top of the ``CNN_Fe`` feature extractor.

    Args:
        action_dim: Number of values predicted per sample.
        fe_dim: Width of the intermediate feature vector, forwarded to
            ``CNN_Fe``.
    """

    def __init__(self, action_dim, fe_dim=512):
        super().__init__(fe_dim)
        self.decoder = nn.Linear(fe_dim, action_dim)

    def forward(self, x):
        """Return the linear read-out of the extracted features (no output activation)."""
        features = super().forward(x)
        return self.decoder(features)

In [8]:
# Behaviour-cloning policy, trained as a regression problem: mean-squared
# error between predicted and demonstrated actions, optimised with Adam.
bc = RegNet(action_dim)
criterion = nn.MSELoss()
optimizer = Adam(params=bc.parameters(), lr=1e-4)

In [9]:
st = time.time()
loss_list = []   # per-batch training loss, in iteration order
test_loss = []   # never filled in this cell; kept so the name stays defined
n_epoch = 500

# Report roughly 20 times over the run. Clamped to at least 1 so that choosing
# n_epoch < 20 does not turn the modulo below into a division by zero.
log_every = max(1, n_epoch // 20)

# Put the model in training mode explicitly, so re-running this cell after
# bc.eval() has been called does not train in eval mode.
bc.train()

# Note the inclusive upper bound: epochs 0..n_epoch, i.e. n_epoch + 1 passes.
for itr in range(0, n_epoch + 1):
    total_loss = 0
    b = 0
    for batch_states, batch_actions in data_loader:
        y_pred = bc(batch_states.float())
        loss = criterion(y_pred, batch_actions)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the epoch average and the history.
        batch_loss = loss.item()
        total_loss += batch_loss
        loss_list.append(batch_loss)
        b += 1

    if itr % log_every == 0:
        print(f'Epoch {itr} Loss: {total_loss/b:.4f}')

et = time.time()
mt = (et - st) / 60
print(f"\nTraining time: {mt:.2f} min")

Epoch 0 Loss: 0.5746
Epoch 25 Loss: 0.0031
Epoch 50 Loss: 0.0021
Epoch 75 Loss: 0.0017
Epoch 100 Loss: 0.0013
Epoch 125 Loss: 0.0011
Epoch 150 Loss: 0.0007
Epoch 175 Loss: 0.0004
Epoch 200 Loss: 0.0002
Epoch 225 Loss: 0.0002
Epoch 250 Loss: 0.0001
Epoch 275 Loss: 0.0001
Epoch 300 Loss: 0.0001
Epoch 325 Loss: 0.0001
Epoch 350 Loss: 0.0001
Epoch 375 Loss: 0.0000
Epoch 400 Loss: 0.0000
Epoch 425 Loss: 0.0000
Epoch 450 Loss: 0.0000
Epoch 475 Loss: 0.0001
Epoch 500 Loss: 0.0001

Training time: 13.60 min


### Inference

In [10]:
# Environment id handed to gym.make() by the rollout helper.
env_name = "CarRacing-v2"

In [11]:
# Switch to evaluation mode; the trailing semicolon keeps the notebook from
# echoing the returned module.
bc.eval();

(96, 96, 3)

In [15]:
def play_an_episode(env_name, model, video_path=None, max_steps=1000):
    """Roll out ``model`` for one episode and report the accumulated reward.

    Args:
        env_name: Environment id passed to ``gym.make``.
        model: Callable that takes a float32 tensor of shape ``(1, C, H, W)``
            (the observation with a batch axis added and channels moved
            first) and returns a tensor whose first row is the action.
        video_path: If not ``None``, the environment is created with
            ``render_mode='rgb_array'`` and every rendered frame is appended
            to a video written at this path (20 fps).
        max_steps: Upper bound on the number of environment steps.

    Returns:
        dict with ``'reward'`` (sum of the per-step rewards) and ``'step'``
        (number of environment steps taken, minus one).

    The environment and the video writer are always released, even when a
    step raises.
    """
    record = video_path is not None
    video_writer = None
    env = None
    rewards = 0
    step = 0

    try:
        if record:
            print(f'Saving video to {video_path}')
            video_writer = imageio.get_writer(video_path, fps=20)
            env = gym.make(env_name, render_mode='rgb_array')
        else:
            env = gym.make(env_name)

        obs, _ = env.reset()
        for _ in range(max_steps):
            step += 1

            # (H, W, C) observation -> (1, C, H, W) float tensor for the model.
            state = np.transpose(obs[None, :], (0, 3, 1, 2))
            state = torch.tensor(state, dtype=torch.float32)

            # Pure inference: no autograd graph is needed.
            with torch.no_grad():
                pred = model(state).detach().numpy()[0]

            obs, reward, done, trunc, _ = env.step(pred)

            if record:
                video_writer.append_data(env.render())

            rewards += reward
            if done or trunc:
                break
    finally:
        # Release resources on every exit path; previously the environment was
        # never closed and the writer leaked if a step raised.
        if video_writer is not None:
            video_writer.close()
        if env is not None:
            env.close()

    return {'reward': rewards, 'step': step - 1}

In [13]:
# Evaluate the policy over several independent episodes (no video recording)
# and summarise the episode returns.
n_trajectory = 20
scores = []
for episode_idx in range(n_trajectory):
    episode_reward = play_an_episode(env_name, bc)['reward']
    print(f'episode #{episode_idx} reward: {episode_reward:0.2f}')
    scores.append(episode_reward)

mean_score, std_score = np.mean(scores), np.std(scores)
print(f'\n score: {mean_score:0.2f} +- {std_score:0.2f}')

episode #0 reward: 470.42
episode #1 reward: 271.07
episode #2 reward: 377.74
episode #3 reward: 475.44
episode #4 reward: 493.41
episode #5 reward: 242.28
episode #6 reward: 475.97
episode #7 reward: 381.73
episode #8 reward: 467.47
episode #9 reward: 450.68
episode #10 reward: 483.63
episode #11 reward: 458.22
episode #12 reward: 329.94
episode #13 reward: 257.38
episode #14 reward: 443.33
episode #15 reward: 413.99
episode #16 reward: 390.68
episode #17 reward: 329.04
episode #18 reward: 414.29
episode #19 reward: 404.53

 score: 401.56 +- 76.68


### Let's render and save a video using the learned policy.

In [16]:
# Roll out one more episode, this time writing the rendered frames to an mp4.
stats = play_an_episode(env_name=env_name, model=bc, video_path='bc_carracing.mp4')
stats

Saving video to bc_carracing.mp4




{'reward': 446.7625899280513, 'step': 999}

In [1]:
# Imports are kept inside this cell so it can be run on its own (e.g. after a
# kernel restart) to view an already-recorded video.
from base64 import b64encode
from IPython.display import HTML

video_path = 'bc_carracing.mp4'

# Read the file inside a context manager so the handle is closed right away
# instead of being left for the garbage collector.
with open(video_path, "rb") as video_file:
    mp4 = video_file.read()

# Embed the video in the notebook as a base64 data URL.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML(f"""
<video width=400 controls>
      <source src="{data_url}" type="video/mp4">
</video>
""")