In [None]:
from pathlib import Path
from typing import Dict, Tuple, List

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.utils.data as data

from data import RaceDataset

from racing_utils.utils import closest_point_idx, cyclic_slice, determine_direction_of_bound, rotate_into_map_coord
from racing_utils.torch_related import TensorStandardScaler, calc_progress_and_penalty, scale_batch_and_to_device
from racing_utils.models import get_omniward_model

from training import one_pass_through_data, process_batch, Phase, plot_losses_vs_epochs

In [None]:
# Window sizes; passed positionally (in this order) to RaceDataset below and
# reused by the exploratory cells for slicing positions, bounds and centerline.
NUM_STEPS_AHEAD_TRAJ = 150
NUM_STEPS_AHEAD_ACT = 10
NUM_STEPS_AHEAD_BOUND = 50
NUM_STEPS_CENTERLINE = 300
# Stride used when thinning the centerline (1 keeps every point)
CENTERLINE_DECIMATION = 1

DATASET_SUFFIX = '_tiny' # '_tiny', '', or '_large'

# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# end-to-end on machines without CUDA.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

BATCH_SIZE = 512

PROB_FLIP = 0.5  # Mini-augmentation (handed to RaceDataset as flip_prob)

# Model-related (both are passed to get_omniward_model)
NUM_LAYERS = 3
WIDTH_REDUCTION = 2.0

In [None]:
# Reproducible
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False
#
# Or fast (cuDNN benchmarks candidate algorithms and picks the quickest one,
# which may make runs non-reproducible)
torch.backends.cudnn.benchmark = True

In [None]:
# Load one recorded race for the exploratory cells below.
# NOTE(review): the path is pinned to ./data/train_large and ignores DATASET_SUFFIX — confirm this is intended.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
unpickled = pd.read_pickle('./data/train_large/2021-11-14_17_44_20.360805.pkl')
# Per-step recording (used as a DataFrame below) ...
one_race = unpickled['data']
# ... and the per-race metadata (centerline, controller parameters)
additional_data = unpickled['additional_data']

In [None]:
# Centerline of the track, thinned by keeping every CENTERLINE_DECIMATION-th point
centerline = additional_data['centerline'][::CENTERLINE_DECIMATION]
# Controller parameters stored with this race; the same three entries are later
# read from additional_data again to build init_contr_params for GradientDriver.
lookahead_distance = additional_data['lookahead_distance']
speed_setpoint = additional_data['speed_setpoint']
tire_force_max = additional_data['tire_force_max']

In [None]:
# Quick look at the first rows of the recorded race
one_race.head()

# We need to figure out if the bounds go in the same direction as the car is driving

In [None]:
# Positions at index labels 0 and NUM_STEPS_AHEAD_TRAJ; they are handed to
# determine_direction_of_bound in the next cell.
start_position, end_position = one_race.loc[[0, NUM_STEPS_AHEAD_TRAJ], 'position']

In [None]:
# Read the two track bounds (interior first, exterior second) ...
raw_bounds = [
    pd.read_csv(Path('./data') / bound_filename, header=None).values
    for bound_filename in ('interior.csv', 'exterior.csv')
]

# ... determine a direction for each one from the two reference positions ...
bound_directions = [
    determine_direction_of_bound(raw_bound, start_position, end_position)
    for raw_bound in raw_bounds
]

# ... and use that direction as the slice step (a step of -1 reverses the point order).
bounds = [
    raw_bound[::step]
    for raw_bound, step in zip(raw_bounds, bound_directions)
]

In [None]:
# Sample used for the plots below: 120 rows past the first quarter of the race.
row_id = 120 + len(one_race) // 4
# `row` is also reused further down as the state passed to grad_driver.plan(...)
row = one_race.iloc[row_id]

# Position and yaw of the car at that sample
position = row['position']
yaw = row['yaw']

In [None]:
# For each bound, the index returned by closest_point_idx for the car position
closest_bound_indices = [closest_point_idx(position, bound) for bound in bounds]

In [None]:
# Cut NUM_STEPS_AHEAD_BOUND points out of each bound, starting at its closest index
# (cyclic_slice presumably wraps around the end of the array — verify in racing_utils).
bound_slices = [
    cyclic_slice(bound, closest_idx, NUM_STEPS_AHEAD_BOUND)
    for bound, closest_idx in zip(bounds, closest_bound_indices)
]

In [None]:
# Untransformed view: the next NUM_STEPS_AHEAD_TRAJ recorded positions together
# with the bound slices selected above.
upcoming_positions = one_race['position'].iloc[row_id:row_id+NUM_STEPS_AHEAD_TRAJ]
local_positions = np.stack(upcoming_positions.values)

# Bounds are drawn first, the driven positions last (keeps the colour-cycle order)
for bound_slice in bound_slices:
    bound_x, bound_y = bound_slice[:, 0], bound_slice[:, 1]
    plt.scatter(bound_x, bound_y)

plt.scatter(local_positions[:, 0], local_positions[:, 1])
plt.gca().set_aspect('equal')
plt.show()

In [None]:
# Same window as the previous plot, but every point set is shifted by the car
# position and passed through rotate_into_map_coord with -yaw before plotting.
local_positions = np.stack(one_race['position'].iloc[row_id:(row_id + NUM_STEPS_AHEAD_TRAJ)].values)

# Centerline stretch of NUM_STEPS_CENTERLINE points, starting at the point
# closest to the first position of the window
closest_centerline_idx = closest_point_idx(local_positions[0], centerline)
centerline_ahead = cyclic_slice(centerline, closest_centerline_idx, NUM_STEPS_CENTERLINE)
centerline_ahead = rotate_into_map_coord(centerline_ahead - position, -yaw)
plt.scatter(centerline_ahead[:, 0], centerline_ahead[:, 1], color='r', alpha=0.2)

# The loop variable is rebound locally; the entries of bound_slices stay untouched
for bound_slice in bound_slices:
    bound_slice = rotate_into_map_coord(bound_slice - position, -yaw)
    plt.scatter(bound_slice[:, 0], bound_slice[:, 1], color='gray', alpha=0.2)

# Driven positions get the same shift and rotation
local_positions = rotate_into_map_coord(local_positions - position, -yaw)
plt.scatter(local_positions[:, 0], local_positions[:, 1], alpha=0.1)
plt.gca().set_aspect('equal')
plt.show()

In [None]:
# Training set read from ./data/train{DATASET_SUFFIX}; flip_prob switches on the
# "mini-augmentation" configured via PROB_FLIP.
train_dataset = RaceDataset(
    NUM_STEPS_AHEAD_TRAJ,
    NUM_STEPS_AHEAD_ACT,
    NUM_STEPS_AHEAD_BOUND,
    NUM_STEPS_CENTERLINE,
    f'./data/train{DATASET_SUFFIX}',
    CENTERLINE_DECIMATION,
    flip_prob=PROB_FLIP,
)

In [None]:
# One example sample; `features` and `targets` are dict-like and their keys are
# reused below to create the scalers and to build the model.
features, targets = train_dataset[1000]

In [None]:
# Visual check of a single dataset sample: flat vectors are viewed as (n, 2) point lists.
left_bound, right_bound = features['left_bound'], features['right_bound']
left_bound = left_bound.reshape(-1, 2)
right_bound = right_bound.reshape(-1, 2)
plt.scatter(left_bound[:, 0], left_bound[:, 1], color='gray', alpha=0.2)
plt.scatter(right_bound[:, 0], right_bound[:, 1], color='gray', alpha=0.2)

# NOTE: this rebinds the notebook-level `centerline` (previously the track centerline
# from additional_data), so the earlier centerline cells can't be re-run after this one.
centerline = features['centerline'].reshape(-1, 2)
plt.scatter(centerline[:, 0], centerline[:, 1], color='r', alpha=0.2)

# Target trajectory of the sample
trajectory = targets['trajectory']
trajectory = trajectory.reshape(-1, 2)
plt.scatter(trajectory[:, 0], trajectory[:, 1], color='blue', alpha=0.1)
plt.gca().set_aspect('equal')
plt.show();

In [None]:
train_loader = data.DataLoader(train_dataset, BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)

# Initialize the scalers: one TensorStandardScaler per feature key and per target key
features_scalers = {name: TensorStandardScaler(DEVICE) for name in features}
targets_scalers = {name: TensorStandardScaler(DEVICE) for name in targets}

# Do a partial fit of the scalers for both the input features, and the targets,
# feeding them one batch at a time over a full pass through the training data
for features_batch, targets_batch in train_loader:
    # TODO: not all features need rescaling, in particular: left_bound, right_bound
    for name, scaler in features_scalers.items():
        scaler.partial_fit(features_batch[name])

    for name, scaler in targets_scalers.items():
        scaler.partial_fit(targets_batch[name])

# Move the numpy structures into a torch.tensor
for scaler in (*features_scalers.values(), *targets_scalers.values()):
    scaler.tensorfy()

In [None]:
# Validation set read from ./data/valid{DATASET_SUFFIX}, built with the same window sizes.
# NOTE(review): flip_prob=PROB_FLIP is also passed here, so validation samples get the
# same random flip setting as training — confirm that is intended.
valid_dataset = RaceDataset(
    NUM_STEPS_AHEAD_TRAJ,
    NUM_STEPS_AHEAD_ACT,
    NUM_STEPS_AHEAD_BOUND,
    NUM_STEPS_CENTERLINE,
    f'./data/valid{DATASET_SUFFIX}',
    CENTERLINE_DECIMATION,
    flip_prob=PROB_FLIP,
)

# We're ready for the model

In [None]:
# Build the model. The example sample (`features`, `targets`) is passed in —
# presumably to size the inputs and outputs; verify in get_omniward_model.
omniward_model = get_omniward_model(
    NUM_LAYERS,
    WIDTH_REDUCTION,
    features,
    targets,
    DEVICE,
)

In [None]:
# Move the model to DEVICE; as the cell's last expression this also displays its repr
omniward_model.to(DEVICE)

# We've got the model, now in order to train it we'll need two DataLoaders

In [None]:
# NOTE: train_loader is re-created with the same arguments as in the scaler-fitting cell.
train_loader = data.DataLoader(train_dataset, BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
# The validation loader is shuffled too, so the batch drawn for plotting after
# each optimizer round differs from round to round.
valid_loader = data.DataLoader(valid_dataset, BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)

## The gammas below may seem a bit confusing, but the idea is to have a sequence of coefficients that decreases exponentially for the actuators, AND a sequence of coefficients that decreases exponentially from the back for the trajectory

This is because the trajectory is easy to guess at the beginning (all trajectories start at the (0, 0) point), whereas its end is more relevant for calculating the reward.

In [None]:
# Lengths of the flat target vectors of a single sample
trajectory_size = len(targets['trajectory'])
actuators_size = len(targets['speeds_and_deltas'])

trajectory_gamma = 0.99999
actuators_gamma = 0.8

# The flat trajectory vector is interleaved (x0, y0, x1, y1, ...): every cell in this
# notebook recovers the points with .reshape(-1, 2). Each time step's exponent must
# therefore be repeated for its x and y entry; concatenating two aranges would
# instead restart the decay in the middle of the trajectory.
trajectory_steps = np.repeat(np.arange(trajectory_size // 2), 2)
trajectory_gammas = trajectory_gamma ** trajectory_steps
# Reverse so that the weight is 1 at the END of the trajectory and decays towards its start
trajectory_gammas = torch.from_numpy(trajectory_gammas[::-1].copy()).to(DEVICE)

# The actuator vector is handled as two concatenated halves (the actuator plot further
# down splits it at len // 2), so here the exponents restart for the second half.
actuators_gammas = actuators_gamma ** np.r_[np.arange(actuators_size // 2), np.arange(actuators_size // 2)]
actuators_gammas = torch.from_numpy(actuators_gammas).to(DEVICE)

In [None]:
# Passed through to one_pass_through_data / process_batch / calc_progress_and_penalty
penalty_sigma = 0.15

# Training schedule: each entry is [number of epochs, optimizer]. The rounds run in
# order with learning rates 1e-3, 1e-4 and 1e-5. Every round has its own Adam
# instance, so optimizer state is not carried over from one round to the next.
optimizers = (
    [5, torch.optim.Adam(omniward_model.parameters(), lr=1e-3)],
    [15, torch.optim.Adam(omniward_model.parameters(), lr=1e-4)],
    [5, torch.optim.Adam(omniward_model.parameters(), lr=1e-5)],
)

# Loss histories; these lists are handed to one_pass_through_data (presumably
# appended to there — verify in training.py) and later to plot_losses_vs_epochs.
train_traj_mses_for_plot = []
train_act_mses_for_plot = []
train_progress_mses_for_plot = []
train_penalty_mses_for_plot = []

valid_traj_mses_for_plot = []
valid_act_mses_for_plot = []
valid_progress_mses_for_plot = []
valid_penalty_mses_for_plot = []

# Global epoch counter, running across all optimizer rounds
epoch = 0

for num_epochs_per_optimizer_round, optimizer in optimizers:
    print(f'Optimizer: {optimizer}')
    for _ in range(num_epochs_per_optimizer_round):

        #            #
        #  Training  #
        #            #
        one_pass_through_data(
            model=omniward_model,
            optimizer=optimizer,
            phase=Phase.train,
            data_loader=train_loader,
            features_scalers=features_scalers,
            targets_scalers=targets_scalers,
            trajectory_gammas=trajectory_gammas,
            actuators_gammas=actuators_gammas,
            penalty_sigma=penalty_sigma,
            traj_mses_for_plot=train_traj_mses_for_plot,
            act_mses_for_plot=train_act_mses_for_plot,
            progress_mses_for_plot=train_progress_mses_for_plot,
            penalty_mses_for_plot=train_penalty_mses_for_plot,
            epoch=epoch,
            device=DEVICE,
        )

        #              #
        #  Validation  #
        #              #
        # No optimizer is passed and autograd is disabled for the validation pass
        with torch.inference_mode():
            one_pass_through_data(
                model=omniward_model,
                optimizer=None,
                phase=Phase.valid,
                data_loader=valid_loader,
                features_scalers=features_scalers,
                targets_scalers=targets_scalers,
                trajectory_gammas=trajectory_gammas,
                actuators_gammas=actuators_gammas,
                penalty_sigma=penalty_sigma,
                traj_mses_for_plot=valid_traj_mses_for_plot,
                act_mses_for_plot=valid_act_mses_for_plot,
                progress_mses_for_plot=valid_progress_mses_for_plot,
                penalty_mses_for_plot=valid_penalty_mses_for_plot,
                epoch=epoch,
                device=DEVICE,
            )

        epoch += 1
        print()

    # End of an optimizer round: run a single validation batch through process_batch
    # with make_plots=True. features_batch / targets_batch of the LAST round stay
    # alive and are reused by the cells below.
    features_batch, targets_batch = next(iter(valid_loader))
    with torch.inference_mode():
        traj_mse, act_mse, progress_mse, penalty_mse = process_batch(
            omniward_model,
            features_scalers,
            targets_scalers,
            features_batch,
            targets_batch,
            trajectory_gammas,
            actuators_gammas,
            penalty_sigma,
            DEVICE,
            make_plots=True,
        )

In [None]:
# Works on the validation batch left over from the training cell above.
# NOTE: this rebinds features_batch / targets_batch, so re-running the cell feeds
# the previous result back into scale_batch_and_to_device.
features_batch, targets_batch = scale_batch_and_to_device(DEVICE, features_scalers, targets_scalers, features_batch, targets_batch)
# NOTE(review): these three presumably are the *scaled* features, while the cells
# below pass them to calc_progress_and_penalty together with an inverse-transformed
# trajectory — confirm which units calc_progress_and_penalty expects.
centerline = features_batch['centerline'].to(DEVICE)
left_bound = features_batch['left_bound'].to(DEVICE)
right_bound = features_batch['right_bound'].to(DEVICE)

In [None]:
%%timeit
# Times one full inference step on the current batch: forward pass, un-scaling of
# the predicted trajectory, and the progress / penalty computation.
# `_1`, `_2` and `_` are throwaway names; the results are discarded.
with torch.inference_mode():
    preds = omniward_model(**features_batch)
    _1, _2 = preds['trajectory_pred'], preds['actuators_pred']
    _ = targets_scalers['trajectory'].inverse_transform(_1)
    calc_progress_and_penalty(_, centerline, left_bound, right_bound, penalty_sigma=penalty_sigma)

In [None]:
# Training vs. validation curves for the four tracked losses
# (trajectory, actuators, progress, penalty) collected during training
plot_losses_vs_epochs(
    train_traj_mses=train_traj_mses_for_plot,
    valid_traj_mses=valid_traj_mses_for_plot,
    train_act_mses=train_act_mses_for_plot,
    valid_act_mses=valid_act_mses_for_plot,
    train_progress_mses=train_progress_mses_for_plot,
    valid_progress_mses=valid_progress_mses_for_plot,
    train_penalty_mses=train_penalty_mses_for_plot,
    valid_penalty_mses=valid_penalty_mses_for_plot,
)

In [None]:
# Same computation as in the timing cell, but the results are kept:
# trajectory_pred is un-scaled, actuators_pred stays in scaled units (it is
# inverse-transformed only in the actuator plot further down).
with torch.inference_mode():
    preds = omniward_model(**features_batch)
    trajectory_pred, actuators_pred = preds['trajectory_pred'], preds['actuators_pred']
    trajectory_pred = targets_scalers['trajectory'].inverse_transform(trajectory_pred)
    progress_pred, penalty_pred = calc_progress_and_penalty(trajectory_pred, centerline, left_bound, right_bound, penalty_sigma=penalty_sigma)

In [None]:
def _as_point_batch(flat_batch):
    """Reshape a batch to (len(batch), -1, 2) and return it as a numpy array on the CPU."""
    return flat_batch.reshape(len(flat_batch), -1, 2).cpu().numpy()


# Undo the scaling of every feature and target (rebinds both batch dicts;
# a later cell applies the forward transform again).
features_batch = {
    name: features_scalers[name].inverse_transform(values)
    for name, values in features_batch.items()
}
targets_batch = {
    name: targets_scalers[name].inverse_transform(values)
    for name, values in targets_batch.items()
}

# Point-list views used by the plotting cells below
centerline = _as_point_batch(features_batch['centerline'])
traj_pred = _as_point_batch(trajectory_pred)
right_bound = _as_point_batch(features_batch['right_bound'])
left_bound = _as_point_batch(features_batch['left_bound'])

trajectory = targets_batch['trajectory']
traj = _as_point_batch(trajectory)

In [None]:
# One figure per sample for the first 10 samples of the batch: predicted
# trajectory, recorded trajectory, centerline (red) and both bounds (gray).
thick_line = dict(alpha=0.3, linewidth=5)
bound_dots = dict(color='gray', alpha=0.2)

for sample_idx in range(10):
    true_xy = traj[sample_idx]
    pred_xy = traj_pred[sample_idx]
    centerline_xy = centerline[sample_idx]

    # Prediction first, ground truth second (fixes their colours in the default cycle)
    plt.plot(pred_xy[:, 0], pred_xy[:, 1], **thick_line)
    plt.plot(true_xy[:, 0], true_xy[:, 1], **thick_line)
    plt.plot(centerline_xy[:, 0], centerline_xy[:, 1], alpha=0.1, linewidth=5, color='red')
    for bound_xy in (right_bound[sample_idx], left_bound[sample_idx]):
        plt.scatter(bound_xy[:, 0], bound_xy[:, 1], **bound_dots)
    plt.gca().set_aspect('equal')

    plt.show()

In [None]:
# Index of the validation sample inspected in the next cells
idx = 1020

In [None]:
# NOTE: rebinds `features` / `targets`, which until here held the training sample
features, targets = valid_dataset[idx]

In [None]:
# Same visual check as for the training sample, now for the validation sample.
# NOTE: rebinds left_bound / right_bound / centerline / trajectory, which the
# batch-plotting cells above had set to whole-batch arrays.
left_bound, right_bound = features['left_bound'], features['right_bound']
left_bound = left_bound.reshape(-1, 2)
right_bound = right_bound.reshape(-1, 2)
plt.scatter(left_bound[:, 0], left_bound[:, 1], color='gray', alpha=0.2)
plt.scatter(right_bound[:, 0], right_bound[:, 1], color='gray', alpha=0.2)

centerline = features['centerline'].reshape(-1, 2)
plt.scatter(centerline[:, 0], centerline[:, 1], color='r', alpha=0.2)

trajectory = targets['trajectory']
trajectory = trajectory.reshape(-1, 2)
plt.scatter(trajectory[:, 0], trajectory[:, 1], color='blue', alpha=0.1)
plt.gca().set_aspect('equal')
plt.show();

In [None]:
# Re-apply the scaling that an earlier cell undid for plotting.
# NOTE: not idempotent — running this cell twice scales the batch twice.
features_batch = {
    feature_name: features_scalers[feature_name].transform(features_batch[feature_name])
    for feature_name in features_batch.keys()
}
targets_batch = {
    target_name: targets_scalers[target_name].transform(targets_batch[target_name])
    for target_name in targets_batch.keys()
}

In [None]:
# Predicted vs. recorded actuator sequence for the first sample of the batch (un-scaled)
speeds_and_deltas = targets_scalers['speeds_and_deltas'].inverse_transform(actuators_pred)[0].cpu()
speeds_and_deltas_gt = targets_scalers['speeds_and_deltas'].inverse_transform(targets_batch['speeds_and_deltas'])[0].cpu()

# The vector is split in the middle: first half in one figure, second half in another
# (presumably speeds and then steering deltas, going by the key name)
half = len(speeds_and_deltas) // 2
plt.plot(speeds_and_deltas[:half])
plt.plot(speeds_and_deltas_gt[:half])
plt.show()
plt.plot(speeds_and_deltas[half:])
plt.plot(speeds_and_deltas_gt[half:]);

In [None]:
# Fits a straight line through progress / penalty values as a function of a
# perturbation x, once per controller parameter.
# NOTE(review): the indexing below presumes a batch laid out as
# [unperturbed sample, then perturbed samples grouped by step]; in this notebook
# progress_pred / penalty_pred come from an ordinary validation batch — confirm.
eta = 0.05
num_steps_for_grad = 4
num_contr_params = features_batch['contr_params'].shape[1]


grad_contr_param = np.zeros(num_contr_params)
# 2 * num_steps_for_grad + 1 abscissae, symmetric around 0 with spacing eta
x = eta * np.arange(-num_steps_for_grad, num_steps_for_grad+1)
for contr_param_idx in range(num_contr_params):
    progress = []
    penalties = []
    for pred_step in range(2 * num_steps_for_grad):
        # Index 0 is skipped here; it is inserted separately below
        pred_idx = 1 + pred_step * num_contr_params + contr_param_idx
        progress.append(float(progress_pred[pred_idx].cpu()))
        penalties.append(float(penalty_pred[pred_idx].cpu()))
        # NOTE(review): the index-0 entry is appended AFTER the entry for
        # pred_step == num_steps_for_grad, so it ends up at list position
        # num_steps_for_grad + 1, whereas x == 0 sits at position num_steps_for_grad —
        # confirm this matches the ordering of the perturbed samples.
        if pred_step == num_steps_for_grad:
            progress.append(float(progress_pred[0].cpu()))
            penalties.append(float(penalty_pred[0].cpu()))

    # Linear fit (slope, intercept) of progress vs. x, plotted against the raw values
    coeffs = np.polyfit(x, progress, deg=1)
    plt.plot(x, coeffs[0] * x + coeffs[1])
    plt.plot(x, progress)
    plt.show()

    # Same for the penalties; this overwrites `coeffs`
    coeffs = np.polyfit(x, penalties, deg=1)
    plt.plot(x, coeffs[0] * x + coeffs[1])
    plt.plot(x, penalties)
    plt.show()

    print(80 * '-')

    # NOTE(review): only the slope of the PENALTY fit is stored — confirm intended.
    grad_contr_param[contr_param_idx] = coeffs[0]


In [None]:
# Overlay the recorded trajectories of every sample in the batch in one figure.
# NOTE: the loop rebinds the notebook-level `idx` set a few cells above.
batch_size = features_batch['contr_params'].shape[0]

for idx in range(batch_size):
    positions = traj[idx]
    plt.scatter(positions[:, 0], positions[:, 1], alpha=0.01)

In [None]:
# Move the model to the CPU for the GradientDriver below.
# NOTE: assuming the model is an nn.Module, .to() moves it in place and returns the
# same object, so `omniward_model` itself now lives on the CPU as well.
omniward_model_cpu = omniward_model.to('cpu')

# Every feature scaler first, then every target scaler, is moved as well
for scaler in (*features_scalers.values(), *targets_scalers.values()):
    scaler.to('cpu')

In [None]:
# Limits taken from the training set; passed to GradientDriver as contr_params_limits
contr_param_limits = train_dataset.determine_limits()

In [None]:
from racing_utils import GradientDriver


# Slice steps used to orient the centerline and both bounds (-1 reverses the point order).
# The bound directions are those stored with the first race of the validation set.
centerline_direction = 1
left_bound_direction = valid_dataset.traj_data[0]['bound_directions'][0]
right_bound_direction = valid_dataset.traj_data[0]['bound_directions'][1]

grad_driver = GradientDriver(
    centerline=additional_data['centerline'][::centerline_direction],
    num_steps_centerline=NUM_STEPS_CENTERLINE,

    # Both bounds are oriented by their own direction. Previously right_bound_direction
    # was computed but never applied, leaving the right bound in its stored order.
    left_bound=valid_dataset.bounds[0][::left_bound_direction],
    right_bound=valid_dataset.bounds[1][::right_bound_direction],
    num_steps_ahead_bound=NUM_STEPS_AHEAD_BOUND,

    # Controller-related
    init_contr_params=np.r_[additional_data['lookahead_distance'], additional_data['speed_setpoint'], additional_data['tire_force_max']],

    # Model-related (model and scalers were moved to the CPU in the cell above)
    omniward_model=omniward_model_cpu,
    features_scalers=features_scalers,
    targets_scalers=targets_scalers,

    # Gradient-related
    # NOTE(review): eta and penalty_sigma differ from the values used during
    # training / in the fit cell above (0.05 and 0.15) — confirm this is deliberate.
    eta=0.1,
    num_steps_for_grad=4,
    penalty_sigma=0.3,
    penalty_scale_coeff=-0.9,
    contr_params_limits=contr_param_limits,

    device='cpu',
)

# Smoke test: plan once for the sample row picked near the top of the notebook
grad_driver.plan(
    ranges=None,
    yaw=row['yaw'],
    pos_x=row['position'][0],
    pos_y=row['position'][1],
    linear_vel_x=row['v_x'],
    linear_vel_y=row['v_y'],
    angular_vel_z=row['omega'],
)

In [None]:
# Persist the configured driver (including model and scalers) in the working directory
pd.to_pickle(grad_driver, 'grad_driver.pkl')

In [None]:
# Replay a stretch of the recorded race (index labels start_idx .. end_idx, both
# inclusive) through the driver and collect its outputs next to the recorded values.
start_idx, end_idx = 100, 400

speed_preds, speed_ground_truths = [], []
steer_preds, steer_ground_truths = [], []


for row_idx, row in one_race.iterrows():
    # Rows before the start of the stretch are skipped without planning
    if row_idx >= start_idx:
        speed, steer = grad_driver.plan(
            ranges=None,
            yaw=row['yaw'],
            pos_x=row['position'][0],
            pos_y=row['position'][1],
            linear_vel_x=row['v_x'],
            linear_vel_y=row['v_y'],
            angular_vel_z=row['omega'],
        )

        speed_preds.append(speed)
        speed_ground_truths.append(row['speed_actuator'])
        steer_preds.append(steer)
        steer_ground_truths.append(row['delta'])

        # Stop right after the row labelled end_idx has been processed
        if row_idx == end_idx:
            break

In [None]:
# Driver output vs. recorded steering over the replayed stretch; axes are
# labelled so the figure can be read on its own.
plt.scatter(steer_ground_truths, steer_preds)
plt.xlabel('steer ground truth')
plt.ylabel('steer prediction');

In [None]:
# Driver output vs. recorded speed actuator over the replayed stretch; axes are
# labelled so the figure can be read on its own.
plt.scatter(speed_ground_truths, speed_preds)
plt.xlabel('speed ground truth')
plt.ylabel('speed prediction');