# Imports

In [1]:
from treelib import Node, Tree
import tqdm

import os
import time
import pickle
import keyboard
import numpy as np
import random
import pandas as pd
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

import tminterface as tmi
from tminterface.interface import TMInterface, Client

# Useful Functions

In [2]:
def discrete_to_continuous(n):
    """Translate a discrete action index into a dict of input settings.

    Mapping:

    0: no action
    1: left
    2: left + acceleration
    3: acceleration
    4: right + acceleration
    5: right

    Any other value produces the neutral command (no steering, no
    acceleration). The brake is never engaged.
    """
    full_lock = 65536

    # Steering: full left for 1/2, full right for 4/5, centred otherwise.
    if n in (1, 2):
        steering = -full_lock
    elif n in (4, 5):
        steering = full_lock
    else:
        steering = 0

    return {
        'sim_clear_buffer': True,
        "steer": steering,
        "accelerate": n in (2, 3, 4),
        "brake": False,
    }
    

def distance_3D(x, y, z, x0, y0, z0):
    """Return the Euclidean distance between (x, y, z) and (x0, y0, z0).

    The arithmetic is plain NumPy, so scalars and arrays that broadcast
    against each other are both accepted.
    """
    squared_norm = (x - x0) ** 2 + (y - y0) ** 2 + (z - z0) ** 2
    return np.sqrt(squared_norm)

def centerline_objective(track_name):
    """Build a smoothed reference line from the recorded run of a track.

    Reads ``track_data/<track_name>/run-1/positions.pkl``, smooths the
    recorded positions with an exponential moving average (``com=40``) and
    resamples them with a piecewise-linear interpolator.

    Args:
        track_name: Name of the folder under ``track_data/`` holding the run.

    Returns:
        A ``(curve, alpha)`` tuple. ``curve`` is a NumPy array with one row
        per recorded sample and one column per position component;
        ``alpha`` is the matching parameter, evenly spaced over ``[0, 1]``.
    """
    run_folder = "track_data/" + track_name + "/run-1"

    # NOTE(review): pickle.load can execute arbitrary code; only load
    # recordings that come from a trusted source.
    with open(os.path.join(run_folder, "positions.pkl"), "rb") as positions_file:
        positions = pickle.load(positions_file)

    raw_points = [list(pos['position'].to_numpy()) for pos in positions]
    smoothed = pd.DataFrame(raw_points).ewm(com=40).mean().values.tolist()

    # Consecutive identical samples are not dropped: the repeated sample is
    # shifted by 0.01 on every axis so that it differs from its predecessor.
    points = [smoothed[0]]
    for point in smoothed[1:]:
        if point == points[-1]:
            point = [coord + 0.01 for coord in point]
        points.append(point)
    points = np.array(points)

    # Normalised parameter along the run; it is used both as the
    # interpolation abscissa and as the sampling grid.
    alpha = np.linspace(0, 1, len(points))
    interpolator = interp1d(alpha, points, kind='slinear', axis=0)
    curve = interpolator(alpha)

    return curve, alpha

# Explorer Class

In [14]:
class TreeExplorer():
    """Depth-first explorer over a tree of discrete actions.

    The root node only stores ``{"start_state": ...}``. Every other node is
    created by :meth:`record_leaf` and stores the action that led to it, the
    measured performance, whether the attempt was viable, whether it reached
    success, and the state from which its own children start.
    """

    def __init__(self, root_start_state):
        """Create the tree with a single root holding ``root_start_state``."""
        self.tree = Tree()
        self.tree.create_node(identifier="root", data={"start_state": root_start_state})
        self.current_position = self.tree["root"]
        # Candidate actions tried, in this order, from every node.
        self.possible_actions = [2, 3, 4]
        self.terminated = False

        self.depth = 0

    def explore_node(self):
        """Return the next action to try, moving through the tree as needed.

        If the current node still has an untried action, that action is
        returned and the position is unchanged. Otherwise the explorer steps
        into the first successful child (setting ``terminated``) or, failing
        that, the viable child with the highest ``perf``. When no child is
        viable, the current node is marked non-viable and the explorer backs
        up to its parent. The procedure repeats until an untried action is
        found.

        Returns:
            The next action to try, or ``None`` when the root itself has no
            viable child left; ``terminated`` is then set to ``True``.
            (Earlier behaviour in that situation was unbounded recursion.)
        """
        while True:
            children = self.tree.children(self.current_position.identifier)
            tried_actions = {child.data["action"] for child in children}

            # An unexplored action is available at this node.
            for action in self.possible_actions:
                if action not in tried_actions:
                    return action

            # Every action has been tried here: descend or backtrack.
            next_node = self._select_child(children)
            if next_node is not None:
                self.current_position = next_node
                self.depth += 1
                continue

            # Dead end: no child is worth following.
            self.current_position.data["viable"] = False
            if self.current_position.identifier == "root":
                # Nothing left to explore anywhere in the tree.
                self.terminated = True
                return None

            self.current_position = self.tree.parent(self.current_position.identifier)
            self.depth -= 1

    def _select_child(self, children):
        """Pick the child to descend into, or ``None`` if none is viable.

        The first child flagged as successful wins immediately and sets
        ``terminated``; otherwise the viable child with the largest ``perf``
        is returned.
        """
        best_perf = -np.inf
        best_child = None
        for child in children:
            if child.data["success"]:
                self.terminated = True
                return child
            # ``perf`` is None for non-viable children, so test viability first.
            if child.data["viable"] and child.data["perf"] > best_perf:
                best_perf = child.data["perf"]
                best_child = child
        return best_child

    def record_leaf(self, action, perf, viable, success, start_state):
        """Attach the outcome of ``action`` as a new child of the current node."""
        data = {"action": action,
                "perf": perf,
                "viable": viable,
                "success": success,
                "start_state": start_state}
        self.tree.create_node(parent=self.current_position, data=data)

    def reconstruct_trajectory(self):
        """Return the actions leading from the root to the current node, in order."""
        reconstruct_position = self.current_position
        action_list = []

        # Walk up to the root collecting actions, then flip into root-first order.
        while reconstruct_position.identifier != "root":
            action_list.append(reconstruct_position.data["action"])
            reconstruct_position = self.tree.parent(reconstruct_position.identifier)

        action_list.reverse()
        return action_list
    

# Client Classes

## Abstract Client

In [15]:
class AbstractClient(Client):
    """Base TMInterface client shared by the training and replay clients.

    It applies the currently selected discrete action on every run step,
    detects failed attempts, and rewinds the simulation to ``start_state``
    when an attempt ends.
    """

    def __init__(self):
        super().__init__()
        self.period_ms = 1000
        self.final_state = None
        self.start_state = None
        self.is_finished = False
        self.selected_action = False
        self.crashed = False
        # Race time at which the current attempt ends. Subclasses relying on
        # the default `action` must set it; None never matches a time.
        self.anchor = None

    def on_registered(self, iface: TMInterface) -> None:
        """Issue the "press delete" command and report the server name."""
        iface.execute_command("press delete")
        print(f'Registered to {iface.server_name}')

    def on_run_step(self, iface, _time: int):
        """Forward every run step to :meth:`action`."""
        self.action(iface, _time)

    def on_checkpoint_count_changed(self, iface, current: int, target: int):
        """Flag the run as finished once the last checkpoint is reached."""
        if current >= 1 and current == target:
            self.is_finished = True
            print(iface.get_simulation_state().position)
            iface.prevent_simulation_finish()

    def reset_detection(self, _time, state):
        """Return True when the current attempt should be abandoned.

        An attempt is abandoned when the second position component drops
        below 9.2, when (from 500 ms on) the third local-speed component
        times 3.6 is below 10, or when the car has any lateral contact.
        """
        # NOTE(review): presumably position[1] is the height and 9.2 the
        # road level of the training track -- confirm.
        if state.position[1] < 9.2:
            return True

        if _time >= 500:
            local_velocity = state.scene_mobil.current_local_speed
            local_velocity = np.array(list(local_velocity.to_numpy()))
            # NOTE(review): the 3.6 factor looks like m/s -> km/h -- confirm.
            local_velocity = local_velocity*3.6
            if local_velocity[2] < 10:
                return True

        if state.scene_mobil.has_any_lateral_contact:
            return True

        return False

    def action(self, iface, _time: int):
        """Apply the selected action and end the attempt when required.

        Nothing happens before time 0. A detected failure ends the attempt
        with ``crashed`` set; otherwise, when ``_time`` equals ``anchor``,
        the state is stored in ``final_state`` and the attempt ends normally.
        """
        if _time < 0:
            return

        command = discrete_to_continuous(self.selected_action)
        iface.set_input_state(**command)

        if self.reset_detection(_time, iface.get_simulation_state()):
            self.crashed = True
            self.finish(iface, _time)
            # The attempt has already been closed. Without this return, a
            # failure landing exactly on the anchor tick would call finish()
            # a second time within the same step.
            return

        self.crashed = False
        if _time == self.anchor:
            self.final_state = iface.get_simulation_state()
            self.finish(iface, _time)

    def finish(self, iface,  _time):
        """End the current attempt by rewinding to ``start_state``."""
        iface.rewind_to_state(self.start_state)

## Training Client and Replay Client

In [16]:
class TrainingClient(AbstractClient):
    """Client that searches for a trajectory by exploring an action tree.

    Each attempt applies one action for ``period`` milliseconds starting
    from the state stored in the explorer's current node; the outcome is
    recorded as a leaf and the explorer chooses what to try next.
    """

    def __init__(self, period=1000, training_track_name="Deterministic_Proof"):
        super().__init__()

        self.period_ms = period

        # Reference line of the training track and its three columns.
        self.centerline, self.alpha = centerline_objective(training_track_name)
        self.centerline_x, self.centerline_y, self.centerline_z = (
            self.centerline[:, axis] for axis in range(3)
        )

        # Explorer bookkeeping; the explorer itself is created on the first
        # relevant run step.
        self.anchor = self.period_ms - 10
        self.explorer = None
        self.selected_action = None
        self.save_state = None

    def objective_function(self):
        """Score the finished attempt by the third local-speed component.

        (An earlier variant scored progress along the centerline instead.)
        """
        return self.final_state.scene_mobil.current_local_speed[2]

    def on_run_step(self, iface, _time: int):
        """Create the explorer at time -10, then drive every step through `action`."""
        if self.explorer is None:
            # IMPORTANT: 1 step offset to prevent missing inputs
            if _time != -10:
                return
            self.explorer = TreeExplorer(iface.get_simulation_state())
            self.selected_action = self.explorer.explore_node()
            self.start_state = self.explorer.current_position.data["start_state"]

        self.action(iface, _time)

    def finish(self, iface, _time):
        """Record the attempt's outcome, pick the next one and rewind to its start."""
        tried_action = self.selected_action
        viable = self.crashed is False

        # A crashed attempt carries no score and no state to continue from.
        if viable:
            outcome = (self.objective_function(), self.is_finished, self.final_state)
        else:
            outcome = (None, False, None)
        perf, success, leaf_state = outcome

        self.explorer.record_leaf(tried_action, perf, viable, success, leaf_state)

        # Ask the explorer what to try next; it may have moved to another node.
        self.selected_action = self.explorer.explore_node()
        next_start = self.explorer.current_position.data["start_state"]
        self.start_state = next_start
        self.anchor = next_start.race_time + self.period_ms

        iface.rewind_to_state(self.start_state)

    def reconstruct_trajectory(self):
        """Return the explorer's action path followed by the action in progress."""
        return self.explorer.reconstruct_trajectory() + [self.selected_action]
        
class ReplayClient(AbstractClient):
    """Client that replays a fixed sequence of discrete actions.

    Args:
        period: Duration in milliseconds for which each action is held.
        dna: Sequence of discrete actions, one per period.
    """

    def __init__(self, period, dna):
        super().__init__()
        self.period_ms = period
        self.dna = dna

    def on_run_step(self, iface, _time: int):
        """Replay once a start state exists; capture that state at time -10."""
        if self.start_state is not None:
            self.action(iface, _time)
        if _time == - 10:
            self.start_state = iface.get_simulation_state()

    def action(self, iface, _time: int):
        """Apply the action scheduled for ``_time`` and restart on failure."""
        if _time < 0:
            return

        step = _time // self.period_ms
        # Once the recorded trajectory is exhausted, fall back to action 0
        # (no input) instead of raising IndexError inside the callback.
        action = self.dna[step] if step < len(self.dna) else 0
        command = discrete_to_continuous(action)
        iface.set_input_state(**command)

        if self.reset_detection(_time, iface.get_simulation_state()):
            self.crashed = True
            self.finish(iface, _time)
        else:
            self.crashed = False

    def finish(self, iface, _time):
        """Store the state reached and rewind to the captured start state."""
        self.final_state = iface.get_simulation_state()
        iface.rewind_to_state(self.start_state)

# TRAINING

In [22]:
# Run the tree search until the track is finished or "q" is pressed.
interface = TMInterface()
client = TrainingClient(period=500)

interface.register(client)
print("Start")

try:
    while client.is_finished is False:
        time.sleep(0.001)

        if keyboard.is_pressed("q"):
            print("Keybord Interrupt")
            break
finally:
    # Always release the interface, even if the loop is interrupted by an
    # exception (e.g. a kernel interrupt).
    interface.close()

best_trajectory = client.reconstruct_trajectory()
print(best_trajectory)

Start
Registered to TMInterface0
2 0
2 10
2 20
2 30
2 40
2 50
2 60
2 70
2 80
2 90
2 100
2 110
2 120
2 130
2 140
2 150
2 160
2 170
2 180
2 190
2 200
2 210
2 220
2 230
2 240
2 250
2 260
2 270
2 280
2 290
2 300
2 310
2 320
2 330
2 340
2 350
2 360
2 370
2 380
2 390
2 400
2 410
2 420
2 430
2 440
2 450
2 460
2 470
2 480
2 490
3 0
3 10
3 20
3 30
3 40
3 50
3 60
3 70
3 80
3 90
3 100
3 110
3 120
3 130
3 140
3 150
3 160
3 170
3 180
3 190
3 200
3 210
3 220
3 230
3 240
3 250
3 260
3 270
3 280
3 290
3 300
3 310
3 320
3 330
3 340
3 350
3 360
3 370
3 380
3 390
3 400
3 410
3 420
3 430
3 440
3 450
3 460
3 470
3 480
3 490
4 0
4 10
4 20
4 30
4 40
4 50
4 60
4 70
4 80
4 90
4 100
4 110
4 120
4 130
4 140
4 150
4 160
4 170
4 180
4 190
4 200
4 210
4 220
4 230
4 240
4 250
4 260
4 270
4 280
4 290
4 300
4 310
4 320
4 330
4 340
4 350
4 360
4 370
4 380
4 390
4 400
4 410
4 420
4 430
4 440
4 450
4 460
4 470
4 480
4 490
2 500
2 510
2 520
2 530
2 540
2 550
2 560
2 570
2 580
2 590
2 600
2 610
2 620
2 630
2 640
2 650
2 66

# TESTING

In [23]:
# Replay the trajectory found during training until the track is finished
# or "q" is pressed.
interface = TMInterface()
# best_trajectory = [3, 3, 3, 3, 3, 2, 3, 2, 3, 4, 3, 4, 2]
client = ReplayClient(period=500, dna=best_trajectory)

interface.register(client)

print("Start")
try:
    while client.is_finished is False:
        time.sleep(0.001)

        if keyboard.is_pressed("q"):
            print("Keybord Interrupt")
            break
finally:
    # Always release the interface, even if the loop is interrupted by an
    # exception (e.g. a kernel interrupt).
    interface.close()

Start
Registered to TMInterface0
3 0
3 10
3 20
3 30
3 40
3 50
3 60
3 70
3 80
3 90
3 100
3 110
3 120
3 130
3 140
3 150
3 160
3 170
3 180
3 190
3 200
3 210
3 220
3 230
3 240
3 250
3 260
3 270
3 280
3 290
3 300
3 310
3 320
3 330
3 340
3 350
3 360
3 370
3 380
3 390
3 400
3 410
3 420
3 430
3 440
3 450
3 460
3 470
3 480
3 490
3 500
3 510
3 520
3 530
3 540
3 550
3 560
3 570
3 580
3 590
3 600
3 610
3 620
3 630
3 640
3 650
3 660
3 670
3 680
3 690
3 700
3 710
3 720
3 730
3 740
3 750
3 760
3 770
3 780
3 790
3 800
3 810
3 820
3 830
3 840
3 850
3 860
3 870
3 880
3 890
3 900
3 910
3 920
3 930
3 940
3 950
3 960
3 970
3 980
3 990
3 1000
3 1010
3 1020
3 1030
3 1040
3 1050
3 1060
3 1070
3 1080
3 1090
3 1100
3 1110
3 1120
3 1130
3 1140
3 1150
3 1160
3 1170
3 1180
3 1190
3 1200
3 1210
3 1220
3 1230
3 1240
3 1250
3 1260
3 1270
3 1280
3 1290
3 1300
3 1310
3 1320
3 1330
3 1340
3 1350
3 1360
3 1370
3 1380
3 1390
3 1400
3 1410
3 1420
3 1430
3 1440
3 1450
3 1460
3 1470
3 1480
3 1490
3 1500
3 1510
3 1520
3 1530
