In [1]:
import carla
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Normal
import learn2learn as l2l
import random
import csv
import time

import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(NeuralNetwork, self).__init__()

        self.l1 = nn.Linear(state_dim, 64)
        self.l2 = nn.Linear(64, 64)
        self.l3 = nn.Linear(64, action_dim)

    def forward(self, state):
        x = F.relu(self.l1(state))
        x = F.relu(self.l2(x))
        #action_logits = self.l3(x)
        actions = self.l3(x)
        #ction_probs = F.softmax(action_logits, dim=1)

        return actions


class CarlaEnvironment:
    def __init__(self, world, vehicle_blueprint, waypoints):
        self.world = world
        self.vehicle_blueprint = vehicle_blueprint
        self.waypoints = waypoints
        self.actorlist = []
        self.vehicle = self.world.spawn_actor(self.vehicle_blueprint, carla.Transform(carla.Location(x=-23.6,y=137.5,z=1),carla.Rotation(yaw=0)))
        
    def reset(self):
        self.vehicle.destroy()
        self.vehicle = self.world.spawn_actor(self.vehicle_blueprint, carla.Transform(carla.Location(x=-23.6,y=137.5,z=1),carla.Rotation(yaw=0)))  
        self.current_waypoint = 0
        return self.get_state()

    def step(self, action):
        action = action.detach().numpy()
        #print(action)
        # Apply actions: Throttle, Steering, Brake
        self.vehicle.apply_control(carla.VehicleControl(throttle = float(action[0]) , steer = float(action[1]) , brake = float(action[2])))
       
        #self.actorlist.append(self.vehicle)

        # Get next state
        next_state = self.get_state()

        # Calculate reward
        reward = self.calculate_reward(next_state) 

        # Check if the episode is done
        done = self.is_done(next_state)

        return next_state, reward, done

#     def get_state(self):
#         curr_vel = self.vehicle.get_velocity()
#         curr_yaw = self.vehicle.get_transform().rotation.yaw
#         waypoint = self.waypoints[self.current_waypoint]
#         return np.array([curr_vel, curr_yaw - waypoint[1], curr_yaw - waypoint[2], waypoint[3]])
    
    def get_state(self):
        state = []
        curr_yaw = self.vehicle.get_transform().rotation.yaw
        
        
        for i in range(10):
            if self.current_waypoint + i < len(self.waypoints):
                waypoint = self.waypoints[self.current_waypoint + i]
                curr_vel = self.vehicle.get_velocity()
                curr_loc = self.vehicle.get_location()
                vx = curr_vel.x
                vy = curr_vel.y
                v = ((vx)**2+(vy)**2)**0.5
                distance = curr_loc.distance(carla.Location(x=waypoint[4],y=waypoint[5]))
                state.extend([v, curr_yaw - waypoint[1], curr_yaw - waypoint[2], distance ])
            else:
                state.extend([0, 0, 0, 0])
        return np.array(state)

    def generate_waypoints(self):
        # Implement waypoint generation logic
        pass

    def calculate_reward(self, state):
        reward = -(np.linalg.norm(state[0] - self.waypoints[self.current_waypoint][0]) - (np.linalg.norm (state[1])) - (np.linalg.norm ( state[2])) - (np.linalg.norm (state[3])) + self.current_waypoint)
        return reward

    def is_done(self, state):
        if np.abs(state[3]) < 0.20:  # If the vehicle is close to the next waypoint
            self.current_waypoint += 1

        if self.current_waypoint >= len(self.waypoints):
            return True

        return False

    def set_weather(self, weather):
        self.world.set_weather(weather)
        
    def destroy(self):
        for actor in self.actorlist:
            actor.destroy()
        print("All Cleared")

def set_conditions(env, condition):
    
        precipitation = condition[0]
        precipitation_deposits = condition[1]
        tire_friction = condition[2]
        weather = carla.WeatherParameters( precipitation = precipitation,
                                            precipitation_deposits = precipitation_deposits)
        world.set_weather(weather)

        tire_condition = carla.WheelPhysicsControl(tire_friction = tire_friction)

        front_left_wheel  = carla.WheelPhysicsControl(tire_friction = tire_friction)
        front_right_wheel = carla.WheelPhysicsControl(tire_friction = tire_friction)
        rear_left_wheel   = carla.WheelPhysicsControl(tire_friction = tire_friction)
        rear_right_wheel  = carla.WheelPhysicsControl(tire_friction = tire_friction)

        wheels = [front_left_wheel, front_right_wheel, rear_left_wheel, rear_right_wheel]
        
        physics_control = env.vehicle.get_physics_control()
        physics_control.wheels = wheels
        env.vehicle.apply_physics_control(physics_control)
    

def train_ppo_maml(env, state_dim, action_dim, lr=1e-4, n_iters=1000000, inner_loop_steps=5):
    
 
    p_low = 0
    p_mid = 50
    p_high = 100

    pd_low = 50
    pd_high = 100

    tf_low = 0.5
    tf_mid = 1.5
    tf_high = 2.5
    
    conditions = [[p_low, pd_low, tf_high],
            [p_low, pd_low, tf_mid],
            [p_low, pd_high, tf_low],
            [p_mid, pd_low, tf_low],
            [p_mid, pd_high, tf_low],
            [p_mid, pd_low, tf_mid],
            [p_high, pd_low, tf_high],
            [p_high, pd_high, tf_low],
            [p_low, pd_high, tf_mid],
            [p_high, pd_low, tf_mid]]
    
    n_tasks = len(conditions)

    
    base_model = NeuralNetwork(state_dim, action_dim)
    maml = l2l.algorithms.MAML(base_model, lr = lr)
    optimizer = optim.Adam(maml.parameters(), lr = lr)
    iteration = 1
    for condition in conditions:
        
        set_conditions(env,condition)
    
        trajectories, state_tensors, action_tensors = sample_trajectories(env, maml)
        state_tensors_stacked = torch.stack(state_tensors)
        action_tensors_stacked = torch.stack(action_tensors)
        
        for _ in range(inner_loop_steps):
        
            learner = maml.clone()
            error = nn.MSELoss()(learner(state_tensors_stacked), action_tensors_stacked)
            learner.adapt(error)
            error = nn.MSELoss()(learner(state_tensors_stacked), action_tensors_stacked)
            #error.backward(retain_graph=True)

        # Evaluating the model
        p_evaluate = random.uniform(p_low, p_high)
        pd_evaluate = random.uniform(pd_low, pd_high)
        tf_evaluate = random.uniform(tf_low, tf_high)
        evaluating_condition = [p_evaluate, pd_evaluate, tf_evaluate]
        set_conditions(env, evaluating_condition)
        
        trajectories, state_tensors, action_tensors = sample_trajectories(env, maml)
        state_tensors_stacked = torch.stack(state_tensors)
        action_tensors_stacked = torch.stack(action_tensors)
        error = nn.MSELoss()(learner(state_tensors_stacked), action_tensors_stacked)
        error.backward(retain_graph=True)
        
        
        optimizer.zero_grad()
        optimizer.step()
        
        iteration += 1
        print("Iteration for {condition}: ", iteration)

        if iteration % 1000 == 0:
            print("Iteration for {condition}: ", iteration)


def sample_trajectories(env, model, num_episodes=10, max_episode_length=1000):
#     model.to("cpu")
    trajectories = []
    traj_states = []
    traj_action = []
    state_tensors = []
    action_tensors = []
       
    for _ in range(num_episodes):
        state = env.reset()                
        trajectory = []

        for j in range(max_episode_length):
            
#             state_tensor = torch.tensor(state).to(torch.float32)
            state_tensor = torch.from_numpy(state.astype(np.float32))
            state_tensors.append(state_tensor)
            action_tensor = model(state_tensor)
#             action_distribution = Normal(action_mean, torch.tensor(0.1))
#             action = action_distribution.sample().detach().numpy().astype(float)
            
            action = action_tensor
#             action_tensor = torch.tensor(action, dtype=torch.float32)
            action_tensors.append(action)

            next_state, reward, done = env.step(action)
            trajectory.append((state, action, reward, next_state, done))

            state = next_state
            
            if not j%200 : print("Reward: ", reward, '  J: ', j )
            #state_tensors = torch.tensor(state_tensor, dtype=torch.float32)
            if done or j==max_episode_length-1:
                #env.vehicle.destroy()
                break  

        trajectories.append(trajectory)
    return trajectories, state_tensors, action_tensors

            
def read_waypoints(file_path):
    waypoints = []
    with open(file_path, 'r') as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            v, yaw_c, yaw_n, r, x, y = map(float, row)
            waypoints.append((v, yaw_c, yaw_n, r, x, y))
            
    return waypoints                        

In [2]:
if __name__ == '__main__':

    client = carla.Client('localhost', 2000)
    client.set_timeout(2.0)

    world = client.get_world()
    blueprint_library = world.get_blueprint_library()
    vehicle_blueprint = blueprint_library.filter("wrangler_rubicon")[0]

    waypoints = read_waypoints("waypoint_with_xy.csv")
    env = CarlaEnvironment(world, vehicle_blueprint, waypoints)  # Implement CarlaEnvironment with the required state and action spaces

    state_dim = 40
    action_dim = 3
    train_ppo_maml(env, state_dim, action_dim)
    

    
        



Reward:  139.78680419921875   J:  0
Reward:  1.6699888110160828   J:  200
Reward:  1.4970015287399292   J:  400
Reward:  1.3412280082702637   J:  600
Reward:  1.2657466530799866   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7433509826660156   J:  200
Reward:  1.7132707834243774   J:  400
Reward:  1.6200791597366333   J:  600
Reward:  1.503084659576416   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7746806144714355   J:  200
Reward:  1.7186057567596436   J:  400
Reward:  1.621147871017456   J:  600
Reward:  1.55790776014328   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7584391832351685   J:  200
Reward:  1.7279086112976074   J:  400
Reward:  1.632849097251892   J:  600
Reward:  1.5005521178245544   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7436023950576782   J:  200
Reward:  1.712868332862854   J:  400
Reward:  1.6231322884559631   J:  600
Reward:  1.4943248629570007   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7698469161987305   

Reward:  139.78680419921875   J:  0
Reward:  1.7137510776519775   J:  200
Reward:  1.6254312992095947   J:  400
Reward:  1.5030986070632935   J:  600
Reward:  1.4329890608787537   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7738171815872192   J:  200
Reward:  1.7494022846221924   J:  400
Reward:  1.6748443245887756   J:  600
Reward:  1.5742568373680115   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7720532417297363   J:  200
Reward:  1.7178140878677368   J:  400
Reward:  1.624607503414154   J:  600
Reward:  1.5055365562438965   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7746049165725708   J:  200
Reward:  1.7177900075912476   J:  400
Reward:  1.6751168966293335   J:  600
Reward:  1.5675711035728455   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7727339267730713   J:  200
Reward:  1.7474547624588013   J:  400
Reward:  1.6740996837615967   J:  600
Reward:  1.6240941286087036   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.77480459213256

Reward:  139.78680419921875   J:  0
Reward:  1.7758145332336426   J:  200
Reward:  1.7250967025756836   J:  400
Reward:  1.6884304881095886   J:  600
Reward:  1.5934617519378662   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7755264043807983   J:  200
Reward:  1.7274352312088013   J:  400
Reward:  1.6864164471626282   J:  600
Reward:  1.5869842171669006   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7475073337554932   J:  200
Reward:  1.6943602561950684   J:  400
Reward:  1.6024456024169922   J:  600
Reward:  1.5520419478416443   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7764499187469482   J:  200
Reward:  1.7285726070404053   J:  400
Reward:  1.6922542452812195   J:  600
Reward:  1.602317988872528   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7734977006912231   J:  200
Reward:  1.72649085521698   J:  400
Reward:  1.6470374464988708   J:  600
Reward:  1.5431002378463745   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7775896787643433

Reward:  1.6577802896499634   J:  600
Reward:  1.6041189432144165   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7704294919967651   J:  200
Reward:  1.741071343421936   J:  400
Reward:  1.6613979935646057   J:  600
Reward:  1.6011118292808533   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7467306852340698   J:  200
Reward:  1.7088878154754639   J:  400
Reward:  1.6071780920028687   J:  600
Reward:  1.5380882024765015   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.773300290107727   J:  200
Reward:  1.7115010023117065   J:  400
Reward:  1.6618282198905945   J:  600
Reward:  1.6038666367530823   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7726022005081177   J:  200
Reward:  1.7405723333358765   J:  400
Reward:  1.6577790975570679   J:  600
Reward:  1.5945109724998474   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.772457242012024   J:  200
Reward:  1.71019446849823   J:  400
Reward:  1.664695680141449   J:  600
Reward:  1.546335756778717  

Reward:  1.5355716943740845   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7710862159729004   J:  200
Reward:  1.7446426153182983   J:  400
Reward:  1.5388233065605164   J:  600
Reward:  1.4688900113105774   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7726202011108398   J:  200
Reward:  1.7378228902816772   J:  400
Reward:  1.6557130217552185   J:  600
Reward:  1.608519971370697   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.7725284099578857   J:  200
Reward:  1.7485613822937012   J:  400
Reward:  1.6694581508636475   J:  600
Reward:  1.6203457117080688   J:  800
Reward:  139.78680419921875   J:  0
Reward:  1.770721673965454   J:  200
Reward:  1.7449568510055542   J:  400
Reward:  1.6632604002952576   J:  600
Reward:  1.6079995036125183   J:  800
Iteration for {condition}:  10
Reward:  139.78680419921875   J:  0
Reward:  1.771622896194458   J:  200
Reward:  1.711837649345398   J:  400
Reward:  1.661904215812683   J:  600
Reward:  1.5448241233825684   J:  