In [None]:
#import torch
import numpy as np
import torch
import torch.nn as nn
from torch.distributions import MultivariateNormal
import os
os.environ["NGSIM_DIR"] = "/home/surender/Downloads/NGSIM"
os.environ["OPENDD_DIR"] = "/home/surender/Downloads/openDD"
os.environ["CARLA_PATH"] = "/home/surender/Downloads/carlaOld"
import sys
#sys.path.append('/home/surender/Downloads/CARLA_0.9.9.4/PythonAPI/carla/dist')
import carla
import random
import argparse

from carla_real_traffic_scenarios.carla_maps import CarlaMaps
from carla_real_traffic_scenarios.ngsim import NGSimDatasets, DatasetMode
from carla_real_traffic_scenarios.ngsim.scenario import NGSimLaneChangeScenario
from carla_real_traffic_scenarios.opendd.scenario import OpenDDScenario
from carla_real_traffic_scenarios.reward import RewardType
from carla_real_traffic_scenarios.scenario import Scenario

from carla_birdeye_view import BirdViewProducer, BirdViewCropType, PixelDimensions
from PIL import Image
#from IPython.display import clear_output, Image, display, HTML
import cv2

%matplotlib tk
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import threading
import time
import math

In [None]:
# Select the compute device used by every model and tensor in this notebook.
# Start from the CPU so `device` is always defined, then upgrade to the first
# CUDA GPU when one is available (previously the CPU path only printed a
# message and left `device` undefined).
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    print("Device set to : cpu")

In [None]:
class RolloutBuffer:
    """Storage for the transitions collected between two policy updates.

    Five parallel lists are kept as attributes: ``actions``, ``states``,
    ``logprobs``, ``rewards`` and ``is_terminals``. Callers append to them
    directly.
    """

    # Names of the list attributes managed by this buffer.
    _FIELDS = ("actions", "states", "logprobs", "rewards", "is_terminals")

    def __init__(self):
        for field_name in self._FIELDS:
            setattr(self, field_name, [])

    def clear(self):
        """Empty every list in place; the list objects themselves are kept."""
        for field_name in self._FIELDS:
            getattr(self, field_name).clear()

In [None]:
'''
class Flatten(nn.Module):
    """Helper to flatten a tensor."""
    def forward(self, x):
        return x.view(x.size(0), -1)
'''
class Normal(nn.Module):
    """Turn a tensor of means into a diagonal Gaussian distribution.

    The module owns one learnable parameter per output dimension. The
    parameter is exponentiated before use, so it stores the *log* of the
    standard deviation; it starts at zero, i.e. a standard deviation of 1.
    """
    def __init__(self, num_outputs):
        super().__init__()
        # exp(0) == 1, so every dimension starts with unit scale
        self.stds = nn.Parameter(torch.zeros(num_outputs))

    def forward(self, x):
        """Return ``torch.distributions.Normal(x, exp(stds))`` with a joint ``log_prob``.

        The returned object's ``log_prob`` sums the per-dimension log
        probabilities over the last axis; the untouched per-dimension method
        remains available as ``old_log_prob``.
        """
        gaussian = torch.distributions.Normal(loc=x, scale=torch.exp(self.stds))

        # Keep the element-wise implementation reachable under a new name ...
        gaussian.old_log_prob = gaussian.log_prob

        # ... and shadow it with one that reduces over the last dimension
        # (a sum of logs is the log of the product of the probabilities).
        def joint_log_prob(value):
            return gaussian.old_log_prob(value).sum(-1)

        gaussian.log_prob = joint_log_prob
        return gaussian
    
class AutoEncoder(nn.Module):
    """Convolutional auto-encoder for image batches.

    The encoder is three conv + max-pool stages that reduce the input to 16
    feature maps; the decoder is four transposed convolutions (ReLU between
    them) ending in a sigmoid, so outputs lie in [0, 1].

    Args:
        num_channels: Number of channels of the input images.

    Note:
        The decoder always produces 3 channels regardless of ``num_channels``.
        With the layer sizes below, a 320 x 320 input is reconstructed at
        320 x 320; other input sizes generally come back at a different
        resolution.
    """
    def __init__(self,num_channels=3):
        super(AutoEncoder,self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(num_channels, 32, kernel_size=4, stride=2),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 16, kernel_size=2, stride=2),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 16, kernel_size=1, stride=1),
            nn.MaxPool2d(2, 2)
        )

        # Decoder layers; attribute names are kept so saved state dicts still load.
        self.l1 = nn.ConvTranspose2d(16, 16, kernel_size = 8, stride=2)
        self.relu = nn.ReLU()
        self.l2 = nn.ConvTranspose2d(16, 32, kernel_size = 8, stride=2)
        self.l3 = nn.ConvTranspose2d(32, 3, kernel_size = 8, stride=1)
        self.l4 = nn.ConvTranspose2d(3,3,kernel_size = 20,stride = 5)
        self.sig = nn.Sigmoid()
        # Not used by forward(); retained only so the attribute keeps existing.
        self.pool = nn.MaxPool2d(2,2)

    def forward(self,x):
        """Encode and decode ``x`` (N, num_channels, H, W); returns values in [0, 1].

        The leftover debugging ``print`` of the output shape was removed so
        that training loops no longer emit one line per forward pass.
        """
        x = self.encoder(x)
        x = self.relu(self.l1(x))
        x = self.relu(self.l2(x))
        x = self.relu(self.l3(x))
        return self.sig(self.l4(x))
        
class ActorCritic(nn.Module):
    """Separate actor and critic CNNs used by PPO.

    Both networks have the same architecture (three conv / batch-norm /
    max-pool stages followed by two linear layers) but do not share weights.
    The flattened feature size 124416 is hard-coded; it equals 128 feature
    maps of 36 x 27, which is what the conv stack produces for 186 x 150
    inputs.

    Args:
        state_dim: Accepted for interface compatibility; the convolutional
            networks do not use it.
        action_dim: Number of action components (continuous) or number of
            discrete actions.
        has_continuous_action_space: Selects a Gaussian policy (True) or a
            categorical policy (False).
        action_std_init: Initial standard deviation of the Gaussian policy.
        hidden_size: Width of the hidden linear layer in both networks.
        num_channels: Number of input channels.

    Notes:
        * ``action_var`` is created on the module-level ``device``.
        * The critic ends in ``Tanh`` and therefore predicts values in
          [-1, 1]. NOTE(review): PPO.update() regresses it onto normalised
          returns, which can fall outside that range - confirm this is intended.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init,hidden_size = 512,num_channels = 5):
        super(ActorCritic, self).__init__()

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_dim = action_dim
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)

        # Continuous policies emit means squashed to [-1, 1]; discrete policies
        # must emit a probability vector for Categorical.
        actor_head = nn.Tanh() if has_continuous_action_space else nn.Softmax(dim=-1)

        # Actor is built before critic so parameter initialisation order (and
        # therefore seeded runs) stays the same as before.
        self.actor = self._build_network(num_channels, hidden_size, action_dim, actor_head)
        self.critic = self._build_network(num_channels, hidden_size, 1, nn.Tanh())

    @staticmethod
    def _build_network(num_channels, hidden_size, num_outputs, head):
        """Build one conv trunk + MLP; layer order is fixed so old checkpoints still load."""
        return nn.Sequential(
            nn.Conv2d(num_channels, 32, kernel_size=8, stride=2),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(4,2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=1),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2,1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=1),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2,1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(124416, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_outputs),
            head,
        )

    def set_action_std(self, new_action_std):
        """Replace the Gaussian policy variance with ``new_action_std ** 2`` (continuous only)."""
        if self.has_continuous_action_space:
            self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def AutoEncoder(self,x):
        """Placeholder kept for compatibility; does nothing and returns None."""
        pass

    def forward(self):
        """Not supported - use :meth:`act` or :meth:`evaluate` instead."""
        raise NotImplementedError

    def backward(self, x):
        """Identity helper kept for compatibility (the debugger breakpoint was removed)."""
        return x

    def _policy_distribution(self, state):
        """Run the actor on ``state`` and wrap its output in the matching distribution."""
        if self.has_continuous_action_space:
            action_mean = self.actor(state)
            # One diagonal covariance matrix per row of the batch.
            action_var = self.action_var.expand_as(action_mean)
            cov_mat = torch.diag_embed(action_var).to(action_mean.device)
            return MultivariateNormal(action_mean, cov_mat)
        # Referenced through the torch namespace because the notebook never
        # imported the bare name `Categorical`.
        return torch.distributions.Categorical(self.actor(state))

    def act(self, state):
        """Sample an action for ``state``.

        Returns:
            ``(action, log_prob)``, both detached from the autograd graph.
        """
        dist = self._policy_distribution(state)
        action = dist.sample()
        action_logprob = dist.log_prob(action)

        return action.detach(), action_logprob.detach()

    def evaluate(self, state, action):
        """Score previously taken actions under the current policy.

        Args:
            state: Batch of states. In continuous mode a single un-batched
                3-D state is accepted and given a leading batch axis.
            action: Actions taken in those states.

        Returns:
            ``(log_probs, state_values, entropy)`` where ``state_values`` is
            the raw critic output.
        """
        if self.has_continuous_action_space:
            if len(state.shape) == 3:
                # PPO.update() squeezes away the batch axis when the buffer
                # holds a single transition; restore it without assuming a
                # particular image size.
                state = state.unsqueeze(0)
            dist = self._policy_distribution(state)
            # for single action continuous environments
            if self.action_dim == 1:
                action = action.reshape(-1, self.action_dim)
        else:
            dist = self._policy_distribution(state)

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_values = self.critic(state)
        return action_logprobs, state_values, dist_entropy


In [None]:
class PPO:
    """Proximal Policy Optimisation (clipped surrogate objective) agent.

    Two ``ActorCritic`` copies are kept: ``policy`` is optimised, while
    ``policy_old`` generates rollouts and is synchronised after each update.
    Transitions are collected in ``self.buffer``; the caller appends
    ``rewards`` and ``is_terminals`` itself after each environment step.

    Args:
        state_dim, action_dim: Forwarded to ``ActorCritic``.
        lr_actor, lr_critic: Adam learning rates for the two networks.
        gamma: Discount factor.
        K_epochs: Gradient steps per call to :meth:`update`.
        eps_clip: PPO clipping range.
        has_continuous_action_space: Gaussian (True) or categorical policy.
        action_std_init: Initial action standard deviation (continuous only).
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.optimizer = torch.optim.Adam([
                        {'params': self.policy.actor.parameters(), 'lr': lr_actor},
                        {'params': self.policy.critic.parameters(),'lr': lr_critic}
                    ])
        # Critic-only optimiser; update() never steps it. Kept so the
        # attribute continues to exist for any external code.
        self.optimizer2 = torch.optim.Adam([
            {'params': self.policy.critic.parameters(), 'lr': lr_critic}
        ])

        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the action standard deviation on both policies (continuous only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Subtract ``action_std_decay_rate`` from the action std, never going below ``min_action_std``."""
        if self.has_continuous_action_space:
            self.action_std = round(self.action_std - action_std_decay_rate, 4)
            if self.action_std <= min_action_std:
                self.action_std = min_action_std
            self.set_action_std(self.action_std)
        else:
            print("WARNING : Calling PPO::decay_action_std() on discrete action space policy")

    def select_action(self, state):
        """Sample an action from ``policy_old`` and record the transition.

        The state tensor, action and log-probability are appended to the
        buffer. Returns a flat NumPy array for continuous policies and a
        Python scalar for discrete ones.
        """
        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state)

        self.buffer.states.append(state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        if self.has_continuous_action_space:
            return action.detach().cpu().numpy().flatten()
        return action.item()

    def _discounted_returns(self):
        """Monte Carlo returns for the buffered rewards, restarting at terminal steps."""
        returns = []
        running = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                running = 0
            running = reward + (self.gamma * running)
            returns.append(running)
        # Built back-to-front with O(1) appends, then flipped once.
        returns.reverse()
        return returns

    def update(self):
        """Run ``K_epochs`` PPO gradient steps on the buffered rollout, then clear it.

        Does nothing when no transitions have been recorded (for example when
        an episode ends on the same step as a scheduled update).
        """
        if not self.buffer.states:
            return

        # Normalised Monte Carlo returns
        rewards = torch.tensor(self._discounted_returns(), dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std(unbiased = False) + 1e-7)

        # convert list to tensor
        # NOTE: squeeze() removes every size-1 axis, including the batch axis
        # when the buffer holds a single transition; ActorCritic.evaluate
        # restores it for 3-D states.
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)

            # (N, 1) -> (N,) so values line up with the returns, also when N == 1
            state_values = state_values.squeeze(-1)

            # Ratio pi_theta / pi_theta_old
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Clipped surrogate objective
            advantages = rewards - state_values.detach()
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages

            # Policy loss + value loss (scalar, broadcast) - entropy bonus
            loss = -torch.min(surr1, surr2) + 0.5*self.MseLoss(state_values, rewards) - 0.01*dist_entropy

            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write ``policy_old``'s state dict to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load a checkpoint written by :meth:`save` into both policies.

        NOTE: ``torch.load`` unpickles the file - only load checkpoints from
        a trusted source.
        """
        # Deserialise once (onto CPU storage) and copy into both networks.
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)

In [None]:
# --- PPO hyper-parameters -------------------------------------------------
# NOTE(review): `update_timestep` and `random_seed` are defined here but no
# visible cell reads them (the training loop uses its own `freq`, and no seed
# is ever applied) - confirm whether they should be wired in.
update_timestep = 1000          # intended number of timesteps between policy updates
K_epochs, eps_clip = 40, 0.2    # gradient steps per update / PPO clipping range
gamma = 0.99                    # discount factor

# Adam learning rates: actor network, critic network
lr_actor, lr_critic = 0.0003, 0.001

random_seed = 0

In [None]:
def prepare_ngsim_scenario(client: carla.Client) -> Scenario:
    """Load the map of one NGSIM dataset and build a lane-change scenario on it.

    The dataset directory is read from the ``NGSIM_DIR`` environment variable
    and the second entry of ``NGSimDatasets.list()`` is used. The matching
    CARLA world is loaded through ``client`` before the scenario is created
    in TRAIN mode with dense rewards.

    Raises:
        AssertionError: if ``NGSIM_DIR`` is unset or empty.
    """
    ngsim_root = os.environ.get("NGSIM_DIR")
    assert ngsim_root, "Path to the directory with NGSIM dataset is required"

    # Index 1 selects the second dataset returned by the library.
    chosen_dataset = NGSimDatasets.list()[1]
    client.load_world(chosen_dataset.carla_map.level_path)

    scenario_options = dict(
        dataset_mode=DatasetMode.TRAIN,
        data_dir=ngsim_root,
        reward_type=RewardType.DENSE,
        client=client,
    )
    return NGSimLaneChangeScenario(chosen_dataset, **scenario_options)

'''
def prepare_opendd_scenario(client: carla.Client) -> Scenario:
    data_dir = os.environ.get("OPENDD_DIR")
    assert data_dir, "Path to the directory with openDD dataset is required"
    maps = ["rdb1", "rdb2", "rdb3", "rdb4", "rdb5", "rdb6", "rdb7"]
    map_name = random.choice(maps)
    carla_map = getattr(CarlaMaps, map_name.upper())
    client.load_world(carla_map.level_path)
    return OpenDDScenario(
        client,
        dataset_dir=data_dir,
        dataset_mode=DatasetMode.TRAIN,
        reward_type=RewardType.DENSE,
        place_name=map_name,
    )

'''
def prepare_ego_vehicle(world: carla.World) -> carla.Actor:
    """Spawn the ego vehicle (an Audi A2 blueprint tagged ``role_name=hero``).

    The vehicle is spawned 500 units above the origin; per the original
    author's note the scenario repositions it on reset, so the spawn pose is
    irrelevant. Sensors are not attached here.

    Raises:
        AssertionError: if ``world.spawn_actor`` returns ``None``.
    """
    blueprint = world.get_blueprint_library().find("vehicle.audi.a2")

    # The "hero" role lets external CARLA example scripts (manual_control.py,
    # no_rendering_mode.py) take control of this actor.
    blueprint.set_attribute("role_name", "hero")

    placeholder_pose = carla.Transform(carla.Location(0, 0, 500), carla.Rotation())
    ego_vehicle = world.spawn_actor(blueprint, placeholder_pose)

    assert ego_vehicle is not None, "Ego vehicle could not be spawned"
    return ego_vehicle

In [None]:
def cmd_carla():
    """Run the CARLA server via ``os.system`` with an empty DISPLAY (10 fps, 300x300).

    The call blocks until the shell command returns. Note that a later cell
    redefines ``cmd_carla`` with different settings, so on a top-to-bottom
    run this version is shadowed.
    """
    launcher = "DISPLAY= /home/surender/Downloads/carlaOld/CarlaUE4.sh"
    flags = "-benchmark -fps=10 -quality-level=Low -opengl -Resx=300 -Resy=300 -NoVSync "
    os.system(launcher + " " + flags)

In [None]:
def cmd_carla(fps=5, res_x=4, res_y=4):
    """Run the CARLA server via ``os.system`` with an empty DISPLAY.

    Blocks until the shell command returns, so it is meant to be used as a
    thread target. Called without arguments it issues exactly the command
    this cell used before (5 fps, 4x4 window).

    Args:
        fps: Fixed simulation frame rate passed as ``-fps``.
        res_x: Window width passed as ``-Resx``.
        res_y: Window height passed as ``-Resy``.

    The installation directory is taken from the ``CARLA_PATH`` environment
    variable (set in the first cell), falling back to the previously
    hard-coded location.
    """
    import shlex

    carla_root = os.environ.get("CARLA_PATH", "/home/surender/Downloads/carlaOld")
    # Quote the path so directories containing spaces or shell
    # metacharacters cannot break (or inject into) the command line.
    launcher = shlex.quote(os.path.join(carla_root, "CarlaUE4.sh"))
    os.system(
        f"DISPLAY= {launcher} -benchmark -fps={int(fps)} -quality-level=Low "
        f"-opengl -Resx={int(res_x)} -Resy={int(res_y)} -NoVSync"
    )

In [None]:
# Launch the CARLA server in a background thread; cmd_carla() blocks inside
# os.system for as long as the server process runs.
# NOTE(review): the name `p` is reused further down for the PPO agent, which
# drops the only reference to this thread - consider a distinct name.
p = threading.Thread(target = cmd_carla)
p.start()


In [None]:
# Connect a client to the server started above.
host = "localhost"
port = 2000
client = carla.Client(host,port)

In [None]:
# Scratch check: squeeze() of a 1x1 tensor yields a 0-d tensor.
torch.squeeze(torch.tensor([[1.4]]))

In [None]:
# Loads the NGSIM map into the simulator and builds the lane-change scenario.
scenario = prepare_ngsim_scenario(client)

In [None]:
# Fetch the world only after the scenario has loaded its map.
world = client.get_world()
spectator = world.get_spectator()
ego_vehicle = prepare_ego_vehicle(world)


In [None]:
# Turn off rendering in the simulator.
# NOTE(review): an RGB camera is attached in a later cell - confirm it still
# delivers usable images with no_rendering_mode enabled.
settings = world.get_settings()
settings.no_rendering_mode = True
world.apply_settings(settings)

In [None]:
# Latest camera frame; overwritten with an image array by check_img().
input_data= []

In [None]:
# Frame number of the latest camera image; updated by check_img().
current_frame = 0

In [None]:
def check_img(img):
    """Camera callback: store the newest frame as a 320 x 320 RGB array.

    Writes two module-level globals:
        input_data: ``uint8`` array of shape (320, 320, 3) in RGB order.
        current_frame: ``img.frame`` of the image just processed.

    Args:
        img: Sensor image exposing ``raw_data``, ``height``, ``width`` and
            ``frame`` (a ``carla.Image`` when used as a sensor listener).
    """
    global input_data,current_frame

    # CARLA delivers 4 bytes per pixel in B, G, R, A order.
    bgra = np.frombuffer(img.raw_data, dtype=np.dtype("uint8"))
    bgra = np.reshape(bgra, (img.height, img.width, 4))

    # Drop alpha and reverse the remaining channels (B,G,R -> R,G,B); the
    # previous plain `[:, :, :3]` slice left red and blue swapped.
    rgb = np.ascontiguousarray(bgra[:, :, 2::-1])

    # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in
    # Pillow >= 10; LANCZOS is the same filter under its current name.
    resized = Image.fromarray(rgb).resize((320,320), Image.LANCZOS)

    input_data = np.array(resized)
    current_frame = img.frame

In [None]:
# Attach a 320x320 RGB camera (FOV 100) rigidly to the ego vehicle, offset by
# Location(2, 0, 1) with zero rotation, and route every frame to check_img().
cam_bp = world.get_blueprint_library().find('sensor.camera.rgb')
cam_bp.set_attribute("image_size_x",str(320))
cam_bp.set_attribute("image_size_y",str(320))
cam_bp.set_attribute("fov",str(100))
cam_location = carla.Location(2,0,1)
cam_rotation = carla.Rotation(0,0,0)
cam_transform = carla.Transform(cam_location,cam_rotation)
ego_front_cam = world.spawn_actor(cam_bp,cam_transform,attach_to=ego_vehicle, attachment_type=carla.AttachmentType.Rigid)
#self.rgb_front_listener = ego_cam
ego_front_cam.listen(lambda image: check_img(image))

In [None]:
# Manual smoke test of one scenario reset / step / tick cycle.
scenario.reset(ego_vehicle)

In [None]:
scenario.step(ego_vehicle)

In [None]:
# Keep the value returned by tick() for inspection below.
c = world.tick()

In [None]:
# Inspection only. NOTE(review): reads a private attribute of the scenario.
scenario._target_lane_waypoint.transform.location.y

In [None]:
c

In [None]:
world.tick()

In [None]:
# Show the most recent camera frame stored by check_img().
plt.imshow(input_data)

In [None]:
current_frame

In [None]:
# Create the PPO agent: state_dim=1, action_dim=2, continuous actions.
# ActorCritic's convolutional networks never read state_dim, so the 1 is a
# placeholder. The two action components are consumed by the training loop
# as action[0] (throttle / brake) and action[1] (steer).
p = PPO(1,2,lr_actor,lr_critic,gamma,K_epochs,eps_clip,True)

In [None]:
# NOTE(review): run top-to-bottom, this deletes the agent created above and
# makes the next two cells fail with NameError - looks like leftover
# interactive clean-up.
del p

In [None]:
# Restore both policies from an earlier checkpoint (requires `p` to exist).
p.load("Model_CHK4.mdl")

In [None]:
print(p.policy)

In [None]:
ae = AutoEncoder(3).to(device)

In [None]:
# NOTE(review): `state` is first assigned in a later cell, so this only works
# after out-of-order execution. It is also passed without .to(device) while
# `ae` lives on `device`.
img = ae(state)

In [None]:
img = img.detach().cpu().numpy()

In [None]:
img = img.reshape((3,320,320))

In [None]:
# NOTE(review): reshape() reinterprets the buffer, it does not move the
# channel axis; a channels-first image needs a transpose to become
# channels-last - confirm the intended layout.
img = img.reshape((320,320,3))

In [None]:
plt.imshow(img)

In [None]:
# NOTE(review): `summary` is not used in any visible cell.
from torchsummary import summary

In [None]:
# Bird's-eye-view renderer: 150 (wide) x 186 (high) pixel masks at 4 pixels
# per metre, cropped to the area in front of the agent.
birdview_producer = BirdViewProducer(
    client,  # carla.Client
    target_size=PixelDimensions(width=150, height=186),
    pixels_per_meter=4,
    crop_type=BirdViewCropType.FRONT_AREA_ONLY
    )

In [None]:
# Render one set of masks around the ego vehicle.
birdview = birdview_producer.produce(
            agent_vehicle=ego_vehicle  # carla.Actor (spawned vehicle)
            )

In [None]:
# Inspection cells: ego position and its offset from the target-lane waypoint.
ego_vehicle.get_location().y 

In [None]:
scenario._target_lane_waypoint.transform.location.y - ego_vehicle.get_location().y 

In [None]:
scenario._target_lane_waypoint.transform.location.x - ego_vehicle.get_location().x 

In [None]:
# NOTE(review): incomplete identifier - raises NameError when executed.
ego_vehi

In [None]:
# Velocity vector of the ego vehicle; components inspected below.
v = ego_vehicle.get_velocity()

In [None]:
v.z

In [None]:
v.x

In [None]:
v.y

In [None]:
# Manual test: full throttle, straight steering, then advance one step.
ego_vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=0.0))


In [None]:
scenario.step(ego_vehicle)

In [None]:
world.tick()

In [None]:
# Render the masks again and preview them as an RGB image in an OpenCV window.
birdview = birdview_producer.produce(
            agent_vehicle=ego_vehicle  # carla.Actor (spawned vehicle)
            )
rgb = BirdViewProducer.as_rgb(birdview)
cv2.imshow('Frame',rgb)

# Pump the GUI event loop for 25 ms; close the window if 'q' was pressed.
if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()


In [None]:
cv2.destroyAllWindows()

In [None]:
# Per-layer previews of the bird's-eye-view masks (labels are the original
# author's).
plt.imshow(birdview[0]) #Full Road Greyed out

In [None]:
plt.imshow(birdview[1])  #Lanes

In [None]:
plt.imshow(birdview[2]) #Centerlines

In [None]:
plt.imshow(birdview[3])#Other vehicles

In [None]:
plt.imshow(birdview[4])# Ego agent

In [None]:
# Keep the first five layers as the policy input.
in_data = birdview[:5,:,:]

In [None]:
in_data.shape

In [None]:
# Add a leading batch axis: (1, 5, 186, 150), the layout the policy networks
# are sized for.
in_data = in_data.reshape((1,5,186,150))

In [None]:
np.max(in_data)

In [None]:
in_data.shape

In [None]:
# NOTE(review): `state` is only assigned a few cells below, so this cell and
# the next depend on out-of-order execution.
p.select_action(state)

In [None]:
p.policy.actor(state.to(device)).shape

In [None]:
# NOTE(review): ActorCritic.forward() takes no arguments and raises
# NotImplementedError, so this call cannot succeed as written.
p.policy.forward(torch.FloatTensor(in_data).to(device))

In [None]:
# Duplicate of the import in the first cell.
import math

In [None]:
p.policy.actor.parameters()

In [None]:
# Convert the current observation to a float tensor (stays on the CPU).
with torch.no_grad():
    state = torch.FloatTensor(in_data)

In [None]:
state

In [None]:
state.shape

In [None]:
# ActorCritic.AutoEncoder() is a no-op, so `k` is None here.
k = p.policy.AutoEncoder(state)

In [None]:
# Fresh 3-channel auto-encoder with an Adam optimiser (lr 0.003) and an MSE
# reconstruction loss; these are the objects used by the training loop below.
ae = AutoEncoder(3).to(device)
optimizer = torch.optim.Adam([
                        {'params': ae.parameters(), 'lr': 0.003}
])

criterion = nn.MSELoss()



In [None]:
# Scratch: concatenating along dim 0 doubles the batch size.
inp = torch.cat((state,state), 0)

In [None]:
inp.shape[0]

In [None]:
# NOTE(review): `inp_tensor` and `outputs` are first created by the training
# loop further down; on a top-to-bottom run this raises NameError.
del inp_tensor,outputs

In [None]:
# Alias, not a copy - A and state refer to the same tensor.
A =  state

In [None]:
# Scratch: scale by 1/255 (result is displayed, not stored).
A/255.

In [None]:
in_data

In [None]:
state.shape

In [None]:
A.shape

In [None]:
# Train the auto-encoder on camera frames collected while the ego vehicle
# drives with constant throttle 0.5.
#
# Each epoch resets the scenario and appends one frame per simulation step to
# `inp_tensor` (seeded with `state`). As soon as at least `min_batch_size`
# frames are available, one optimiser step is taken on the whole batch scaled
# by 1/255 and the batch is reset to the seed. If an episode ends before the
# batch is full, the frames collected so far carry over to the next epoch.
#
# NOTE(review): `state` must already hold a (k, 3, 320, 320) camera tensor for
# torch.cat to succeed; the nearest preceding assignment builds it from the
# 5-channel bird's-eye view, so this cell relies on out-of-order execution.
epochs = 300
min_batch_size = 32
inp_tensor = state
ep_list = []
loss_list = []
for epoch in range(epochs):
    step = 0
    scenario.reset(ego_vehicle)
    c = world.tick()
    done = False
    total_r = 0
    val = 0

    while not done:
        # NOTE(review): input_data is height x width x channel; reshape() only
        # reinterprets the buffer and does not move the channel axis first.
        # The display cells undo it with the inverse reshape, so changing this
        # to a transpose must be done together with them.
        in_data = input_data.reshape((1,3,320,320))
        st = torch.FloatTensor(in_data)
        ego_vehicle.apply_control(carla.VehicleControl(throttle=0.5))
        try:
            cmd, reward, done, _ = scenario.step(ego_vehicle)
        except Exception as exc:
            # Still best-effort (stop collecting for this epoch), but report
            # the reason and no longer swallow KeyboardInterrupt / SystemExit.
            print("scenario.step failed; ending frame collection for this epoch:", exc)
            break
        c = world.tick()
        inp_tensor = torch.cat((inp_tensor,st),0)
        del st
        if inp_tensor.shape[0] >= min_batch_size:
            break
        step += 1

    if inp_tensor.shape[0] >= min_batch_size:
        optimizer.zero_grad()
        # Scale pixel values into [0, 1] to match the sigmoid output range.
        A = inp_tensor/255.
        inp_tensor = A
        batch = inp_tensor.to(device)   # transfer once, reuse as input and target
        outputs = ae(batch)
        loss = criterion(outputs, batch)
        loss.backward()
        optimizer.step()
        print("EPOCH:",epoch,"LOSS:",loss.item())
        ep_list.append(epoch)
        loss_list.append(loss.item())
        # Release the batch before starting the next one from the seed frame.
        del inp_tensor,outputs,A,batch
        inp_tensor = state

In [None]:
# Snapshot of the loss curve. NOTE(review): BCE_loss and MSE_loss_1 hold
# references to the same two list objects, not copies; re-running the training
# cell rebinds ep_list / loss_list, which is what keeps earlier snapshots apart.
BCE_loss = [ep_list,loss_list]

In [None]:
MSE_loss_1 = [ep_list,loss_list]

In [None]:
# Persist the loss curve to disk.
# NOTE(review): the file is closed manually; a `with open(...)` block would
# also close it if pickle.dump raises.
import pickle
f = open("MSE1.pkl",'wb')
pickle.dump(MSE_loss_1,f)
f.close()

In [None]:
#plt.ylim(0,1)
plt.plot(ep_list,loss_list)

In [None]:
in_data = state.detach().cpu().numpy()

In [None]:
# Two consecutive reshapes; only the final shape (320, 320, 3) matters because
# reshape() never reorders the underlying data.
i_img = in_data.reshape((3,320,320)).reshape((320,320,3))

In [None]:
plt.imshow(input_data)

In [None]:
plt.imshow(i_img)

In [None]:
state

In [None]:
# Reconstruct the seed frame with the trained auto-encoder.
out =ae(state.to(device))

In [None]:
oupp = out.detach().cpu().numpy()

In [None]:
o_img = oupp.reshape((3,320,320)).reshape((320,320,3))

In [None]:
plt.imshow(o_img)

In [None]:
# NOTE(review): `k` comes from ActorCritic.AutoEncoder(), which returns None,
# so this raises AttributeError.
k.shape

In [None]:
# Per-episode statistics filled in by the PPO training loop below.
total_reward_list = []
epoch_list = []
step_list = []

In [None]:
torch.cuda.empty_cache()

In [None]:
# PPO training loop: one scenario episode per epoch.
#
# Every step renders the bird's-eye view, lets the agent pick
# (throttle/brake, steer), applies the control within command-dependent
# limits, shapes the scenario reward by speed and steering magnitude and
# stores it in the agent's buffer. The policy is updated every `freq` steps
# and once more at the end of each episode; the action std is decayed every
# `freq_n` steps.

def _control_limits(command_value):
    """Return (steer_min, steer_max, throttle_min, throttle_max) for a command value.

    NOTE(review): the meaning of the numeric command values (0-5) comes from
    the scenario library's command enum - confirm the grouping below.
    """
    if command_value in (0, 1):
        return -0.15, 0.15, 0.4, 1.0
    if command_value in (2, 5):
        return 0.25, 0.8, 0.0, 0.4
    if command_value in (3, 4):
        return -0.8, -0.25, 0.0, 0.4
    return -1.0, 1.0, 0.0, 1.0


def _shaped_reward(reward, kmh, steer):
    """Add speed- and steering-dependent shaping terms to the scenario reward."""
    steer_mag = np.abs(steer)
    # Was `kmh < 60 & kmh > 0.2`: `&` binds tighter than the comparisons, so
    # it evaluated `kmh < (60 & kmh) > 0.2`, which is never true for an int.
    if 0.2 < kmh < 60:
        reward += 1
        # Reward lighter steering when moving
        if steer_mag < 0.3:
            reward += 1
        elif 0.5 < steer_mag < 0.9:
            reward -= 0.1
        elif steer_mag >= 0.9:
            reward -= 0.2
    elif kmh < 0.2:
        # Standing still (kmh is truncated to an int, so this means 0)
        reward -= 0.1
    else:
        # 60 km/h and above
        reward += 0.01
        if steer_mag < 0.3:
            reward += 0.12
        # Reduce score for heavy steering
        if 0.5 < steer_mag < 0.9:
            reward -= 0.17
        elif steer_mag >= 0.9:
            reward -= 0.21
    return reward


epochs = 500
freq = 64      # policy update interval, in steps
freq_n = 3     # action-std decay interval, in steps
for epoch in range(epochs):
    step = 0
    scenario.reset(ego_vehicle)
    c = world.tick()
    done = False
    total_r = 0
    val = 0    # last command value; 0 until the first scenario.step()

    while not done:
        birdview = birdview_producer.produce(
            agent_vehicle=ego_vehicle  # carla.Actor (spawned vehicle)
            )
        in_data = birdview[:5,:,:].reshape((1,5,186,150))
        action = p.select_action(in_data)

        # action[0] >= 0 drives, action[0] < 0 brakes. The brake amount is the
        # magnitude of the negative value; previously the negative number was
        # passed on and clipped to 0, so the brake was never applied.
        if action[0] < 0:
            throttle, brake = 0.0, -action[0]
        else:
            throttle, brake = action[0], 0.0

        s_clip_n, s_clip_p, t_clip_n, t_clip_p = _control_limits(val)

        # NOTE(review): the throttle floor (0.4 for commands 0/1) is applied
        # even while braking - confirm that is intended.
        ego_vehicle.apply_control(carla.VehicleControl(
            throttle=float(np.clip(throttle, t_clip_n, t_clip_p)),
            steer=float(np.clip(action[1], s_clip_n, s_clip_p)),
            brake=float(np.clip(brake, 0.0, 1.0)),
        ))

        cmd, reward, done, _ = scenario.step(ego_vehicle)
        val = cmd.value

        v = ego_vehicle.get_velocity()
        kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))
        reward = _shaped_reward(reward, kmh, action[1])

        p.buffer.rewards.append(reward)
        p.buffer.is_terminals.append(done)

        total_r += reward
        step += 1

        if step % freq ==0 :
            print(step)
            p.update()
        if step % freq_n == 0:
            p.decay_action_std(0.05,0.005)
        c = world.tick()

    # Train on whatever is left in the buffer at the end of the episode.
    try:
        p.update()
    except Exception as e:
        print("Error:",e)

    cv2.destroyAllWindows()
    print(total_r,epoch,step)
    total_reward_list.append(total_r)
    epoch_list.append(epoch)
    step_list.append(step)
    

In [None]:
# Bundle the per-episode statistics (the inner lists are referenced, not copied).
History = [epoch_list,total_reward_list,step_list]

In [None]:
len(epoch_list)

In [None]:
len(epoch_list)

In [None]:
# Persist the training history to disk.
# NOTE(review): the file is closed manually; a `with open(...)` block would
# also close it if pickle.dump raises.
import pickle
f = open("History5.pkl",'wb')
pickle.dump(History,f)
f.close()

In [None]:
# Save the rollout policy's weights.
p.save("Model_CHK5.mdl")

In [None]:
len(total_reward_list)

In [None]:
# Drop the first 500 entries of each list.
# NOTE(review): after a single 500-epoch run this leaves all three lists
# empty; it presumably targets a session in which the training cell was run
# more than once without resetting the lists.
epoch_list = epoch_list[500:]
total_reward_list = total_reward_list[500:]
step_list = step_list[500:]

In [None]:
#plt.ylim(-2,1)
plt.plot(epoch_list,total_reward_list)

In [None]:
cv2.destroyAllWindows()

In [None]:
# Number of values in one 320x320 RGB frame.
320*320*3

In [None]:
input_data

In [None]:
in_data

In [None]:
# NOTE(review): reshape() does not move the channel axis of the
# height x width x channel camera frame to the front - see the training-loop
# cell that uses the same expression.
in_data = input_data.reshape((1,3,320,320))

In [None]:
# NOTE(review): deleting the agent here makes the remaining cells fail with
# NameError on a top-to-bottom run.
del p

In [None]:
# NOTE(review): the policy networks are sized for 5-channel 186x150 input,
# while in_data is now a 3-channel 320x320 camera frame - confirm this call is
# still meant to work.
p.select_action(in_data)

In [None]:
p.buffer.actions

In [None]:
p.buffer.rewards