In [151]:
import numpy as np
import random
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import copy


In [152]:
# True when PyTorch reports a usable CUDA device; the classes below consult
# this flag to decide whether models and tensors are moved to the GPU.
is_cuda = torch.cuda.is_available()

In [154]:
class Memory:
    """Replay buffer of transitions stored as ``[state, action, state_next, reward]``.

    A transition is written in two halves: ``stock_before`` opens a new entry
    holding the state and the action, and ``stock_after`` completes that entry
    with the resulting state and the reward, then advances the write position.
    When the buffer is full, the next ``stock_before`` dumps it to
    ``log_<n>.csv`` and starts again from an empty buffer.
    """

    def __init__(self, CAPACITY):
        """Create an empty buffer holding at most ``CAPACITY`` transitions."""
        self.capacity = CAPACITY
        self.memory = []
        self.index = 0  # position of the transition currently being written
        # Number of automatic "log_<n>.csv" dumps written so far.  It must not
        # be called ``save``: an instance attribute of that name shadows the
        # ``save`` method and makes ``memory.save(...)`` fail with
        # "'int' object is not callable".
        self.save_count = 0

    def clear(self):
        """Drop every stored transition and restart writing at position 0."""
        self.memory = []
        self.index = 0

    def _rows(self):
        """Return the buffer as an array with one flattened transition per row.

        The parts of a transition may have different lengths (e.g. a state
        vector next to a one-element reward), which ``np.array`` cannot turn
        into a rectangular array, so every transition is flattened first.
        Transitions made of plain scalars produce the same rows as before.
        """
        return np.array([
            np.hstack([np.ravel(part) for part in transition])
            for transition in self.memory
        ])

    def save(self, succ, ep_num, trial):
        """Write the buffer to ``<succ>_ep_<ep_num>_<trial>.csv``."""
        np.savetxt(str(succ)+"_ep_"+str(ep_num)+"_"+str(trial)+".csv", self._rows(), fmt="%f", delimiter=",")

    def load(self, path):
        """Replace the buffer with the rows of the CSV file at ``path``.

        Each CSV row becomes one (flat) stored entry.
        """
        # ndmin=2 keeps a single-row file as a list of rows, not a flat list.
        self.memory = np.loadtxt(path, delimiter=",", ndmin=2).tolist()
        # Continue writing after the loaded rows instead of into row 0.
        self.index = len(self.memory) % self.capacity

    def stock_before(self, state, action):
        """Open a new transition and record its ``state`` and ``action``."""
        if len(self.memory) >= self.capacity:
            # Buffer is full: dump it to disk and start over empty.
            np.savetxt("log_"+str(self.save_count)+".csv", self._rows(), fmt="%f", delimiter=",")
            self.save_count += 1
            self.clear()
        # There is room (always true after a dump): open the slot that
        # ``self.index`` points at.  The original code skipped this after a
        # dump and then indexed into the empty list.
        self.memory.append([])

        self.memory[self.index].append(state)
        self.memory[self.index].append(action)

    def stock_after(self, state_next, reward):
        """Complete the open transition with ``state_next`` and ``reward``."""
        self.memory[self.index].append(state_next)
        self.memory[self.index].append(reward)

        self.index = (self.index + 1) % self.capacity

    def sample(self, batch_size):
        """Return ``batch_size`` distinct transitions chosen at random."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

In [155]:
class Net(nn.Module):
    """Fully connected network: four ReLU layers followed by a squashed output layer."""

    def __init__(self, n_in, n_mid, n_out):
        """Build the five linear layers.

        n_in  -- size of the input vector
        n_mid -- width of each of the four hidden layers
        n_out -- size of the output vector

        Input vector layout, as listed by the original author:
            number of cars in this section, lane change stats (digitized),
            x / y coordinates of the cars in this section (digitized),
            Vx / Vy of the cars in this section (digitized),
            ax / ay of the cars in this section (digitized),
            [map of the unit section],
            section 0 ~ 7
        """
        super().__init__()
        # Attribute names are part of the saved state_dict keys; keep them.
        self.fc1 = nn.Linear(n_in, n_mid)
        self.fc2 = nn.Linear(n_mid, n_mid)
        self.fc3 = nn.Linear(n_mid, n_mid)
        self.fc4 = nn.Linear(n_mid, n_mid)
        self.fc5 = nn.Linear(n_mid, n_out)

    def forward(self, x):
        """Map ``x`` to values strictly between 0 and 1.

        The last layer is passed through atan and then tanh, and the result
        is shifted and scaled from (-1, 1) into (0, 1).
        """
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = F.relu(layer(hidden))
        squashed = torch.tanh(torch.atan(self.fc5(hidden)))
        return (squashed + 1) / 2

In [156]:

BATCH_SIZE = 32  # number of transitions drawn from the replay memory per update


class Brain:
    """Holds the network, its optimizer and the replay memory, and runs the learning step."""

    def __init__(self,n_in,n_mid,n_out,gamma):
        """Create the replay memory, the network and its Adam optimizer.

        n_in, n_mid, n_out -- layer sizes passed on to ``Net``
        gamma              -- discount applied to the next-state output in the target
        """
        self.memory = Memory(10000)
        self.n_out = n_out  # needed to draw random actions of the right size
        self.model = Net(n_in, n_mid, n_out).double()
        # Move the model before building the optimizer so the optimizer is
        # created from the parameters that will actually be trained.
        if is_cuda:
            self.model = self.model.cuda()
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.01)
        self.gamma = gamma

    def get_param(self,path):
        """Load network weights from ``path`` and switch the model to eval mode."""
        # map_location="cpu" lets a checkpoint load on a machine without CUDA;
        # load_state_dict copies the values into the existing parameters.
        self.model.load_state_dict(torch.load(path, map_location="cpu"))
        if is_cuda:
            self.model = self.model.cuda()
        self.model.eval()

    def save_param(self,path):
        """Save the network weights to ``path`` as CPU tensors.

        Only the saved copies are moved to the CPU.  Calling
        ``self.model.cpu()`` here would move the live model itself, after
        which it could no longer be fed CUDA tensors.
        """
        state = self.model.state_dict()
        for name in list(state):
            state[name] = state[name].cpu()
        torch.save(state, path)

    def modify_weight(self):
        """Run one optimisation step on a random mini-batch from the memory.

        Does nothing until the memory holds at least ``BATCH_SIZE``
        transitions.  The target is ``reward + gamma * model(state_next)``
        and the loss is the smooth L1 distance between ``model(state)`` and
        that target.
        """
        if len(self.memory) < BATCH_SIZE:
            return

        mini_batch = self.memory.sample(BATCH_SIZE)
        # Each transition is [state, action, state_next, reward].  The parts
        # have different lengths, so they are gathered field by field instead
        # of slicing np.array(mini_batch), which fails on ragged input.
        state_batch = torch.DoubleTensor([transition[0] for transition in mini_batch])
        next_state_batch = torch.DoubleTensor([transition[2] for transition in mini_batch])
        reward_batch = torch.DoubleTensor([transition[3] for transition in mini_batch])

        if is_cuda:
            state_batch = state_batch.cuda()
            next_state_batch = next_state_batch.cuda()
            reward_batch = reward_batch.cuda()

        # The bootstrapped target is treated as a constant: without no_grad the
        # loss would also back-propagate through model(state_next).
        self.model.eval()
        with torch.no_grad():
            next_state_values = self.model(next_state_batch)
            expected_action = reward_batch + self.gamma * next_state_values

        self.model.train()
        state_action_batch = self.model(state_batch)
        loss = F.smooth_l1_loss(state_action_batch, expected_action)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def decide_action(self,state,epsilon):
        """Return an action tensor on the CPU.

        With probability ``epsilon`` the action is drawn uniformly from
        [0, 1] per output; otherwise it is the network output for ``state``.
        """
        if epsilon <= np.random.uniform(0, 1):
            self.model.eval()
            with torch.no_grad():
                action = self.model(state)
            if is_cuda:
                action = action.cpu()
        else:
            # One uniform draw per network output (two for the default agent).
            action = torch.DoubleTensor([random.uniform(0,1) for _ in range(self.n_out)])

        return action
        

In [157]:
class Agent:
    """Thin facade over ``Brain``: forwards acting, learning and memory calls."""

    def __init__(self):
        # 19 inputs, 128 hidden units, 2 outputs, discount 0.99.
        self.brain = Brain(19,128,2,0.99)

    @property
    def _memory(self):
        """The replay memory owned by the brain."""
        return self.brain.memory

    def update_q_function(self):
        """Run one learning step of the brain."""
        self.brain.modify_weight()

    def get_action(self, state, epsilon):
        """Ask the brain for an action for ``state`` with exploration rate ``epsilon``."""
        return self.brain.decide_action(state, epsilon)

    def save(self, succ, ep_num, trial):
        """Write the replay memory to disk (arguments select the file name)."""
        self._memory.save(succ, ep_num, trial)

    def memory_clear(self):
        """Empty the replay memory."""
        self._memory.clear()

    def memorize_before(self, state, action):
        """Record the first half of a transition: state and chosen action."""
        self._memory.stock_before(state, action)

    def memorize_after(self, state_next, reward):
        """Record the second half of a transition: resulting state and reward."""
        self._memory.stock_after(state_next, reward)


In [191]:
class Environment:
    """Car simulation on a 2-D grid map, used to train an ``Agent``.

    Each car is described by a "scalar" row ``[lane_stat, x, y, des-x, des-y]``
    and a "vector" row ``[vx, vy, ax, ay]``.  Map cells hold a code that
    ``check`` interprets as follows:

        0 -- driving with vy > 0 is penalised     1 -- driving with vx < 0 is penalised
        2 -- driving with vx > 0 is penalised     3 -- driving with vy < 0 is penalised
        4 -- time spent here accumulates in ``lane_stat``
        5 -- not on the road (ends the trial)
    """

    # Map code used for "not on the road"; also applied to positions that
    # fall outside the map array.
    OFF_ROAD = 5

    def __init__(self,map_section, adj, car_limits, episodes, step):
        """Store the configuration, load the map and create the agent.

        map_section -- seven lists of segments consumed by ``mk_map``
        adj         -- values appended verbatim to every state vector
        car_limits  -- ``[scalar_limit, vector_limit]``: per-column bounds used
                       when digitizing the scalar and vector rows
        episodes    -- ``[[car_scalars, car_vectors], [car_scalars, car_vectors], ...]``
        step        -- ``[[max time steps per trial, number of trials], ...]``,
                       one pair per episode
        """
        self.map_vector = map_section
        try:
            self.map = np.loadtxt("map.csv", dtype=int, delimiter=",").tolist()
        except OSError:
            # "map.csv" is the file mk_map() writes.  When it does not exist
            # yet, build it from map_section instead of failing before an
            # instance (and therefore mk_map) is available.
            self.mk_map()
        self.scalar_limit = car_limits[0]
        self.vector_limit = car_limits[1]
        self.noc = 0  # number of cars currently in the simulation
        self.adj = adj
        self.episodes = episodes
        self.step = step
        self.dt = 0.25  # simulated time per step
        self.agent = Agent()

    def mk_map(self):
        """Rasterise ``map_vector`` into a 500x500 grid and save it as ``map.csv``.

        The grid starts filled with 5.  Segment lists 0-4 and 6 paint their
        codes first; list 5 is painted last so it overrides the others.
        """
        self.map  = np.full((500,500),5)
        for i in self.map_vector[0]:
            self.map[i[0]:i[2], i[1]] = 0
        for i in self.map_vector[1]:
            self.map[i[0], i[1]:i[3]] = 1
        for i in self.map_vector[2]:
            self.map[i[0], i[1]:i[3]] = 2
        for i in self.map_vector[3]:
            self.map[i[0]:i[2], i[1]] = 3
        for i in self.map_vector[4]:
            # Segments with equal column bounds run along rows, others along columns.
            if i[1] == i[3]:
                self.map[i[0]:i[2], i[1]] = 4
            else:
                self.map[i[0], i[1]:i[3]] = 4
        for i in self.map_vector[6]:
            self.map[i[0]:i[2], i[1]:i[3]] = 4
        for i in self.map_vector[5]:
            self.map[i[0]:i[2], i[1]:i[3]] = 5
        np.savetxt("map.csv", self.map, fmt='%i', delimiter=",")

    def digitize_scalar(self,car,car_limits):
        """Pack each column of ``car`` into one number.

        The values of column ``i`` (one per car, truncated to int) are the
        digits of a base-``car_limits[i]`` number, first car least significant.
        """
        car_temp = np.array(car).astype(int)
        result = np.ones(len(car_limits))
        for i in range(len(car_limits)):
            temp = car_temp[:,i]
            max_limit = car_limits[i]
            result[i] = sum([(x) * ((max_limit)**y) for y, x in enumerate(temp)])
        return result

    def digitize_vector(self,car,car_limits):
        """Pack each column of ``car`` into one number, allowing negative values.

        Every value is shifted by its column limit before being used as a
        digit of a base-``2 * car_limits[i]`` number.
        """
        car_temp = np.array(car).astype(int)
        result = np.ones(len(car_limits))
        for i in range(len(car_limits)):
            temp = car_temp[:,i]
            max_limit = car_limits[i]
            result[i] = sum([(x+max_limit) * ((max_limit*2)**y) for y, x in enumerate(temp)])
        return result

    def decompose(self,digitized,limit,num):
        """Return the ``num`` lowest base-``limit`` digits of ``digitized``."""
        temp = np.ones(num)
        for i in range(num):
            temp[i] = digitized % limit
            digitized //= limit
        return temp

    def decompose_scalar(self,digi,limit,noc):
        """Inverse of ``digitize_scalar``: one row per car, one column per entry of ``digi``."""
        res = []
        for i in range(len(digi)):
            temp = self.decompose(digi[i],limit[i],noc)
            res.append(temp)
        return np.array(res).transpose()

    def decompose_vector(self,digi,limit,noc):
        """Inverse of ``digitize_vector``: one row per car, one column per entry of ``digi``.

        ``digi`` is truncated to integers first, so that every car gets a
        whole-number digit (the original discarded the result of ``astype``,
        which left the fractional part in the first car's value only).  Each
        column is shifted back by its own limit ``limit[i]``.
        """
        res = []
        digi = np.asarray(digi).astype(np.int64)
        for i in range(len(digi)):
            temp = self.decompose(digi[i],limit[i]*2,noc) - limit[i]
            res.append(temp)
        return np.array(res).transpose()

    def get_state(self,time):
        """Build the state tensor: car count, digitized scalars and vectors, ``time``, ``adj``."""
        ds = self.digitize_scalar(self.car_scalar,self.scalar_limit)
        dv = self.digitize_vector(self.car_vector,self.vector_limit)
        concas = np.concatenate((np.array([self.noc]), ds,dv,np.array([time]),np.array(self.adj)), axis=None)
        return torch.DoubleTensor(concas.tolist())

    def state_next(self,action):
        """Apply ``action`` to every car and advance the simulation by ``dt``.

        ``action`` is scaled by ``(2 * vector_limit[2]) ** noc`` and decomposed
        into one (ax, ay) increment per car.  Acceleration is incremented,
        velocity integrates acceleration, and position integrates the mean of
        the old and new velocity.
        """
        action = np.array(action)*((self.vector_limit[2]*2)**(self.noc))
        res = self.decompose_vector(action,self.vector_limit[2:],self.noc)
        for i in range(self.noc):
            self.car_vector[i][2] += res[i][0]
            self.car_vector[i][3] += res[i][1]
            v_temp = [self.car_vector[i][0], self.car_vector[i][1]]
            self.car_vector[i][0] += self.car_vector[i][2] * self.dt
            self.car_vector[i][1] += self.car_vector[i][3] * self.dt
            self.car_scalar[i][1] += (v_temp[0] + self.car_vector[i][0]) /2 *self.dt
            self.car_scalar[i][2] += (v_temp[1] + self.car_vector[i][1]) /2 *self.dt

    def _map_code(self, x, y):
        """Return the map code under position (x, y), or ``OFF_ROAD`` outside the map."""
        row, col = int(x), int(y)
        # Negative indices would silently wrap to the far side of the map and
        # indices past the edge would raise, so both count as off the road.
        if 0 <= row < len(self.map) and 0 <= col < len(self.map[row]):
            return self.map[row][col]
        return self.OFF_ROAD

    def check(self,time,ep_num):
        """Evaluate the current situation and return ``(done, reward)``.

        ``reward`` is a one-element list.

        * ``(True, [1])``   -- no cars are left.
        * ``(True, [-1])``  -- two cars share the compared coordinates, or a car
          is off the road.
        * ``(False, [-1])`` -- last time step of the trial, over the velocity
          limit, driving against the cell's direction, or ``lane_stat`` above
          its limit.
        * ``(False, [0])``  -- otherwise.

        Cars that are considered arrived are removed from the simulation.
        """
        if self.noc == 0:
            return True, [1]
        if time == self.step[ep_num][0] - 1:
            return False, [-1]
        # NOTE(review): columns 3: are (des-x, des-y), so the collision test
        # below compares destinations and the arrival test compares a
        # destination with itself (effectively "velocity is zero").  Columns
        # 1:3 (x, y) may have been intended -- confirm before changing.
        loc = np.array(self.car_scalar)[:,3:].tolist()
        for i in range(len(loc)-1):
            for j in range(i+1,len(loc)):
                if (loc[i][0] == loc[j][0]) and (loc[i][1] == loc[j][1]):
                    return True,[-1]
        for i in range(self.noc):
            temp = self._map_code(self.car_scalar[i][1], self.car_scalar[i][2])
            if temp == self.OFF_ROAD:
                return True,[-1]
            if self.car_vector[i][0] > self.vector_limit[0] or self.car_vector[i][1] > self.vector_limit[1]:
                return False,[-1]
            if temp == 0:
                if self.car_vector[i][1] > 0:
                    return False,[-1]
            elif temp == 1:
                if self.car_vector[i][0] < 0:
                    return False,[-1]
            elif temp == 2:
                if self.car_vector[i][0] > 0:
                    return False,[-1]
            elif temp == 3:
                if self.car_vector[i][1] < 0:
                    return False,[-1]
            # lane_stat counts the time spent on code-4 cells and resets elsewhere.
            if temp == 4:
                self.car_scalar[i][0] += self.dt
            else:
                self.car_scalar[i][0] = 0
        lane_stat = 0
        des_list = []
        for i in range(self.noc):
            if (loc[i][0] == self.car_scalar[i][3]) and (loc[i][1] == self.car_scalar[i][4]) and self.car_vector[i][0] ==0 and self.car_vector[i][1] == 0:
                des_list.append(i)

            if self.car_scalar[i][0] > self.scalar_limit[0]:
                lane_stat = 1

        if des_list:
            # Remove all arrived cars in one call.  Deleting them one by one in
            # ascending order shifts the remaining indices and removes the
            # wrong cars (or runs past the end).
            self.car_scalar = np.delete(self.car_scalar, des_list, axis=0)
            self.car_vector = np.delete(self.car_vector, des_list, axis=0)
            self.noc -= len(des_list)

        if lane_stat == 1:
            return False, [-1]
        if self.noc == 0:
            return True, [1]
        else:
            return False, [0]

    def run(self):
        """Train the agent on every episode.

        For each episode up to ``step[ep_num][1]`` trials are run, each lasting
        at most ``step[ep_num][0]`` time steps.  A trial ends early when
        ``check`` reports ``done``; an episode ends early after more than ten
        consecutive successful trials.
        """
        for ep_num in range(len(self.episodes)):
            episode = self.episodes[ep_num]
            complete_trial = 0  # consecutive successful trials
            for trial in range(self.step[ep_num][1]):      #trial
                # scalar = [[lane_stat, x, y, des-x, des-y]]
                # vector = [[vx, vy, ax, ay]]
                done = False
                self.noc = len(episode[0])
                # Work on copies so every trial starts from the same situation.
                self.car_scalar = copy.deepcopy(episode[0])
                self.car_vector = copy.deepcopy(episode[1])

                for time in range(self.step[ep_num][0]):   #stop after max trial
                    if time == 0:
                        state = self.get_state(time)
                    if is_cuda:
                        state = state.cuda()
                    # Exploration rate starts at 0.8 and shrinks every 50 trials.
                    action = self.agent.get_action(state,(0.8 * (1 / (trial//50 + 1))))
                    self.agent.memorize_before(state.cpu().tolist(),action.cpu().tolist())
                    self.state_next(action)

                    done, reward = self.check(time,ep_num)

                    state = self.get_state(time)
                    self.agent.memorize_after(state.tolist(),reward)
                    self.agent.update_q_function()

                    if done:
                        if(reward[0] == 1):
                            print("ep ",ep_num," tiral ",trial," success in time ",time)
                            complete_trial += 1
                        else:
                            print("ep ",ep_num," tiral ",trial," failed in time ",time)
                            complete_trial = 0
                        break

                if complete_trial>10:
                    break
                    
            
        

In [192]:
# Segment files for the map, in the index order Environment.mk_map reads them.
_MAP_FILES = (
    "map0.csv",
    "map1.csv",
    "map2.csv",
    "map3.csv",
    "map4.csv",
    "map5.csv",
    "map6.csv",
)
# NOTE: this name shadows the built-in ``map``; it is kept because the next
# cell passes it to Environment under this name.
map = [np.loadtxt(name, dtype=int, delimiter=",").tolist() for name in _MAP_FILES]

# Eight placeholder values appended to every state vector.
adj = [-1] * 8

# [limits for the scalar row, limits for the vector row]
car_limit = [
    [4, 500, 500, 500, 500],
    [70, 70, 25, 25],
]

# One entry per episode: [car scalars, car vectors], where a scalar row is
# [lane_stat, x, y, des-x, des-y] and a vector row is [vx, vy, ax, ay].
ep = [
    [
        [[0, 0, 300, 280, 300]],
        [[0, 0, 0, 0]],
    ],
    [
        [[0, 299, 317, 100, 318]],
        [[0, 0, 0, 0]],
    ],
    [
        [[0, 0, 300, 280, 300], [0, 299, 317, 100, 318]],
        [[0, 0, 0, 0], [0, 0, 0, 0]],
    ],
]

# One entry per episode: [max time steps per trial, number of trials].
step = [
    [500, 200],
    [240, 200],
    [240, 200],
]


  


In [194]:
# Build the simulation, start from the previously saved weights, train, then
# print and persist the resulting weights.
env = Environment(map, adj, car_limit, ep, step)
brain = env.agent.brain
brain.get_param("params/param")

env.run()

print(brain.model.state_dict())
brain.save_param("params/param")

ep  0  tiral  0  failed in time  0
ep  0  tiral  1  failed in time  5
ep  0  tiral  2  failed in time  4
ep  0  tiral  3  failed in time  0
ep  0  tiral  4  failed in time  3
ep  0  tiral  5  failed in time  0
ep  0  tiral  6  failed in time  0
ep  0  tiral  7  failed in time  6
ep  0  tiral  8  failed in time  0
ep  0  tiral  9  failed in time  0
ep  0  tiral  10  failed in time  1
ep  0  tiral  11  failed in time  2
ep  0  tiral  12  failed in time  0
ep  0  tiral  13  failed in time  0
ep  0  tiral  14  failed in time  4
ep  0  tiral  15  failed in time  0
ep  0  tiral  16  failed in time  3
ep  0  tiral  17  failed in time  3
ep  0  tiral  18  failed in time  2
ep  0  tiral  19  failed in time  0
ep  0  tiral  20  failed in time  4
ep  0  tiral  21  failed in time  0
ep  0  tiral  22  failed in time  0
ep  0  tiral  23  failed in time  5
ep  0  tiral  24  failed in time  0
ep  0  tiral  25  failed in time  2
ep  0  tiral  26  failed in time  4
ep  0  tiral  27  failed in time  0
ep

ep  1  tiral  25  failed in time  8
ep  1  tiral  26  failed in time  5
ep  1  tiral  27  failed in time  1
ep  1  tiral  28  failed in time  2
ep  1  tiral  29  failed in time  1
ep  1  tiral  30  failed in time  4
ep  1  tiral  31  failed in time  7
ep  1  tiral  32  failed in time  8
ep  1  tiral  33  failed in time  3
ep  1  tiral  34  failed in time  3
ep  1  tiral  35  failed in time  4
ep  1  tiral  36  failed in time  3
ep  1  tiral  37  failed in time  9
ep  1  tiral  38  failed in time  5
ep  1  tiral  39  failed in time  8
ep  1  tiral  40  failed in time  5
ep  1  tiral  41  failed in time  2
ep  1  tiral  42  failed in time  8
ep  1  tiral  43  failed in time  2
ep  1  tiral  44  failed in time  4
ep  1  tiral  45  failed in time  4
ep  1  tiral  46  failed in time  1
ep  1  tiral  47  failed in time  6
ep  1  tiral  48  failed in time  1
ep  1  tiral  49  failed in time  8
ep  1  tiral  50  failed in time  3
ep  1  tiral  51  failed in time  4
ep  1  tiral  52  failed in 

ep  2  tiral  50  failed in time  1
ep  2  tiral  51  failed in time  0
ep  2  tiral  52  failed in time  0
ep  2  tiral  53  failed in time  0
ep  2  tiral  54  failed in time  0
ep  2  tiral  55  failed in time  0
ep  2  tiral  56  failed in time  0
ep  2  tiral  57  failed in time  0
ep  2  tiral  58  failed in time  0
ep  2  tiral  59  failed in time  0
ep  2  tiral  60  failed in time  0
ep  2  tiral  61  failed in time  0
ep  2  tiral  62  failed in time  0
ep  2  tiral  63  failed in time  4
ep  2  tiral  64  failed in time  3
ep  2  tiral  65  failed in time  4
ep  2  tiral  66  failed in time  0
ep  2  tiral  67  failed in time  0
ep  2  tiral  68  failed in time  0
ep  2  tiral  69  failed in time  0
ep  2  tiral  70  failed in time  0
ep  2  tiral  71  failed in time  0
ep  2  tiral  72  failed in time  0
ep  2  tiral  73  failed in time  8
ep  2  tiral  74  failed in time  0
ep  2  tiral  75  failed in time  3
ep  2  tiral  76  failed in time  0
ep  2  tiral  77  failed in 

       device='cuda:0', dtype=torch.float64)), ('fc5.bias', tensor([-0.2751,  0.1751], device='cuda:0', dtype=torch.float64))])


In [195]:
# Scratch cell: draws a single float with random.uniform(0, 1); the value is
# only displayed as the cell output and is not used anywhere.
random.uniform(0,1)


0.12479996222092404

In [None]:
OrderedDict([('fc1.weight', tensor([[ 0.1824, -0.0464,  0.0901,  ..., -0.0926, -0.1155, -0.4320],
        [-0.1883, -0.0564, -0.0636,  ..., -0.1283, -0.0588,  0.1303],
        [ 0.0165, -0.2002, -0.1623,  ..., -0.0321, -0.1244, -0.1171],
        ...,
        [ 0.0292, -0.1860,  0.2719,  ..., -0.2010, -0.2032, -0.0114],
        [ 0.2130,  0.0487, -0.0798,  ..., -0.0021,  0.1804, -0.1791],
        [-0.3188, -0.1977,  0.0807,  ...,  0.1682,  0.3072,  0.4516]],
       device='cuda:0', dtype=torch.float64)), ('fc1.bias', tensor([ 0.1475, -0.0898,  0.2787,  0.2013,  0.1294, -0.1444,  0.3007, -0.0523,
        -0.0723, -0.3733, -0.2445, -0.1790, -0.3836, -0.0865, -0.1814, -0.1508,
        -0.1147, -0.2031,  0.0707, -0.2613,  0.2230, -0.1986,  0.1623, -0.3064,
         0.0739,  0.2436, -0.1281,  0.0609,  0.0628,  0.0742, -0.3352, -0.0412,
         0.0811, -0.3991, -0.3922, -0.1117,  0.1932, -0.2706, -0.0274, -0.1051,
        -0.0322, -0.1521, -0.0120,  0.1544,  0.4070,  0.1716, -0.0404, -0.1112,
         0.2036, -0.2191, -0.1266,  0.1523,  0.2324, -0.0240,  0.0838, -0.0656,
        -0.3309,  0.2157, -0.1270,  0.0469,  0.2272,  0.3494, -0.1997, -0.0689,
         0.0253,  0.1479, -0.4197,  0.0217,  0.1846,  0.4082,  0.0564,  0.1996,
         0.4547, -0.1142, -0.2740,  0.1022, -0.3534,  0.0639, -0.1181,  0.2650,
        -0.0006,  0.3717,  0.0025, -0.1769,  0.0636,  0.0705,  0.0198, -0.0079,
        -0.4209,  0.0542, -0.2091,  0.1331,  0.0469, -0.0516, -0.0307,  0.0640,
         0.2258, -0.1891,  0.0903, -0.1822, -0.1990,  0.1042,  0.0480, -0.3946,
        -0.0240, -0.3179,  0.2764, -0.4160,  0.0988,  0.1386,  0.1733, -0.3363,
         0.0417, -0.2177,  0.2247, -0.3794, -0.2737, -0.1065, -0.0081, -0.0338,
         0.0397,  0.0416,  0.1179, -0.3685, -0.1984,  0.3610,  0.0071, -0.1631],
       device='cuda:0', dtype=torch.float64)), ('fc2.weight', tensor([[ 0.2410,  0.0442,  0.4154,  ...,  0.3079, -0.0167,  0.2421],
        [-0.2245, -0.0342, -0.3070,  ..., -0.2254, -0.0793, -0.0649],
        [ 0.0378, -0.0817, -0.0151,  ..., -0.0205,  0.0717, -0.0336],
        ...,
        [-0.3589,  0.0375, -0.3516,  ..., -0.3986, -0.0488, -0.1913],
        [ 0.1914, -0.0339,  0.1843,  ...,  0.2591, -0.0178,  0.2189],
        [ 0.2374, -0.0525,  0.3668,  ...,  0.3216, -0.0428,  0.1952]],
       device='cuda:0', dtype=torch.float64)), ('fc2.bias', tensor([ 2.8216e-01, -2.0771e-01,  3.5479e-02, -3.8576e-02, -1.4380e-01,
         2.9497e-02, -4.8456e-02, -2.8399e-01, -2.2882e-01,  2.7046e-01,
         2.9535e-01,  4.7131e-02,  2.7400e-01, -7.6377e-02, -1.5518e-02,
        -4.2317e-02, -2.3332e-01,  2.1120e-01, -1.8313e-01,  1.8472e-01,
         2.3316e-01,  2.8204e-01, -6.7554e-03,  2.1732e-01, -2.2266e-01,
         1.5507e-02, -8.0301e-05,  1.8835e-01, -3.0531e-02, -3.4607e-02,
         1.5089e-02, -3.9384e-02, -3.7193e-02,  2.3002e-01,  6.1446e-02,
         6.7815e-02,  5.5900e-02, -2.5312e-03, -1.9331e-01,  1.6608e-02,
        -2.4413e-01,  3.1744e-01, -1.1589e-01,  1.3784e-01,  5.0960e-02,
        -2.7876e-02, -4.4386e-02,  5.6833e-02,  5.7828e-02,  2.7353e-01,
         2.2045e-01, -2.3096e-01,  7.7636e-02, -4.4815e-02,  6.2795e-02,
        -1.6973e-01,  1.5255e-01, -2.4897e-01, -4.6276e-02, -4.4034e-02,
        -1.4108e-01, -2.5305e-02,  3.2598e-02,  5.8388e-02,  2.6805e-01,
        -7.4055e-02, -1.4982e-01,  7.0535e-02, -2.9547e-01,  3.0805e-01,
         3.1075e-01,  4.6418e-02, -1.3604e-01, -1.9921e-01, -1.4364e-01,
        -7.4680e-02, -2.0963e-01,  2.4471e-02, -1.6036e-01, -7.2160e-02,
         1.6027e-01,  2.1496e-01, -3.4007e-02,  9.5958e-02,  2.8028e-01,