# DQN Implmentation

### Installations

In [1]:
# # Install environment and agent
# !pip install highway-env
# !pip install --upgrade sympy torch


### Learning using existing model

The following is the pesudocode that will be followed when creating the DQN

Useful: https://www.youtube.com/watch?v=RVMpm86equc&list=PL58zEckBH8fCMIVzQCRSZVPUp3ZAVagWi&index=2

https://github.com/saashanair/rl-series/tree/master/dqn

https://github.com/johnnycode8/gym_solutions/blob/main/frozen_lake_dql.py

<img src="DQN.png" style="width: 900px;" align="left"/>


Potential Problems: https://www.reddit.com/r/reinforcementlearning/comments/1555wgi/dqn_loss_increasing_and_rewards_decreasing/


For CNN:

https://www.reddit.com/r/MachineLearning/comments/3l5qu7/rules_of_thumb_for_cnn_architectures/


In [2]:
import numpy as np
class SumTree:
    def __init__(self, capacity):
        self.capacity = capacity
        self.tree = np.zeros( 2 * capacity - 1 )
        self.data = np.zeros( capacity, dtype=object )
        self.writer = 0

    # propagate upwards to update the sum values
    def _propagate(self, index, change):
        parent = (index - 1) // 2

        self.tree[parent] += change

        if parent != 0:
            self._propagate(parent, change)

    # get the leaf nodes (Transaction)
    def _retrieve(self, index, s):
        left = 2 * index + 1
        right = left + 1

        if left >= len(self.tree):
            return index

        if s <= self.tree[left]:
            return self._retrieve(left, s)
        else:
            return self._retrieve(right, s-self.tree[left])

    def total(self):
        return self.tree[0]

    def add(self, p, data):
        index = self.write + self.capacity - 1

        self.data[self.write] = data
        self.update(index, p)

        self.write += 1
        # circular
        if self.write >= self.capacity:
            self.write = 0

    def update(self, index, p):
        change = p - self.tree[index]

        self.tree[index] = p
        self._propagate(index, change)

    def get(self, s):
        index = self._retrieve(0, s)
        data_index = index - self.capacity + 1

        return (index, self.tree[index], self.data[data_index])

In [236]:
import gymnasium as gym
import highway_env
import numpy as np
import random
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import torch.distributions as dist
import torch.nn.init as init


# Define model
class MLPNetwork(nn.Module):
    def __init__(self, in_states, h1_nodes, out_actions):
        super(MLPNetwork, self).__init__()

        # Define network layers
        self.fc1 = nn.Linear(in_states, h1_nodes)   # first fully connected layer
        self.out = nn.Linear(h1_nodes, out_actions) # output layer
        self.out2 = nn.Linear(out_actions, 1) # output layer
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.fc1(x)) # Apply rectified linear unit (ReLU) activation
        x = F.relu(self.out(x))         
        x = self.out2(x)
        x = self.softmax(x)
        return x

class CNN(nn.Module):
    def __init__(self, input_shape, num_actions):
        super(CNN, self).__init__()
        # greyscale Image is(stack,height,width)
        stack, height, width = input_shape
        self.conv = nn.Sequential(
            nn.Conv2d(stack,16,kernel_size=4, stride=2, padding=1),
            nn.ReLU(),

            nn.Conv2d(16,32,kernel_size=3),
            nn.ReLU(),

            nn.Conv2d(32,64, kernel_size=2),
            nn.ReLU(),
        )
        
        # This is for finding the size to dense more robust compared to decision manually
        with torch.no_grad():
                # Torch uses(1,channels,height,width)
                test = torch.zeros(1, stack, height, width)
                find_conv_size = self.conv(test)
                conv_size = find_conv_size.numel()
                
        self.out1 = nn.Linear(conv_size,num_actions)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self,x):
        x = self.conv(x)
        x = torch.flatten(x, start_dim=1)
        x = self.out1(x)
        x = self.softmax(x)
        return x

In [4]:
# Define memory for Experience Replay
# TODO: Prioritize, n-steps

from collections.__init__ import namedtuple

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward', 'done'))

class ReplayMemory():
    def __init__(self, config, transition_type=Transition):
        self.capacity = config.get("capacity", 1000)
        self.index = 0
        self.transition_type = transition_type
        self.discount = config.get("discount", 0.99)

        self.memory = []
        self.device = config.get("device", torch.device("cpu"))
        self.n_steps = config.get("n_steps", 2)
    
    def store(self, *args):
        if len(self.memory) < self.capacity:
            self.memory.append(None)
            self.index = len(self.memory) - 1
        elif len(self.memory) > self.capacity:
            self.memory = self.memory[:self.capacity]
        # Faster than append and pop
        self.memory[self.index] = self.transition_type(*args)
        
        self.index = (self.index+1)%self.capacity # for circular memory

    def sample(self, batch_size, collapsed=True):
        if batch_size >  len(self.memory):
            batch_size = len(self.memory)
            
        if self.n_steps <= 1:
            # Directly sample transitions
            memories = random.sample(self.memory, batch_size)
            return self.unwrap_transition(*memories)
        else:
            # Sample initial transition indexes
            indexes = random.sample(range(len(self.memory)), batch_size)
            # Get the batch of n-consecutive-transitions starting from sampled indexes
            all_transitions = [self.memory[i:i+self.n_steps] for i in indexes]
            
            memories = map(self.collapse_n_steps, all_transitions) if collapsed else all_transitions

            return self.unwrap_transition(*memories)
        
    def collapse_n_steps(self, transitions):
        state, action, next_state, reward, done = transitions[0]
        discount = self.discount
        for transition in transitions[1:]:
            if done:
                break
            else:
                _, _, next_state, reward, done = transition
                discount *= self.discount
                reward += discount * reward
        return state, action, next_state, reward, done
    
    def unwrap_transition(self, *transition):
        state, action, next_state, reward, done = zip(*transition)
        
        states = torch.from_numpy(np.array(state)).float().to(self.device)
        actions = torch.from_numpy(np.array(action)).to(self.device)
        next_states = torch.from_numpy(np.array(next_state)).float().to(self.device)
        rewards = torch.from_numpy(np.array(reward)).float().to(self.device)
        dones = torch.from_numpy(np.array(done)).to(self.device)

        return states, actions, next_states, rewards, dones 

class PrioritizedReplayMemory(ReplayMemory):
    def __init__(self, config, transition_type=Transition):
        super().__init__(config)

        self.capacity = config.get("capacity", 1000)
        self.tree = SumTree(self.capacity)
        self.index = 0
        self.transition_type = transition_type

        self.memory = []
        self.device = config.get("device", torch.device("cpu"))
        
        self.alpha = config.get("alpha", 0.6)
        self.beta = config.get("beta", 0.2) #  will go to 1
        self.max_priority = 1  # priority for new samples, init as eps
        
    def store(self, *args):
        super().store(*args)
        self.tree.add(self.max_priority ** self.alpha, )
    
    def sample(self, batch_size, collapsed=True):
        priorities = torch.empty(batch_size, 1, dtype=torch.float)


In [5]:
from torch.utils.tensorboard import SummaryWriter
import os

class Metrics:
    def __init__(self, policy, result_file_name, use_metrics, time):
        self.use_metrics = use_metrics
        if not self.use_metrics:
            return
        new_num = str(len(os.listdir("./" +result_file_name)) + 1)
        file_name = f'{result_file_name}/{policy}_DQN_{new_num}_{time}'
        self.writer = SummaryWriter(log_dir=file_name, flush_secs=60)
            
    def add(self, type, y, x):
        if not self.use_metrics:
            return
        self.writer.add_scalar(type, y, x)
    def close(self):
        if not self.use_metrics:
            return
        self.writer.close()

In [226]:
import gymnasium as gym
import highway_env
import numpy as np
import random
import torch
import torch.optim as optim
import os
import datetime
from tqdm import tqdm
import json
import time

class DQNAgent:
    def __init__(self, params):
        self.q_net = {}
        self.q_target_net = {} 
        self.optimizer= {}        
        
        self.policy = params.get("policy", "CnnPolicy")        
        self.episode_num = params.get("episode_num", 10)

        self.epsilon = params.get("epsilon_max ", 1)
        self.epsilon_min = params.get("epsilon_min ", 0.1)
        self.epsilon_decay = params.get("epsilon_decay", 0.995)
        
        self.learning_rate = params.get("learning_rate", 5e-4)
        self.discount = params.get("discount", 0.2)
        self.batch_size = params.get("batch_size", 32)
        self.device = params.get("device", torch.device("cpu"))
        
        self.memory_capacity = params.get("memory_capacity", 1000)
        self.memory = {} # this is the memory buffer -> setting a limit
        self.n_steps = params.get("n_steps", 2)
        self.prioritize_memory = params.get("prioritize_memory", False)
        
        self.double = params.get("double", False)
        
        self.timeout = params.get("timeout_minute", 0) * 60 # in minutes
        ct = datetime.datetime.now()
        self.time = str(ct).replace(" ", "|")
        self.to_save_model = params.get("save_model", False)
        
        use_metrics = params.get("use_metrics", False)
        if use_metrics:
            self.create_folder("training_results")
            # self.save_params(params)
            
        self.metrics = Metrics(self.policy, "training_results", use_metrics, self.time)
        
    def initialize_weights(self, m):
        if isinstance(m, nn.Conv2d):
            # Xavier initialization for Conv2d weights
            init.xavier_uniform_(m.weight)
            init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            # Xavier initialization for Linear weights
            init.xavier_uniform_(m.weight)
            init.zeros_(m.bias)

    def create_network(self, env):
        if self.policy == "CnnPolicy":
            self.create_CNN(env)
        
        if self.policy == "MlpPolicy":
            self.create_MLP_Network(env)
        
        self.q_net.apply(self.initialize_weights)    
        self.update_target_network()
        self.optimizer = optim.Adam(self.q_net.parameters(), lr=self.learning_rate)
        
    
    def create_CNN(self, env):
        self.num_states = env.observation_space.shape
        self.num_actions = env.action_space.n

        self.q_net = CNN(self.num_states, self.num_actions).to(self.device)
        self.q_target_net = CNN(self.num_states, self.num_actions).to(self.device)
        
    
    def create_MLP_Network(self, env):
        # the lanes
        self.num_states = env.observation_space.shape[1]
        self.num_actions = env.action_space.n

        self.q_net = MLPNetwork(self.num_states, self.num_states, self.num_actions).to(self.device)
        self.q_target_net = MLPNetwork(self.num_states, self.num_states, self.num_actions).to(self.device)
    
    def update_target_network(self):
        self.q_target_net.load_state_dict(self.q_net.state_dict())
    
    def learn(self, env):
        self.create_network(env)
        
        # if (self.prioritize_memory):
        self.memory = ReplayMemory({
            "capacity": self.memory_capacity,
            "device": self.device,
            "n_steps": self.n_steps,
        })

        self.prefill_memory(env, self.batch_size)

        start_time = time.time()        
        
        for epoch in tqdm(range(self.episode_num), desc="Training Model"):
            state = env.reset()[0]
            
            # True when agent reaches the end states (colliding or passing the time)
            done = False 
            
            # TODO: see how many actions until truncate
            # True when agent takes more than some actions 
            truncated = False
            episode_rewards = []
            episode_loss = []
            episode_len = 0
            while(not done and not truncated):
                # choose best action
                action = self.get_action(state)
                next_state, reward, done, truncated, _ = env.step(action)
                self.memory.store(state, action, next_state, reward, done)
                
                episode_loss.append(self.experience_replay())
                
                state = next_state
                
                episode_rewards.append(reward)
                episode_len += 1
                
            self.metrics.add("rollout/rewards", sum(episode_rewards) / len(episode_rewards), epoch)
            self.metrics.add("rollout/exploration-rate", self.epsilon, epoch)
            self.metrics.add("rollout/episode-length", episode_len, epoch)
            self.metrics.add("train/loss", sum(episode_loss) / len(episode_loss), epoch)
            
            if self.timeout:
                elapsed_time = time.time() - start_time
                if elapsed_time > self.timeout:
                    print("Timeout reached. Stopping training.\n")
                    break
            
            if epoch % 10 == 0:
                self.decay_epsilon()
                self.update_target_network()
            
        self.metrics.close()
        
        if self.to_save_model:
            self.save_model()
        
    # either the policies are able to get miltuple actions and into the NN or the input of NN should be able to handle all of these
    # output (one of): {0: 'LANE_LEFT', 1: 'IDLE', 2: 'LANE_RIGHT', 3: 'FASTER', 4: 'SLOWER'}
    def get_action(self, state, eval_mode=False):
        if random.random() <= self.epsilon and not eval_mode: # amount of exploration reduces with the epsilon value
            return random.randrange(self.num_actions)
        
        state = torch.tensor(np.array([state]), dtype=torch.float32).to(self.device)
        actions = self.q_net(state)
        return torch.argmax(actions).item()             

    def experience_replay(self):
        states, actions, next_states, rewards, dones = self.memory.sample(self.batch_size)
        
        q_pred = self.q_net(states)
        # q value of the action taken
        q_pred = q_pred.gather(1, actions.view(-1, 1)) 
        q_pred = q_pred.squeeze(1)

        # Double DQN
        if self.double:
             # pick best actions from policy network
            q_best_action = self.q_net(next_states)
            _, q_best_action = q_best_action.max(dim=1)
            q_best_action = q_best_action.unsqueeze(1)
            
            # use those actions for the target policy
            q_target = self.q_target_net(next_states)
            q_target = q_target.gather(1, q_best_action)
            q_target = q_target.squeeze(1)
        else:
            q_target = self.q_target_net(next_states)
            q_target = q_target.max(dim=1).values
        
        # setting Q(s',a') to 0 when the current state is a terminal state
        q_target[dones] = 0.0
        
        y_j = rewards + (self.discount * q_target)
        
        # calculate the loss as the mean-squared error of yj and qpred
        self.optimizer.zero_grad()
        loss = F.mse_loss(y_j, q_pred).mean()
        loss.backward()
        self.optimizer.step()
        
        return loss.item()
        
    def prefill_memory(self, env, prefill_num):
        for _ in tqdm(range(prefill_num), desc="Prefilling Memory "):
            done = False
            truncated = False
            state = env.reset()[0]

            while not done and not truncated:
                action = env.action_space.sample()
                next_state, reward, done, truncated, info = env.step(action)
                self.memory.store(state, action, next_state, reward, done)    
                
    def decay_epsilon(self):
        self.epsilon = max(self.epsilon_min, self.epsilon*self.epsilon_decay)
    
    def evaluate(self, env, episode_num):
        # add camera here
        for _ in tqdm(range(episode_num), desc="Evaluating Model"):
            state = env.reset()[0]  
            done = False      
            truncated = False 

            # Agent navigates map until it falls into a hole (terminated), reaches goal (terminated), or has taken 200 actions (truncated).
            while(not done and not truncated):  
                # Select best action   
                action = self.get_action(state, eval_mode=True)
                next_state, reward, done, truncated, info = env.step(action)
                state = next_state
                env.render()
        
    def save_model(self):
        folder_name = self.policy + "_save_models"
        self.create_folder(folder_name)
        new_model_num = str(len(os.listdir("./" +folder_name)) + 1)
        file_name = f'{folder_name}/DQN_{new_model_num}_{self.time}.pth'
        state = {'state_dict': self.q_net.state_dict(),
            'optimizer': self.optimizer.state_dict()}
        torch.save(state, file_name)
        
    def load_model(self, env, file_name):
        folder_name = self.policy + "_save_models"
                
        filename = folder_name + "/" + file_name + ".pth"
        self.create_network(env)
        
        models = torch.load(filename, map_location=self.device)
        
        self.q_net.load_state_dict(models['state_dict'])
        self.optimizer.load_state_dict(models['optimizer'])
    
    def save_params(self, params):
        folder_name = "hyperparameters"
        self.create_folder(folder_name)
        
        file_name = f'./{folder_name}/{self.policy}_DQN_{self.time}'
        with open(file_name + '.txt', 'w') as file:
            file.write(json.dumps(str(params)))

    def create_folder(self, directory_name):
        try:
            os.mkdir(directory_name)
            print(f"Directory '{directory_name}' created successfully.")
        except FileExistsError:
            return
        except PermissionError:
            print(f"Permission denied: Unable to create '{directory_name}'.")
        except Exception as e:
            print(f"An error occurred: {e}")


In [190]:
config = {}
policy = "CnnPolicy"
# policy = "MlpPolicy"

if policy == "CnnPolicy":
    config={
        "lanes_count" : 3,
        "observation": {
            "type": "GrayscaleObservation",
            "observation_shape": (128, 64),
            "stack_size": 4,
            "weights": [0.2989, 0.5870, 0.1140],  # weights for RGB conversion keep this conversion this is in the highway env page
            "scaling": 1.75,
        },
    }
else:
    config = {
        "lanes_count" : 3,
        "observation": {
            "type": "Kinematics",
            "vehicles_count": 5,
            "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h"],
            "features_range": {
                "x": [-100, 100],
                "y": [-100, 100],
                "vx": [-20, 20],
                "vy": [-20, 20]
            },
            "absolute": False,
            "order": "sorted"
        }
    }

In [211]:
import torch
params = {
    'policy' : policy,
    'episode_num' : 10000,
    'discount' : 0.7,
    'batch_size' : 16,
    'learning_rate': 5e-6,
    'n_steps': 4,
    'double': True,
    'device' : torch.device("mps"),
    'memory_capacity' : 10000,
    'timeout_minute': 15,
    'use_metrics' : True,
    'save_model': True,
}

for seed in range(3):
    torch.manual_seed(seed)
    dqn_agent = DQNAgent(params)
    env = gym.make('highway-fast-v0', render_mode='rgb_array', config=config)
    dqn_agent.learn(env)

# env = gym.make('highway-v0', render_mode='rgb_array', config=config)
# dqn_agent.evaluate(env, 5)

# if you wanna save a model again
# dqn_agent.save_model("highway_dqn_model")

Prefilling Memory : 100%|██████████| 16/16 [00:02<00:00,  5.34it/s]
Training Model:  15%|█▌        | 1523/10000 [15:00<1:23:30,  1.69it/s]


Timeout reached. Stopping training.



Prefilling Memory : 100%|██████████| 16/16 [00:03<00:00,  5.12it/s]
Training Model:  14%|█▍        | 1448/10000 [15:00<1:28:37,  1.61it/s]


Timeout reached. Stopping training.



Prefilling Memory : 100%|██████████| 16/16 [00:02<00:00,  5.35it/s]
Training Model:  15%|█▍        | 1473/10000 [15:00<1:26:51,  1.64it/s]

Timeout reached. Stopping training.






In [237]:

env = gym.make('highway-v0', render_mode='rgb_array', config=config)

dqn_agent_test = DQNAgent(params)
dqn_agent_test.load_model(env, "DQN_4_2024-12-24|14:11:48.566160")

dqn_agent_test.evaluate(env, 20)


  models = torch.load(filename, map_location=self.device)
Evaluating Model:   0%|          | 0/20 [00:00<?, ?it/s]

tensor([[0.0185, 0.2510, 0.2137, 0.1976, 0.3192]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.4049, 0.4373, 0.0794, 0.0192, 0.0591]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0151, 0.8696, 0.0988, 0.0057, 0.0108]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1319, 0.3868, 0.3957, 0.0533, 0.0323]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0477, 0.3249, 0.4347, 0.0774, 0.1153]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0148, 0.0608, 0.6403, 0.0468, 0.2372]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0093, 0.0891, 0.6170, 0.2387, 0.0459]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0201, 0.3056, 0.4747, 0.1253, 0.0744]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0129, 0.6341, 0.1415, 0.0842, 0.1273]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0917, 0.2915, 0.3395, 0.1494, 0.1278]], device='mps:0',
       grad_fn=

Evaluating Model:   5%|▌         | 1/20 [00:04<01:30,  4.77s/it]

tensor([[0.0221, 0.3604, 0.4267, 0.0916, 0.0992]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.3926, 0.0366, 0.5376, 0.0075, 0.0257]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0572, 0.3040, 0.4567, 0.1623, 0.0198]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0435, 0.3207, 0.0895, 0.2204, 0.3258]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0789, 0.3173, 0.3057, 0.2595, 0.0386]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0073, 0.4538, 0.3775, 0.1127, 0.0487]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0149, 0.1094, 0.1438, 0.6891, 0.0428]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0276, 0.6583, 0.0192, 0.0200, 0.2750]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0413, 0.0296, 0.3465, 0.5556, 0.0270]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0878, 0.0422, 0.3482, 0.1348, 0.3870]], device='mps:0',
       grad_fn=

Evaluating Model:  10%|█         | 2/20 [00:14<02:22,  7.90s/it]

tensor([[0.0904, 0.2062, 0.4016, 0.1765, 0.1253]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0052, 0.0029, 0.9794, 0.0017, 0.0109]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[2.7469e-02, 1.1363e-02, 8.5962e-01, 8.0729e-05, 1.0147e-01]],
       device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[2.2540e-02, 5.7082e-02, 6.5208e-02, 5.1390e-04, 8.5466e-01]],
       device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.0568, 0.1195, 0.0776, 0.0400, 0.7061]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0032, 0.0219, 0.9220, 0.0071, 0.0458]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1514, 0.1244, 0.3737, 0.1196, 0.2310]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.5780, 0.0806, 0.1396, 0.0167, 0.1851]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.4861, 0.0193, 0.4086, 0.0028, 0.0832]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.6654, 0.0310, 0.0382, 0.0853, 0

Evaluating Model:  15%|█▌        | 3/20 [00:17<01:36,  5.67s/it]

tensor([[0.1103, 0.1489, 0.4807, 0.1402, 0.1200]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0303, 0.0268, 0.7640, 0.0433, 0.1356]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[7.8247e-02, 1.8874e-01, 5.0547e-01, 1.9150e-04, 2.2735e-01]],
       device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.7103, 0.0608, 0.1123, 0.0174, 0.0992]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  20%|██        | 4/20 [00:18<01:01,  3.83s/it]

tensor([[0.0458, 0.2912, 0.3854, 0.1823, 0.0952]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0026, 0.0041, 0.9701, 0.0073, 0.0160]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0923, 0.5949, 0.2589, 0.0008, 0.0532]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0225, 0.2183, 0.5709, 0.0011, 0.1872]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0060, 0.0064, 0.8961, 0.0478, 0.0437]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0121, 0.0633, 0.4721, 0.3611, 0.0915]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0102, 0.4078, 0.3536, 0.1421, 0.0864]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0928, 0.1084, 0.2678, 0.4141, 0.1169]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.5026, 0.2499, 0.0919, 0.0334, 0.1222]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0692, 0.2350, 0.1050, 0.1906, 0.4001]], device='mps:0',
       grad_fn=

Evaluating Model:  25%|██▌       | 5/20 [00:24<01:06,  4.45s/it]

tensor([[0.1584, 0.4283, 0.2946, 0.0299, 0.0888]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.6712, 0.0508, 0.2630, 0.0060, 0.0089]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1182, 0.8329, 0.0124, 0.0039, 0.0326]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.4088, 0.3958, 0.0684, 0.0837, 0.0433]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1599, 0.0382, 0.2841, 0.0561, 0.4617]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0556, 0.0197, 0.8183, 0.0896, 0.0168]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0694, 0.5926, 0.2760, 0.0093, 0.0527]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0483, 0.0698, 0.7664, 0.0185, 0.0970]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0009, 0.0155, 0.5639, 0.3881, 0.0316]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2978, 0.1102, 0.4521, 0.0731, 0.0668]], device='mps:0',
       grad_fn=

Evaluating Model:  30%|███       | 6/20 [00:29<01:03,  4.57s/it]

tensor([[0.0654, 0.1330, 0.4584, 0.2436, 0.0997]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0947, 0.0164, 0.8249, 0.0321, 0.0319]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1763, 0.2033, 0.5796, 0.0062, 0.0346]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2534, 0.0475, 0.2149, 0.2994, 0.1848]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2373, 0.2251, 0.3746, 0.0568, 0.1061]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  35%|███▌      | 7/20 [00:30<00:45,  3.48s/it]

tensor([[0.0766, 0.3468, 0.2130, 0.1099, 0.2537]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.4495, 0.1051, 0.3932, 0.0283, 0.0239]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0743, 0.2425, 0.4851, 0.0021, 0.1960]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1584, 0.3289, 0.0394, 0.0168, 0.4565]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.5149, 0.0122, 0.3429, 0.0247, 0.1052]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0024, 0.0118, 0.7903, 0.1343, 0.0612]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[2.5872e-02, 8.3777e-02, 3.2015e-04, 1.2443e-01, 7.6561e-01]],
       device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.0079, 0.3610, 0.1351, 0.0537, 0.4423]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0119, 0.5447, 0.3017, 0.0702, 0.0714]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0029, 0.0276, 0.8959, 0.0615, 0.0122]], device='mps

Evaluating Model:  40%|████      | 8/20 [00:36<00:52,  4.42s/it]

tensor([[0.0816, 0.0539, 0.2977, 0.1495, 0.4174]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0811, 0.1930, 0.6008, 0.1019, 0.0231]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0531, 0.2177, 0.6842, 0.0205, 0.0246]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0480, 0.0698, 0.7894, 0.0311, 0.0617]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0572, 0.1081, 0.7356, 0.0252, 0.0739]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0723, 0.1166, 0.4220, 0.1368, 0.2523]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0084, 0.1229, 0.4982, 0.0557, 0.3149]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0430, 0.0334, 0.7343, 0.0534, 0.1358]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0579, 0.4632, 0.3902, 0.0203, 0.0684]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0107, 0.2847, 0.3671, 0.2291, 0.1084]], device='mps:0',
       grad_fn=

Evaluating Model:  45%|████▌     | 9/20 [00:47<01:08,  6.20s/it]

tensor([[0.0532, 0.1181, 0.2796, 0.1180, 0.4311]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.3258, 0.1338, 0.3246, 0.1651, 0.0507]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1305, 0.1592, 0.4137, 0.0270, 0.2695]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.3173, 0.0047, 0.2095, 0.0260, 0.4426]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0645, 0.0254, 0.5951, 0.0023, 0.3128]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0936, 0.0070, 0.2582, 0.3290, 0.3122]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2346, 0.0313, 0.3890, 0.1543, 0.1907]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1393, 0.2431, 0.1262, 0.3163, 0.1750]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.6054, 0.2977, 0.0356, 0.0085, 0.0529]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.8991, 0.0129, 0.0610, 0.0148, 0.0121]], device='mps:0',
       grad_fn=

Evaluating Model:  50%|█████     | 10/20 [00:49<00:51,  5.14s/it]

tensor([[0.0827, 0.2503, 0.2746, 0.0910, 0.3014]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1000, 0.0544, 0.6303, 0.1392, 0.0762]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2208, 0.2938, 0.4295, 0.0060, 0.0499]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0690, 0.0623, 0.4526, 0.2813, 0.1348]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0797, 0.1690, 0.3062, 0.2140, 0.2312]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0290, 0.0689, 0.6545, 0.0553, 0.1923]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0107, 0.1505, 0.6268, 0.0609, 0.1511]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0551, 0.0908, 0.7512, 0.0280, 0.0749]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0081, 0.1685, 0.4479, 0.2558, 0.1197]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0666, 0.3009, 0.4779, 0.0806, 0.0741]], device='mps:0',
       grad_fn=

Evaluating Model:  55%|█████▌    | 11/20 [00:59<00:59,  6.66s/it]

tensor([[0.0447, 0.0710, 0.7855, 0.0380, 0.0608]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0051, 0.0062, 0.9253, 0.0056, 0.0578]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0133, 0.3600, 0.4443, 0.0040, 0.1784]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.4560, 0.0666, 0.1143, 0.0139, 0.3493]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.3928, 0.0026, 0.2906, 0.0243, 0.2898]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.3232, 0.0226, 0.0839, 0.0917, 0.4787]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0269, 0.4367, 0.3951, 0.0026, 0.1387]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2258, 0.3510, 0.1272, 0.2080, 0.0881]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2468, 0.4994, 0.0582, 0.0764, 0.1193]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1253, 0.3093, 0.0824, 0.4649, 0.0182]], device='mps:0',
       grad_fn=

Evaluating Model:  60%|██████    | 12/20 [01:04<00:47,  5.90s/it]

tensor([[0.1481, 0.4816, 0.2636, 0.0325, 0.0742]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.5156, 0.1671, 0.1735, 0.0062, 0.1376]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.6233, 0.3057, 0.0247, 0.0184, 0.0279]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1598, 0.3132, 0.1733, 0.3399, 0.0138]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0463, 0.1030, 0.5464, 0.0496, 0.2547]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0376, 0.0407, 0.0160, 0.8917, 0.0140]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0150, 0.1381, 0.7517, 0.0494, 0.0459]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  65%|██████▌   | 13/20 [01:05<00:32,  4.65s/it]

tensor([[0.0766, 0.3011, 0.1475, 0.0324, 0.4424]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.3038, 0.1979, 0.2097, 0.1233, 0.1654]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0370, 0.5247, 0.3281, 0.0010, 0.1093]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2374, 0.0407, 0.0519, 0.0357, 0.6344]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.6052, 0.1271, 0.0264, 0.1971, 0.0442]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.4880, 0.1392, 0.1228, 0.0206, 0.2295]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.4669, 0.2114, 0.0286, 0.0684, 0.2247]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1526, 0.1594, 0.2683, 0.3505, 0.0691]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0411, 0.2163, 0.5465, 0.1654, 0.0307]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1661, 0.5460, 0.1939, 0.0465, 0.0474]], device='mps:0',
       grad_fn=

Evaluating Model:  70%|███████   | 14/20 [01:08<00:24,  4.09s/it]

tensor([[0.0575, 0.2738, 0.2200, 0.3308, 0.1180]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2610, 0.5088, 0.1200, 0.0132, 0.0969]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1101, 0.1673, 0.7044, 0.0019, 0.0162]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  75%|███████▌  | 15/20 [01:09<00:15,  3.09s/it]

tensor([[0.3106, 0.1816, 0.2329, 0.1450, 0.1299]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2396, 0.3635, 0.1694, 0.0805, 0.1470]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0208, 0.3036, 0.2062, 0.0792, 0.3902]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2232, 0.1363, 0.4809, 0.1421, 0.0174]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  80%|████████  | 16/20 [01:10<00:09,  2.47s/it]

tensor([[0.1771, 0.0909, 0.2946, 0.2666, 0.1709]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.5044, 0.0902, 0.2631, 0.0286, 0.1137]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0143, 0.1613, 0.4874, 0.0022, 0.3347]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1324, 0.0677, 0.1658, 0.0311, 0.6030]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2679, 0.0371, 0.4115, 0.0214, 0.2621]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  85%|████████▌ | 17/20 [01:11<00:06,  2.11s/it]

tensor([[0.0100, 0.2667, 0.4419, 0.1587, 0.1227]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.3149, 0.5005, 0.0892, 0.0761, 0.0193]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0525, 0.5910, 0.3385, 0.0052, 0.0128]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0237, 0.0603, 0.5692, 0.0205, 0.3264]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0113, 0.2258, 0.3678, 0.2581, 0.1370]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0325, 0.1075, 0.5599, 0.1017, 0.1984]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0121, 0.2718, 0.2993, 0.3194, 0.0973]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2143, 0.0857, 0.5692, 0.0830, 0.0478]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1245, 0.0151, 0.1517, 0.5701, 0.1386]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0268, 0.2788, 0.1802, 0.1215, 0.3927]], device='mps:0',
       grad_fn=

Evaluating Model:  90%|█████████ | 18/20 [01:14<00:04,  2.31s/it]

tensor([[0.0307, 0.1646, 0.5378, 0.2169, 0.0500]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.4875, 0.0699, 0.3521, 0.0706, 0.0199]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1435, 0.4581, 0.3153, 0.0010, 0.0821]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0103, 0.2525, 0.5590, 0.1259, 0.0523]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0593, 0.0395, 0.1893, 0.2003, 0.5115]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0472, 0.0711, 0.1127, 0.3710, 0.3979]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0126, 0.1423, 0.6838, 0.0652, 0.0961]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0024, 0.2011, 0.6734, 0.0880, 0.0351]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1488, 0.2993, 0.4740, 0.0458, 0.0322]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2306, 0.4885, 0.2316, 0.0243, 0.0250]], device='mps:0',
       grad_fn=

Evaluating Model:  95%|█████████▌| 19/20 [01:24<00:04,  4.68s/it]

tensor([[0.0850, 0.2078, 0.2198, 0.2026, 0.2847]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0679, 0.6665, 0.1065, 0.0285, 0.1306]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0107, 0.6353, 0.2254, 0.0317, 0.0969]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1768, 0.3929, 0.1705, 0.2140, 0.0457]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.1787, 0.1172, 0.0955, 0.5234, 0.0852]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0756, 0.1094, 0.5357, 0.1828, 0.0964]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0094, 0.1014, 0.8182, 0.0552, 0.0158]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2297, 0.0324, 0.6684, 0.0154, 0.0542]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.2438, 0.0510, 0.1469, 0.0014, 0.5570]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0069, 0.0441, 0.2712, 0.0222, 0.6556]], device='mps:0',
       grad_fn=

Evaluating Model: 100%|██████████| 20/20 [01:32<00:00,  4.60s/it]


### Run the Tensorboard

In [8]:
%reload_ext tensorboard

%tensorboard --logdir training_results --host localhost --port 6010