# DQN Implmentation

### Installations

In [1]:
# # Install environment and agent
# !pip install highway-env
# !pip install --upgrade sympy torch


### Learning using existing model

The following is the pesudocode that will be followed when creating the DQN

Useful: https://www.youtube.com/watch?v=RVMpm86equc&list=PL58zEckBH8fCMIVzQCRSZVPUp3ZAVagWi&index=2

https://github.com/saashanair/rl-series/tree/master/dqn

https://github.com/johnnycode8/gym_solutions/blob/main/frozen_lake_dql.py

<img src="DQN.png" style="width: 900px;" align="left"/>


Potential Problems: https://www.reddit.com/r/reinforcementlearning/comments/1555wgi/dqn_loss_increasing_and_rewards_decreasing/


For CNN:

https://www.reddit.com/r/MachineLearning/comments/3l5qu7/rules_of_thumb_for_cnn_architectures/


In [2]:
import numpy as np
class SumTree:
    def __init__(self, capacity):
        self.capacity = capacity
        self.tree = np.zeros( 2 * capacity - 1 )
        self.data = np.zeros( capacity, dtype=object )
        self.writer = 0

    # propagate upwards to update the sum values
    def _propagate(self, index, change):
        parent = (index - 1) // 2

        self.tree[parent] += change

        if parent != 0:
            self._propagate(parent, change)

    # get the leaf nodes (Transaction)
    def _retrieve(self, index, s):
        left = 2 * index + 1
        right = left + 1

        if left >= len(self.tree):
            return index

        if s <= self.tree[left]:
            return self._retrieve(left, s)
        else:
            return self._retrieve(right, s-self.tree[left])

    def total(self):
        return self.tree[0]

    def add(self, p, data):
        index = self.write + self.capacity - 1

        self.data[self.write] = data
        self.update(index, p)

        self.write += 1
        # circular
        if self.write >= self.capacity:
            self.write = 0

    def update(self, index, p):
        change = p - self.tree[index]

        self.tree[index] = p
        self._propagate(index, change)

    def get(self, s):
        index = self._retrieve(0, s)
        data_index = index - self.capacity + 1

        return (index, self.tree[index], self.data[data_index])

In [105]:
import gymnasium as gym
import highway_env
import numpy as np
import random
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import torch.distributions as dist

# Define model
class MLPNetwork(nn.Module):
    def __init__(self, in_states, h1_nodes, out_actions):
        super(MLPNetwork, self).__init__()

        # Define network layers
        self.fc1 = nn.Linear(in_states, h1_nodes)   # first fully connected layer
        self.out = nn.Linear(h1_nodes, out_actions) # output layer
        self.out2 = nn.Linear(out_actions, 1) # output layer
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.fc1(x)) # Apply rectified linear unit (ReLU) activation
        x = F.relu(self.out(x))         
        x = self.out2(x)
        x = self.softmax(x)
        return x

class CNN(nn.Module):
    def __init__(self, input_shape, num_actions):
        super(CNN, self).__init__()
        # greyscale Image is(stack,height,width)
        stack, height, width = input_shape
        self.conv = nn.Sequential(
            nn.Conv2d(stack,32,kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=4,stride=2),
            nn.Dropout(0.1),

            nn.Conv2d(32,64,kernel_size=3),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2,stride=2),
            nn.Dropout(0.1),

            nn.Conv2d(64,128, kernel_size=3),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2,stride=2),
            nn.Dropout(0.1),
            
            nn.Conv2d(128,256, kernel_size=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=1,stride=2),
            nn.Dropout(0.1),
        )
        
        # This is for finding the size to dense more robust compared to decision manually
        with torch.no_grad():
                # Torch uses(1,channels,height,width)
                test = torch.zeros(1, stack, height, width)
                find_conv_size = self.conv(test)
                conv_size = find_conv_size.numel()
        self.fc = nn.Linear(conv_size,num_actions)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.5)
        # self.tan = nn.Tanh(dim=1)


    def forward(self,x):
        x = self.conv(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        x = self.dropout(x)
        x = self.softmax(x)
        return x

In [103]:
# Define memory for Experience Replay
# TODO: Prioritize, n-steps

from collections.__init__ import namedtuple

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward', 'done'))

class ReplayMemory():
    def __init__(self, config, transition_type=Transition):
        self.capacity = config.get("capacity", 1000)
        self.index = 0
        self.transition_type = transition_type
        self.discount = config.get("discount", 0.99)

        self.memory = []
        self.device = config.get("device", torch.device("cpu"))
        self.n_steps = config.get("n_steps", 2)
    
    def store(self, *args):
        if len(self.memory) < self.capacity:
            self.memory.append(None)
            self.index = len(self.memory) - 1
        elif len(self.memory) > self.capacity:
            self.memory = self.memory[:self.capacity]
        # Faster than append and pop
        self.memory[self.index] = self.transition_type(*args)
        
        self.index = (self.index+1)%self.capacity # for circular memory

    def sample(self, batch_size, collapsed=True):
        if batch_size >  len(self.memory):
            batch_size = len(self.memory)
            
        if self.n_steps <= 1:
            # Directly sample transitions
            memories = random.sample(self.memory, batch_size)
            return self.unwrap_transition(*memories)
        else:
            # Sample initial transition indexes
            indexes = random.sample(range(len(self.memory)), batch_size)
            # Get the batch of n-consecutive-transitions starting from sampled indexes
            all_transitions = [self.memory[i:i+self.n_steps] for i in indexes]
            
            memories = map(self.collapse_n_steps, all_transitions) if collapsed else all_transitions

            return self.unwrap_transition(*memories)
        
    def collapse_n_steps(self, transitions):
        state, action, next_state, reward, done = transitions[0]
        discount = self.discount
        for transition in transitions[1:]:
            if done:
                break
            else:
                _, _, next_state, reward, done = transition
                discount *= self.discount
                reward += discount * reward
        return state, action, next_state, reward, done
    
    def unwrap_transition(self, *transition):
        state, action, next_state, reward, done = zip(*transition)
        
        states = torch.from_numpy(np.array(state)).float().to(self.device)
        actions = torch.from_numpy(np.array(action)).to(self.device)
        next_states = torch.from_numpy(np.array(next_state)).float().to(self.device)
        rewards = torch.from_numpy(np.array(reward)).float().to(self.device)
        dones = torch.from_numpy(np.array(done)).to(self.device)

        return states, actions, next_states, rewards, dones 

class PrioritizedReplayMemory(ReplayMemory):
    def __init__(self, config, transition_type=Transition):
        super().__init__(config)

        self.capacity = config.get("capacity", 1000)
        self.tree = SumTree(self.capacity)
        self.index = 0
        self.transition_type = transition_type

        self.memory = []
        self.device = config.get("device", torch.device("cpu"))
        
        self.alpha = config.get("alpha", 0.6)
        self.beta = config.get("beta", 0.2) #  will go to 1
        self.max_priority = 1  # priority for new samples, init as eps
        
    def store(self, *args):
        super().store(*args)
        self.tree.add(self.max_priority ** self.alpha, )
    
    def sample(self, batch_size, collapsed=True):
        priorities = torch.empty(batch_size, 1, dtype=torch.float)


In [5]:
from torch.utils.tensorboard import SummaryWriter
import os

class Metrics:
    def __init__(self, policy, result_file_name, use_metrics, time):
        self.use_metrics = use_metrics
        if not self.use_metrics:
            return
        new_num = str(len(os.listdir("./" +result_file_name)) + 1)
        file_name = f'{result_file_name}/{policy}_DQN_{new_num}_{time}'
        self.writer = SummaryWriter(log_dir=file_name, flush_secs=60)
            
    def add(self, type, y, x):
        if not self.use_metrics:
            return
        self.writer.add_scalar(type, y, x)
    def close(self):
        if not self.use_metrics:
            return
        self.writer.close()

In [143]:
import gymnasium as gym
import highway_env
import numpy as np
import random
import torch
import torch.optim as optim
import os
import datetime
from tqdm import tqdm
import json
import time

class DQNAgent:
    def __init__(self, params):
        self.q_net = {}
        self.q_target_net = {} 
        self.optimizer= {}        
        
        self.policy = params.get("policy", "CnnPolicy")        
        self.episode_num = params.get("episode_num", 10)

        self.epsilon = params.get("epsilon_max ", 1)
        self.epsilon_min = params.get("epsilon_min ", 0.1)
        self.epsilon_decay = params.get("epsilon_decay", 0.995)
        
        self.learning_rate = params.get("learning_rate", 3e-4)
        self.discount = params.get("discount", 0.2)
        self.batch_size = params.get("batch_size", 32 )
        self.device = params.get("device", torch.device("cpu"))
        
        self.memory_capacity = params.get("memory_capacity", 1000)
        self.memory = {} # this is the memory buffer -> setting a limit
        self.n_steps = params.get("n_steps", 2)
        self.prioritize_memory = params.get("prioritize_memory", False)
        
        self.double = params.get("double", False)
        
        self.timeout = params.get("timeout_minute", 0) * 60 # in minutes
        ct = datetime.datetime.now()
        self.time = str(ct).replace(" ", "|")
        self.to_save_model = params.get("save_model", False)
        
        use_metrics = params.get("use_metrics", False)
        if use_metrics:
            self.create_folder("training_results")
            self.save_params(params)
            
        self.metrics = Metrics(self.policy, "training_results", use_metrics, self.time)
        
    def create_network(self, env):
        if self.policy == "CnnPolicy":
            self.create_CNN(env)
        
        if self.policy == "MlpPolicy":
            self.create_MLP_Network(env)
            
        self.update_target_network()
        self.optimizer = optim.Adam(self.q_net.parameters(), lr=1e-3)
    
    def create_CNN(self, env):
        self.num_states = env.observation_space.shape
        self.num_actions = env.action_space.n

        self.q_net = CNN(self.num_states, self.num_actions).to(self.device)
        self.q_target_net = CNN(self.num_states, self.num_actions).to(self.device)
        
    
    def create_MLP_Network(self, env):
        # the lanes
        self.num_states = env.observation_space.shape[1]
        self.num_actions = env.action_space.n

        self.q_net = MLPNetwork(self.num_states, self.num_states, self.num_actions).to(self.device)
        self.q_target_net = MLPNetwork(self.num_states, self.num_states, self.num_actions).to(self.device)
    
    def update_target_network(self):
        # make the weights and biases the same
        self.q_target_net.load_state_dict(self.q_net.state_dict())
    
    def learn(self, env):
        self.create_network(env)
        
        # if (self.prioritize_memory):
        self.memory = ReplayMemory({
            "capacity": self.memory_capacity,
            "device": self.device,
            "n_steps": self.n_steps,
        })

        self.prefill_memory(env, self.batch_size)

        start_time = time.time()        
        
        for epoch in tqdm(range(self.episode_num), desc="Training Model"):
            state = env.reset()[0]
            
            # True when agent reaches the end states (colliding or passing the time)
            done = False 
            
            # TODO: see how many actions until truncate
            # True when agent takes more than some actions 
            truncated = False
            episode_rewards = []
            episode_loss = []
            episode_len = 0
            while(not done and not truncated):
                # choose best action
                action = self.get_action(state)
                next_state, reward, done, truncated, info = env.step(action)
                self.memory.store(state, action, next_state, reward, done)
                
                episode_loss.append(self.experience_replay())
                
                state = next_state
                
                episode_rewards.append(reward)
                episode_len += 1
                
            self.metrics.add("rollout/rewards", sum(episode_rewards) / len(episode_rewards), epoch)
            self.metrics.add("rollout/exploration-rate", self.epsilon, epoch)
            self.metrics.add("rollout/episode-length", episode_len, epoch)
            self.metrics.add("train/loss", sum(episode_loss) / len(episode_loss), epoch)
            
            if self.timeout:
                elapsed_time = time.time() - start_time
                if elapsed_time > self.timeout:
                    print("Timeout reached. Stopping training.\n")
                    break
            
            # if epoch % 1000 == 0:
            self.decay_epsilon()
            self.update_target_network()
            
        self.metrics.close()
        
        if self.to_save_model:
            self.save_model()
        
    # either the policies are able to get miltuple actions and into the NN or the input of NN should be able to handle all of these
    # output (one of): {0: 'LANE_LEFT', 1: 'IDLE', 2: 'LANE_RIGHT', 3: 'FASTER', 4: 'SLOWER'}
    def get_action(self, state, eval_mode=False):
        if random.random() <= self.epsilon and not eval_mode: # amount of exploration reduces with the epsilon value
            return random.randrange(self.num_actions)
        
        state = torch.tensor(np.array([state]), dtype=torch.float32).to(self.device)
        actions = self.q_net(state)
        return torch.argmax(actions).item()             

    def experience_replay(self):
        states, actions, next_states, rewards, dones = self.memory.sample(self.batch_size)
        
        q_pred = self.q_net(states)
        # q value of the action taken
        q_pred = q_pred.gather(1, actions.view(-1, 1)) 
        q_pred = q_pred.squeeze(1)

        # Double DQN
        if self.double:
             # pick best actions from policy network
            q_best_action = self.q_net(next_states)
            _, q_best_action = q_best_action.max(dim=1)
            q_best_action = q_best_action.unsqueeze(1)
            
            # use those actions for the target policy
            q_target = self.q_target_net(next_states)
            q_target = q_target.gather(1, q_best_action)
            q_target = q_target.squeeze(1)
        else:
            q_target = self.q_target_net(next_states)
            q_target = q_target.max(dim=1).values
        
        # setting Q(s',a') to 0 when the current state is a terminal state
        q_target[dones] = 0.0
        
        y_j = rewards + (self.discount * q_target)
        
        # calculate the loss as the mean-squared error of yj and qpred
        self.optimizer.zero_grad()
        loss = F.mse_loss(y_j, q_pred).mean()
        loss.backward()
        self.optimizer.step()
        
        return loss.item()
        
    def prefill_memory(self, env, prefill_num):
        for _ in tqdm(range(prefill_num), desc="Prefilling Memory "):
            done = False
            truncated = False
            state = env.reset()[0]

            while not done and not truncated:
                action = env.action_space.sample()
                next_state, reward, done, truncated, info = env.step(action)
                self.memory.store(state, action, next_state, reward, done)    
                
    def decay_epsilon(self):
        self.epsilon = max(self.epsilon_min, self.epsilon*self.epsilon_decay)
    
    def evaluate(self, env, episode_num):
        # add camera here
        for _ in tqdm(range(episode_num), desc="Evaluating Model"):
            state = env.reset()[0]  
            done = False      
            truncated = False 

            # Agent navigates map until it falls into a hole (terminated), reaches goal (terminated), or has taken 200 actions (truncated).
            while(not done and not truncated):  
                # Select best action   
                action = self.get_action(state, eval_mode=True)
                next_state, reward, done, truncated, info = env.step(action)
                env.render()
        
    def save_model(self):
        folder_name = self.policy + "_save_models"
        self.create_folder(folder_name)
        new_model_num = str(len(os.listdir("./" +folder_name)) + 1)
        file_name = f'{folder_name}/DQN_{new_model_num}_{self.time}.pth'
        state = {'state_dict': self.q_net.state_dict(),
            'optimizer': self.optimizer.state_dict()}
        torch.save(state, file_name)
        
    def load_model(self, env, file_name):
        folder_name = self.policy + "_save_models"
                
        filename = folder_name + "/" + file_name + ".pth"
        self.create_network(env)
        
        models = torch.load(filename, map_location=self.device)
        
        self.q_net.load_state_dict(models['state_dict'])
        self.optimizer.load_state_dict(models['optimizer'])
    
    def save_params(self, params):
        folder_name = "hyperparameters"
        self.create_folder(folder_name)
        
        file_name = f'./{folder_name}/{self.policy}_DQN_{self.time}'
        with open(file_name + '.txt', 'w') as file:
            file.write(json.dumps(str(params)))

    def create_folder(self, directory_name):
        try:
            os.mkdir(directory_name)
            print(f"Directory '{directory_name}' created successfully.")
        except FileExistsError:
            return
        except PermissionError:
            print(f"Permission denied: Unable to create '{directory_name}'.")
        except Exception as e:
            print(f"An error occurred: {e}")


In [59]:
config = {}
policy = "CnnPolicy"
# policy = "MlpPolicy"

if policy == "CnnPolicy":
    config={
        "lanes_count" : 3,
        "observation": {
            "type": "GrayscaleObservation",
            "observation_shape": (128, 64),
            "stack_size": 4,
            "weights": [0.2989, 0.5870, 0.1140],  # weights for RGB conversion keep this conversion this is in the highway env page
            "scaling": 1.75,
        },
    }
else:
    config = {
        "lanes_count" : 3,
        "observation": {
            "type": "Kinematics",
            "vehicles_count": 5,
            "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h"],
            "features_range": {
                "x": [-100, 100],
                "y": [-100, 100],
                "vx": [-20, 20],
                "vy": [-20, 20]
            },
            "absolute": False,
            "order": "sorted"
        }
    }

In [146]:
params = {
    'policy' : policy,
    'episode_num' : 1000,
    'discount' : 0.8,
    'batch_size' : 16,
    'n_steps': 4,
    'double': True,
    'device' : torch.device("mps"),
    'memory_capacity' : 10000,
    'timeout_minute': 1,
    'use_metrics' : False,
    'save_model': False,
}

dqn_agent = DQNAgent(params)
env = gym.make('highway-fast-v0', render_mode='rgb_array', config=config)
dqn_agent.learn(env)

env = gym.make('highway-v0', render_mode='rgb_array', config=config)
dqn_agent.evaluate(env, 10)

# if you wanna save a model again
# dqn_agent.save_model("highway_dqn_model")

Prefilling Memory : 100%|██████████| 16/16 [00:03<00:00,  4.19it/s]
Training Model:  17%|█▋        | 170/1000 [04:16<20:50,  1.51s/it]


KeyboardInterrupt: 

In [99]:

env = gym.make('highway-v0', render_mode='rgb_array', config=config)

dqn_agent_test = DQNAgent(params)
dqn_agent_test.load_model(env, "DQN_3_2024-12-24|10:46:24.193447")

dqn_agent_test.evaluate(env, 20)


  models = torch.load(filename, map_location=self.device)
Evaluating Model:   0%|          | 0/20 [00:00<?, ?it/s]

tensor([[   -0.0000, -8770.1182,  2399.5125,     0.0000, -2908.1982]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8549.9873, -8838.2910,     0.0000,  6042.5273,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8784.0957,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.2500, 0.0000, 0.2500, 0.2500, 0.2500]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:   5%|▌         | 1/20 [00:00<00:18,  1.02it/s]

tensor([[-8558.8809, -8827.7100,  2423.6118,  6067.7080,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8539.6514, -8807.8467,  2445.2800,  6045.2661,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[  -0.0000,   -0.0000, 2403.3269, 6054.5024,   -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  10%|█         | 2/20 [00:01<00:18,  1.00s/it]

tensor([[-8511.7383,    -0.0000,  2490.5554,  6003.3286, -2905.4438]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8775.1357,  2460.4775,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  15%|█▌        | 3/20 [00:02<00:14,  1.17it/s]

tensor([[   -0.0000,    -0.0000,     0.0000,  6019.4766, -2846.6851]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8558.3945, -8830.0674,     0.0000,     0.0000, -2870.7412]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.5000, 0.5000, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8569.0361,    -0.0000,  2368.9377,  6060.7788, -2860.5146]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8540.6738,    -0.0000,  2416.6252,  6046.9609, -2870.5032]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  20%|██        | 4/20 [00:04<00:16,  1.05s/it]

tensor([[   -0.0000,    -0.0000,     0.0000,     0.0000, -2873.8994]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.2500, 0.2500, 0.2500, 0.2500, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8814.1631,  2400.1755,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[  -0.0000,   -0.0000, 2403.5991, 5990.6343,   -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8529.0967, -8809.9434,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.3333, 0.3333, 0.3333]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  25%|██▌       | 5/20 [00:05<00:17,  1.16s/it]

tensor([[   -0.0000, -8811.5947,  2485.3889,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8496.9766, -8776.6846,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.3333, 0.3333, 0.3333]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000,    -0.0000,  2476.5017,     0.0000, -2910.2288]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8518.9639, -8803.5947,     0.0000,  6007.2695, -2926.7568]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  30%|███       | 6/20 [00:06<00:17,  1.22s/it]

tensor([[   -0.0000, -8778.9209,  2371.9949,     0.0000, -2840.9897]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8505.6318, -8775.7158,  2419.5999,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8775.2764,  2353.8770,  6026.2705, -2843.2256]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8780.2676,  2392.9507,     0.0000, -2904.1072]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  35%|███▌      | 7/20 [00:08<00:16,  1.26s/it]

tensor([[  -0.0000,   -0.0000, 2492.3000,    0.0000,   -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8450.4160,    -0.0000,  2436.1694,  5978.3745,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000,    -0.0000,  2510.4150,     0.0000, -2916.6406]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  40%|████      | 8/20 [00:09<00:14,  1.18s/it]

tensor([[   -0.0000,    -0.0000,     0.0000,  6010.4424, -2675.3989]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8474.2412,    -0.0000,  2270.3926,     0.0000, -2704.6902]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8473.0010, -8782.4189,  2294.4070,  5994.4912, -2695.7891]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8818.8232,     0.0000,     0.0000, -2718.8093]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.3333, 0.0000, 0.3333, 0.3333, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[  -0.0000,   -0.0000, 2325.7273, 6036.5835,   -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -

Evaluating Model:  45%|████▌     | 9/20 [00:11<00:15,  1.44s/it]

tensor([[   -0.0000,    -0.0000,  2387.0266,  6034.4795, -2877.1606]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8491.6895, -8759.8135,     0.0000,  6000.8525, -2878.9768]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8514.6562, -8784.7881,     0.0000,  6026.4263,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000,    -0.0000,  2410.4460,     0.0000, -2886.4802]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8567.8467,    -0.0000,  2402.7493,  6065.1484, -2887.6973]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8564.1367, -8828.9893,   

Evaluating Model:  50%|█████     | 10/20 [00:13<00:18,  1.83s/it]

tensor([[   -0.0000, -8779.3145,  2310.2092,  5965.7939, -2686.5967]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8495.6133,    -0.0000,     0.0000,     0.0000, -2688.3474]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.3333, 0.3333, 0.3333, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8789.0635,     0.0000,  6007.0234,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8541.6416,    -0.0000,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.2500, 0.2500, 0.2500, 0.2500]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  55%|█████▌    | 11/20 [00:15<00:15,  1.68s/it]

tensor([[   -0.0000, -8798.2207,     0.0000,     0.0000, -2914.0476]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.3333, 0.0000, 0.3333, 0.3333, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8488.6592, -8780.3584,     0.0000,     0.0000, -2896.3677]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.5000, 0.5000, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8475.3623,    -0.0000,  2441.4973,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  60%|██████    | 12/20 [00:16<00:11,  1.48s/it]

tensor([[-8516.2451, -8803.7314,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.3333, 0.3333, 0.3333]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8503.3359,    -0.0000,     0.0000,  5988.9668, -2663.3184]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8478.2559, -8750.9346,  2279.4270,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8479.4346, -8770.0029,     0.0000,     0.0000, -2689.3374]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.5000, 0.5000, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8560.3076, -8838.0322,     0.0000,     0.0000, -2708.6907]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.5000, 0.5000, 0.0000]], devic

Evaluating Model:  65%|██████▌   | 13/20 [00:18<00:11,  1.64s/it]

tensor([[   -0.0000, -8812.1826,     0.0000,     0.0000, -2918.2358]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.3333, 0.0000, 0.3333, 0.3333, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8521.2686, -8789.2041,  2485.9221,  6015.7314,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8526.3008, -8811.1191,  2458.2271,     0.0000, -2912.3242]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8527.2744, -8795.9287,  2487.1677,  6011.9150,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  70%|███████   | 14/20 [00:19<00:09,  1.55s/it]

tensor([[   -0.0000, -8801.4375,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.2500, 0.0000, 0.2500, 0.2500, 0.2500]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8519.5303,    -0.0000,  2317.5598,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8471.7197, -8758.6143,  2305.1560,  6003.0669,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8537.7930,    -0.0000,     0.0000,     0.0000, -2693.9395]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.3333, 0.3333, 0.3333, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8576.4170, -8865.0352,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.3333, 0.3333, 0.3333]], devic

Evaluating Model:  75%|███████▌  | 15/20 [00:21<00:08,  1.79s/it]

tensor([[   -0.0000,    -0.0000,  2519.2739,  5961.4316, -2896.3013]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8459.0225, -8745.1729,     0.0000,  5950.1387, -2888.1309]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8724.2295,     0.0000,  5949.1621,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8518.5264, -8805.0127,  2471.4836,     0.0000, -2909.2949]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  80%|████████  | 16/20 [00:23<00:06,  1.65s/it]

tensor([[   -0.0000, -8884.3721,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.2500, 0.0000, 0.2500, 0.2500, 0.2500]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8507.1475, -8794.0400,     0.0000,  6004.5166, -2867.3611]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000,    -0.0000,  2455.0603,     0.0000, -2935.5593]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8524.8057, -8819.5381,     0.0000,  6000.7158, -2906.3994]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  85%|████████▌ | 17/20 [00:24<00:04,  1.55s/it]

tensor([[-8535.6611,    -0.0000,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.2500, 0.2500, 0.2500, 0.2500]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[  -0.0000,   -0.0000, 2433.1858, 6051.3398,   -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000,    -0.0000,     0.0000,     0.0000, -2881.3979]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.2500, 0.2500, 0.2500, 0.2500, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8826.0654,     0.0000,     0.0000, -2902.2871]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.3333, 0.0000, 0.3333, 0.3333, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)


Evaluating Model:  90%|█████████ | 18/20 [00:25<00:02,  1.50s/it]

tensor([[-8510.5029, -8784.0303,  2341.1194,     0.0000, -2721.8931]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8498.0225, -8778.8027,     0.0000,     0.0000, -2662.5183]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.5000, 0.5000, 0.0000]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8480.6729, -8783.1045,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.3333, 0.3333, 0.3333]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[-8570.4639,    -0.0000,  2324.5725,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8521.5811, -8809.7334,  2292.7952,     0.0000, -2709.0364]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<

Evaluating Model:  95%|█████████▌| 19/20 [00:30<00:02,  2.36s/it]

tensor([[-8476.7178, -8762.5361,     0.0000,  5957.5884, -2876.7578]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000,    -0.0000,  2497.9519,  6035.9854, -2942.3179]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)
tensor([[-8478.6875, -8741.1748,     0.0000,     0.0000,    -0.0000]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0.0000, 0.0000, 0.3333, 0.3333, 0.3333]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)
tensor([[   -0.0000, -8814.0127,  2506.6042,     0.0000, -2932.4558]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 1., 0., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)


Evaluating Model:  95%|█████████▌| 19/20 [00:31<00:01,  1.67s/it]

tensor([[-8501.7441, -8783.5869,  2472.9182,  5987.3418, -2908.2495]],
       device='mps:0', grad_fn=<MulBackward0>)
tensor([[0., 0., 0., 1., 0.]], device='mps:0', grad_fn=<SoftmaxBackward0>)





KeyboardInterrupt: 

### Run the Tensorboard

In [8]:
%reload_ext tensorboard

%tensorboard --logdir training_results --host localhost --port 6010