Using GRPO to finetune the chat gpt2 model

# Imports

In [13]:
from importlib.metadata import version
import torch, tiktoken, time, os, tensorflow
import torch.optim as optim
import numpy as np
import argparse
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
import zipfile
from pathlib import Path
import pandas as pd
from torch.nn import Module # For type hinting

pkgs = ["numpy", 
        "tiktoken", 
        "torch",
        # "tensorflow", # For OpenAI's pretrained weights
        "pandas"
       ]
for p in pkgs:
    print(f"{p} version: {version(p)}")

numpy version: 1.23.5
tiktoken version: 0.9.0
torch version: 2.5.1
pandas version: 2.3.1


In [2]:
!python --version

Python 3.10.8


In [2]:
os.cpu_count()

24

In [None]:
from utils.previous_chapters import generate_text_simple, text_to_token_ids, token_ids_to_text,GPTModel, create_dataloader_v1, load_weights_into_gpt
# Relative import from the gpt_download.py contained in this folder
from utils.gpt_download import download_and_load_gpt2

## Dataset Class

In [8]:
def prepare_datasets(data_file_path, sep="\t", header=None, column_names=["Label", "Text"], train_frac=0.7, validation_frac=0.15, store_directory="./"):
    df = pd.read_csv(data_file_path, sep=sep, header=header, names=column_names)

    # Count the instances of "spam"
    num_spam = df[df["Label"] == "spam"].shape[0]
    
    # Randomly sample "ham" instances to match the number of "spam" instances
    ham_subset = df[df["Label"] == "ham"].sample(num_spam, random_state=123)
    
    # Combine ham "subset" with "spam"
    balanced_df = pd.concat([ham_subset, df[df["Label"] == "spam"]])

    balanced_df["Label"] = balanced_df["Label"].map({"ham": 0, "spam": 1})

    # Shuffle the entire DataFrame
    balanced_df = balanced_df.sample(frac=1, random_state=123).reset_index(drop=True)

    # Calculate split indices
    train_end = int(len(balanced_df) * train_frac)
    validation_end = train_end + int(len(balanced_df) * validation_frac)

    # Split the DataFrame
    train_df = balanced_df[:train_end]
    validation_df = balanced_df[train_end:validation_end]
    test_df = balanced_df[validation_end:]

    train_df.to_csv(store_directory+"/train.csv", index=None)
    validation_df.to_csv(store_directory+"/validation.csv", index=None)
    test_df.to_csv(store_directory+"/test.csv", index=None)

In [9]:
class SpamDataset(Dataset):
    def __init__(self, csv_file, tokenizer, max_length=None, pad_token_id=50256):
        try:
            self.data = pd.read_csv(csv_file)
        except FileNotFoundError:
            raise FileNotFoundError(f"File not found: {csv_file}")

        # Pre-tokenize texts
        self.encoded_texts = [
            tokenizer.encode(text) for text in self.data["Text"]    # For each row in the text section of the pandas data frame tokenize the text string(sentence); creates list of token IDs for each example/item of the text data
        ]

        if max_length is None:
            self.max_length = self._longest_encoded_length()
        else:
            self.max_length = max_length
            # Truncate sequences if they are longer than max_length
            self.encoded_texts = [
                encoded_text[:self.max_length]
                for encoded_text in self.encoded_texts
            ]

        # Pad sequences to the longest sequence
        self.encoded_texts = [
            encoded_text + [pad_token_id] * (self.max_length - len(encoded_text))
            for encoded_text in self.encoded_texts
        ]

    def __getitem__(self, index):
        encoded = self.encoded_texts[index]
        label = self.data.iloc[index]["Label"]
        return (
            torch.tensor(encoded, dtype=torch.long),
            torch.tensor(label, dtype=torch.long)
        )

    def __len__(self):
        return len(self.data)

    def _longest_encoded_length(self):
        max_length = 0
        for encoded_text in self.encoded_texts:
            encoded_length = len(encoded_text)
            if encoded_length > max_length:
                max_length = encoded_length
        return max_length

## Building Policies

In [10]:
def build_new_policy(base_config, chosen_model="gpt2-small (124M)", num_classes = 2) -> GPTModel:
    """Build and load in the GPT2 model. Swap out the Head layer, and freeze up to the last Transformer module for transfer learning."""
    model_configs = {
        "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
        "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
        "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
        "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
    }

    base_config.update(model_configs[chosen_model]) # add the emb_dim, n_layers, and n_heads to the config

    model_size = chosen_model.split(" ")[-1].lstrip("(").rstrip(")")
    allowed_sizes = ("124M", "355M", "774M", "1558M")
    if model_size not in allowed_sizes:
        raise ValueError(f"Model size not in {allowed_sizes}")
    settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")

    model = GPTModel(base_config)

    load_weights_into_gpt(model, params)
    
    for param in model.parameters(): # freeze model parameters
        param.requires_grad = False 

    # Unfreeze the last transformer block
    for param in model.trf_blocks[-1].parameters():
        param.requires_grad = True

    # Unfreeze the final layer normalizing layer
    for param in model.final_norm.parameters():
        param.requires_grad = True

    model.out_head = torch.nn.Linear(in_features=base_config["emb_dim"], out_features=num_classes) # reconfigure the output layer
    return model

In [7]:
def build_old_policy(base_config, chosen_model="gpt2-small (124M)", num_classes = 2) -> GPTModel:
    """Construct just the model without loading the weights"""
    model_configs = {
        "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
        "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
        "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
        "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
    }

    base_config.update(model_configs[chosen_model]) # add the emb_dim, n_layers, and n_heads to the config

    model_size = chosen_model.split(" ")[-1].lstrip("(").rstrip(")")
    allowed_sizes = ("124M", "355M", "774M", "1558M")
    if model_size not in allowed_sizes:
        raise ValueError(f"Model size not in {allowed_sizes}")
    model = GPTModel(base_config)

    model.out_head = torch.nn.Linear(in_features=base_config["emb_dim"], out_features=num_classes) # reconfigure the output layer
    return model

## Utility Functions

In [11]:
def calculate_discounted_rewards(predictions, batch_labels) -> torch.tensor:
    """For this implementation, use non-discounted rewards"""
    disc_rewards = (predictions == batch_labels).float()    # Simple comparison to evaluate rewards for each example; output a tensor of floats
    return disc_rewards

In [None]:
def log_epoch_stats(epoch, epoch_limit, total_loss, ratio, entropy) -> None:
    print(f"=====================  [Epoch ({epoch})]  =====================")
    print("Last k_epoch stats:")
    print(f"Loss: {total_loss:.7f} | Ratio: {ratio:.7f} | Entropy Term: {entropy:.7f}")
    print(f"===========================================================")


In [None]:
def evaluate_policy(Policy: Module, dataloader: DataLoader, current_epoch: int = None, max_epochs: int=None, device: str = 'cpu') -> float:
    """
    Evaluates the policy model (greedy version) on a given dataset.

    Returns:
        accuracy (float): The calculated accuracy
    """
    Policy.eval()   # Turn off dropout layers and prevent grad tracking

    # Dataset check before continuing
    if len(dataloader.dataset) == 0: # Check the underlying dataset size
        print(f"Warning: Evaluation dataset is empty. Skipping accuracy calculation.")
        return float('nan')
    
    accuracy, num_correct, num_of_samples= 0.0, 0.0, 0.0

    Softmax_lyr = torch.nn.Softmax(dim=-1)
    
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device) # move the training data to the target device

            logits = Policy(batch_inputs)[:,-1,:]   # Get logits from model and only focus on the last iterations of each sample
            # print(old_logits)
            
            classification_probabilities = Softmax_lyr(logits)
            class_predictions = torch.argmax(classification_probabilities, dim=-1).flatten()
            num_of_samples += batch_labels.size(0)
            num_correct += sum((class_predictions == batch_labels).float()).item()
        accuracy = num_correct/num_of_samples
        if current_epoch and max_epochs:   # If the function was called in the training loop
            print(f"===================  [Epoch ({current_epoch}/{max_epochs})]  ===================")
            print(f"Entire Validation Dataset Accuracy: {accuracy:.4f}| {num_correct} / {num_of_samples} samples")
            print(f"====================================================")

        else:   # If the function was called outside of the training loop
            print(f"===============================================")
            print(f"Entire Dataset Accuracy: {accuracy:.4f} | {num_correct} / {num_of_samples} samples")
            print(f"=====================================================")

            
    Policy.train()  # set back to training mode 
    return accuracy

## Training Loop

In [106]:
def grpo_train(model_config, train_dataloader, validation_dataloader, gpt_size="gpt2-small (124M)", epochs=50, learning_rate=0.0001, batch_size=64, gamma=0.99, k_epochs=64, epsilon=0.2, beta_kl=0.01, max_grad_norm=0.5, entropy_coeff=0.01, log_iterations=10, eval_iterations=10, device="cpu", num_envs=None) -> Module:
    print(f"Training Policy on {device} with {epochs} main epochs, {k_epochs} inner epochs, {learning_rate} learning rate, batch size={batch_size}, KL beta={beta_kl}.")
    print(f"Using gpt2 size: '{gpt_size}', logging every {log_iterations} epoch iterations, evaluating every {eval_iterations} epoch iterations.")


    Policy_New = build_new_policy(model_config, chosen_model=gpt_size, num_classes=2).to(device)   # STEP 3 || 
    Policy_New.train()
    # Policy_New = torch.compile(Policy_New) # to reap efficiency benefits ; Not Working - triton dependency!!

    optimizer = optim.Adam(params=Policy_New.parameters(), lr=learning_rate)

    Policy_Old = build_old_policy(model_config, chosen_model=gpt_size, num_classes=2).to(device)
    Policy_Old.eval()
    # Policy_Old = torch.compile(Policy_Old)

    for epoch in tqdm(iterable=range(epochs), desc=f">>>>>>>>>>>>>>>>>>>>>\nMain Epoch (Outer Loop)", leave=True):     # STEP 4 || 
        # STEP 5 || Sample a batch D_b from D --> OMITTED 
        # STEP 6 || Update the old policy model PI old <- PI new
        Policy_Old.load_state_dict(Policy_New.state_dict())
        # print("loaded Policy Old Weights")
        # --- STEP 7 Collect a Batch of Experiences Using the Old Policy---
        # for batch_inputs, batch_labels in train_dataloader:
        #     pass
        batch_inputs, batch_labels = next(iter(train_dataloader))
        # print(f"batch_inputs shape: {batch_inputs.shape}")
        # print(f"batch_labels shape: {batch_labels.shape}")

        batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device) # move the training data to the target device
        
        with torch.no_grad():
            old_logits = Policy_Old(batch_inputs)[:,-1,:]   # Get logits from model and only focus on the last iterations of each sample
            # print(old_logits)
            # print(f"old_logits shape: {old_logits.shape}")

            old_dist = torch.distributions.Categorical(logits=old_logits) # Create a distribution to sample from
            old_predictions = old_dist.sample() # Tensor of shape (batch_size,) ; list of predictions
            print(f"old_predictions: \n{old_predictions[:10]}")
            print(f"batch_labels True Values: \n{batch_labels[:10]}")

            old_log_probs = old_dist.log_prob(old_predictions)

        # STEP 8 || Calculate "Discounted" Rewards for completed trajectories
        discounted_rewards = calculate_discounted_rewards(old_predictions, batch_labels)    # Tensor with "discounted" rewards per each sample in batch
        # print("Calculated discounted returns")
        print(f"discounted_rewards: \n{discounted_rewards[:10]} Shape: {discounted_rewards.shape}")

        # STEP 9 || Calculate the Advantage for each Trajectory using normalization
        all_advantages_tensor = (discounted_rewards - discounted_rewards.mean()) / (discounted_rewards.std() + 1e-8)
        print(f"all_advantages_tensor: \n{all_advantages_tensor[:10]}")


        Policy_New.train()  # Prepare NN for updates

        # --- STEP 10 || GRPO Optimization ---
        for k_epoch in tqdm(iterable=range(k_epochs), desc=f"Epoch {epoch+1}/{epochs} (Inner K-Epochs)", leave=True):
            print(f"===========================  [({k_epoch+1}/{k_epochs})]  ==========================\n")
            optimizer.zero_grad()   # Flush out all the accumulated gradients for the weights of the model-under-training

            new_logits = Policy_New(batch_inputs)[:,-1,:]   # Get logits from model and only focus on the last iterations of each sample
            new_dist = torch.distributions.Categorical(logits=new_logits)
            new_log_probs = new_dist.log_prob(old_predictions)  # Get the log probability of choosing the same action that the old policy took using the new distribution
            entropy = new_dist.entropy().mean() # Calculate entropy for regularization
            print(f"Entropy of this k_epoch: {entropy}")
            R1_ratio = torch.exp(new_log_probs - old_log_probs) # Exponent trick
            # print(f"R1_ratio: \n{R1_ratio[:10]}")

            unclipped_surrogate = R1_ratio * all_advantages_tensor
            clipped_surrogate = torch.clamp(input=R1_ratio, min=1.0-epsilon, max=1.0+epsilon) * all_advantages_tensor
            # print(f"unclipped_surrogate: \n{unclipped_surrogate[:10]}\nclipped_surrogate: \n{clipped_surrogate[:10]}")
            policy_loss = -torch.min(unclipped_surrogate, clipped_surrogate).mean()

            print(f"Average policy_loss of this k_epoch: {policy_loss}")

            # --- KL Divergence Calculation ---
            # Create distributions for old policies using the trajectory states
            # with torch.no_grad():
            #     old_logits = Policy_Old(all_states_tensor)
            # old_dist = torch.distributions.Categorical(logits=old_logits)
            # INSTEAD, just reusing the calculated logits from STEP #7

            # Calculate KL divergence per sample, then take the mean over the batch
            kl_div_per_sample = torch.distributions.kl.kl_divergence(p=new_dist, q=old_dist)
            # print(f"KL Divergence between new and old policy: \n{kl_div_per_sample[:10]}")
            kl_loss = kl_div_per_sample.mean() # Mean over the batch
            print(f"KL Divergence Average Loss: {kl_loss}")

            # Total Loss for GRPO
            total_loss = policy_loss + beta_kl * kl_loss - entropy_coeff * entropy
            print(f"Total Loss of this k_epoch: {total_loss}\n")
            # STEP 11 || Policy Updates
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(Policy_New.parameters(), max_grad_norm)
            optimizer.step()    # Update policy parameters using gradient ascent
                

        # --- Logging and Evaluation ---
        if (epoch + 1) % log_iterations == 0:
            log_epoch_stats(epoch=epoch+1, epoch_limit=epochs, total_loss=total_loss.item(), ratio=R1_ratio.mean().item(), entropy=entropy)

        if (epoch + 1) % eval_iterations == 0:
            accuracy = evaluate_policy(Policy_New, validation_dataloader, current_epoch=epoch+1, max_epochs=epochs, device=device)

    Policy_New.eval()   # Change to eval mode for evaluation after training is complete

    print("Training complete.")
    return Policy_New # Return the trained policy

## Main Loop

In [17]:
BASE_CONFIG = {
    "vocab_size": 50257,     # Vocabulary size
    "context_length": 1024,  # Context length
    "drop_rate": 0.1,        # Dropout rate
    "qkv_bias": True         # Query-key-value bias
}

In [108]:
# Pretend that the argument parser will pass these arguments to the main function
args = {
    "epochs":32,
    "learning_rate":0.0003,
    "dataloader_batch_size":64,
    "dataloader_pin_memory": True,  
    "dataloader_num_workers": 0,    # Problem if I change this; slow for windows; try to modify within .py script
    "batch_size":None, # Not needed in this build/project
    "gpt_size":'gpt2-small (124M)',
    "k_epochs":64,       # GRPO Inner-loop
    "epsilon":0.2,
    "beta_kl":0.01,
    "entropy_coeff":0.05,   # 
    "log_iterations":1,     # Log GRPO stats
    "eval_iterations":1,    # Run model through evaluation at every "x" epochs
    "gamma":None,   # Discounted Rewards
    "device":'cuda',
    "num_envs":None,
    "save_model":True,
    "model_output_path":'models/first.pt'
}

In [19]:
prepare_datasets(data_file_path="./sms_spam_collection/SMSSpamCollection.tsv", store_directory="./sms_spam_collection/data_splits")

In [20]:
tokenizer = tiktoken.get_encoding("gpt2")

In [21]:
train_dataset = SpamDataset(csv_file="./sms_spam_collection/data_splits/train.csv", tokenizer=tokenizer)
# test_dataset = SpamDataset(csv_file="./sms_spam_collection/data_splits/test.csv", tokenizer=tokenizer)
validation_dataset = SpamDataset(csv_file="./sms_spam_collection/data_splits/validation.csv", tokenizer=tokenizer)

In [22]:
train_dataloader = DataLoader(dataset=train_dataset, batch_size=args["dataloader_batch_size"], num_workers=args["dataloader_num_workers"], pin_memory=args["dataloader_pin_memory"], drop_last=True)
# test_dataloader = DataLoader(dataset=test_dataset, batch_size=args["dataloader_batch_size"], num_workers=args["dataloader_num_workers"], pin_memory=args["dataloader_pin_memory"], drop_last=True)
validation_dataloader = DataLoader(dataset=validation_dataset, batch_size=args["dataloader_batch_size"], num_workers=args["dataloader_num_workers"], pin_memory=args["dataloader_pin_memory"], drop_last=True)

In [23]:
print(len(train_dataloader))
print(len(validation_dataloader))


16
3


In [24]:
for batch_inputs, batch_labels in validation_dataloader:
    print(f"{batch_labels.size(0)}")
    break

64


In [109]:
# function call
trained_policy = grpo_train(
        model_config=BASE_CONFIG,
        train_dataloader=train_dataloader,
        validation_dataloader=validation_dataloader,
        gpt_size=args["gpt_size"],
        epochs=args["epochs"],
        eval_iterations=args["eval_iterations"],
        learning_rate=args["learning_rate"],
        batch_size=args["batch_size"], # Significantly larger batch size recommended for stability
        k_epochs=args["k_epochs"],
        epsilon=args["epsilon"],
        beta_kl=args["beta_kl"],
        entropy_coeff=args["entropy_coeff"],
        log_iterations=args["log_iterations"],
        gamma=args["gamma"],
        device=args["device"],
        num_envs=args["num_envs"]
    )

Training Policy on cuda with 32 main epochs, 64 inner epochs, 0.0003 learning rate, batch size=None, KL beta=0.01.
Using gpt2 size: 'gpt2-small (124M)', logging every 1 epoch iterations, evaluating every 1 epoch iterations.
File already exists and is up-to-date: gpt2\124M\checkpoint
File already exists and is up-to-date: gpt2\124M\encoder.json
File already exists and is up-to-date: gpt2\124M\hparams.json
File already exists and is up-to-date: gpt2\124M\model.ckpt.data-00000-of-00001
File already exists and is up-to-date: gpt2\124M\model.ckpt.index
File already exists and is up-to-date: gpt2\124M\model.ckpt.meta
File already exists and is up-to-date: gpt2\124M\vocab.bpe


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):   0%|          | 0/32 [00:00<?, ?it/s]

old_predictions: 
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([0., 0., 1., 1., 1., 0., 0., 1., 0., 0.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([-0.9922, -0.9922,  0.9922,  0.9922,  0.9922, -0.9922, -0.9922,  0.9922,
        -0.9922, -0.9922], device='cuda:0')





Entropy of this k_epoch: 0.03249852731823921
Average policy_loss of this k_epoch: -0.00024547427892684937
KL Divergence Average Loss: 0.012347672134637833
Total Loss of this k_epoch: -0.0017469238955527544


Entropy of this k_epoch: 0.08917655050754547
Average policy_loss of this k_epoch: 0.007896699011325836
KL Divergence Average Loss: 0.0696031004190445
Total Loss of this k_epoch: 0.004133902955800295






Entropy of this k_epoch: 0.14105895161628723
Average policy_loss of this k_epoch: -0.015257537364959717
KL Divergence Average Loss: 0.19067054986953735
Total Loss of this k_epoch: -0.020403780043125153


Entropy of this k_epoch: 0.1654437929391861
Average policy_loss of this k_epoch: 0.012618497014045715
KL Divergence Average Loss: 0.18602150678634644
Total Loss of this k_epoch: 0.006206522695720196


Entropy of this k_epoch: 0.20468920469284058
Average policy_loss of this k_epoch: 0.007583394646644592
KL Divergence Average Loss: 0.23945797979831696
Total Loss of this k_epoch: -0.0002564862370491028






Entropy of this k_epoch: 0.180852472782135
Average policy_loss of this k_epoch: -0.007264845073223114
KL Divergence Average Loss: 0.19433751702308655
Total Loss of this k_epoch: -0.014364093542098999


Entropy of this k_epoch: 0.20968779921531677
Average policy_loss of this k_epoch: -0.014184050261974335
KL Divergence Average Loss: 0.33314311504364014
Total Loss of this k_epoch: -0.021337009966373444


Entropy of this k_epoch: 0.24285562336444855
Average policy_loss of this k_epoch: -0.006832815706729889
KL Divergence Average Loss: 0.2944281995296478
Total Loss of this k_epoch: -0.016031315550208092






Entropy of this k_epoch: 0.3536819815635681
Average policy_loss of this k_epoch: 0.004052542150020599
KL Divergence Average Loss: 0.6176350712776184
Total Loss of this k_epoch: -0.007455207407474518


Entropy of this k_epoch: 0.3789123296737671
Average policy_loss of this k_epoch: 0.004299938678741455
KL Divergence Average Loss: 0.6220133900642395
Total Loss of this k_epoch: -0.008425544947385788


Entropy of this k_epoch: 0.30277925729751587
Average policy_loss of this k_epoch: 0.001258254051208496
KL Divergence Average Loss: 0.4624446630477905
Total Loss of this k_epoch: -0.009256262332201004






Entropy of this k_epoch: 0.2870591878890991
Average policy_loss of this k_epoch: 0.0021278411149978638
KL Divergence Average Loss: 0.42612093687057495
Total Loss of this k_epoch: -0.007963908836245537


Entropy of this k_epoch: 0.19478553533554077
Average policy_loss of this k_epoch: -0.0014583021402359009
KL Divergence Average Loss: 0.16306288540363312
Total Loss of this k_epoch: -0.009566950611770153


Entropy of this k_epoch: 0.21797940135002136
Average policy_loss of this k_epoch: -0.018390558660030365
KL Divergence Average Loss: 0.24923960864543915
Total Loss of this k_epoch: -0.026797132566571236






Entropy of this k_epoch: 0.2645275592803955
Average policy_loss of this k_epoch: -0.002519957721233368
KL Divergence Average Loss: 0.2890467643737793
Total Loss of this k_epoch: -0.012855867855250835


Entropy of this k_epoch: 0.29608988761901855
Average policy_loss of this k_epoch: -0.0058213695883750916
KL Divergence Average Loss: 0.376930832862854
Total Loss of this k_epoch: -0.01685655489563942


Entropy of this k_epoch: 0.35003963112831116
Average policy_loss of this k_epoch: -0.008469685912132263
KL Divergence Average Loss: 0.542060136795044
Total Loss of this k_epoch: -0.02055106684565544






Entropy of this k_epoch: 0.3732151687145233
Average policy_loss of this k_epoch: 0.001465722918510437
KL Divergence Average Loss: 0.5806187391281128
Total Loss of this k_epoch: -0.011388849467039108


Entropy of this k_epoch: 0.39283064007759094
Average policy_loss of this k_epoch: 0.0029692500829696655
KL Divergence Average Loss: 0.6536654233932495
Total Loss of this k_epoch: -0.01013562735170126


Entropy of this k_epoch: 0.3792392611503601
Average policy_loss of this k_epoch: -0.0031801387667655945
KL Divergence Average Loss: 0.593302845954895
Total Loss of this k_epoch: -0.0162090752273798






Entropy of this k_epoch: 0.272172749042511
Average policy_loss of this k_epoch: -0.024655871093273163
KL Divergence Average Loss: 0.2952658236026764
Total Loss of this k_epoch: -0.035311851650476456


Entropy of this k_epoch: 0.22768621146678925
Average policy_loss of this k_epoch: -0.0206897035241127
KL Divergence Average Loss: 0.21346038579940796
Total Loss of this k_epoch: -0.029939409345388412


Entropy of this k_epoch: 0.24039259552955627
Average policy_loss of this k_epoch: 0.0007137656211853027
KL Divergence Average Loss: 0.2098214030265808
Total Loss of this k_epoch: -0.0092076500877738






Entropy of this k_epoch: 0.25218665599823
Average policy_loss of this k_epoch: -0.013108320534229279
KL Divergence Average Loss: 0.26250579953193665
Total Loss of this k_epoch: -0.023092595860362053


Entropy of this k_epoch: 0.28341126441955566
Average policy_loss of this k_epoch: -0.013756319880485535
KL Divergence Average Loss: 0.32794976234436035
Total Loss of this k_epoch: -0.024647384881973267


Entropy of this k_epoch: 0.350829541683197
Average policy_loss of this k_epoch: -0.013246938586235046
KL Divergence Average Loss: 0.4859470725059509
Total Loss of this k_epoch: -0.02592894434928894






Entropy of this k_epoch: 0.385728120803833
Average policy_loss of this k_epoch: -0.007924653589725494
KL Divergence Average Loss: 0.5942748785018921
Total Loss of this k_epoch: -0.021268311887979507


Entropy of this k_epoch: 0.38763344287872314
Average policy_loss of this k_epoch: -0.0085592120885849
KL Divergence Average Loss: 0.5890430212020874
Total Loss of this k_epoch: -0.02205045521259308


Entropy of this k_epoch: 0.3775225281715393
Average policy_loss of this k_epoch: -0.02032100409269333
KL Divergence Average Loss: 0.6230963468551636
Total Loss of this k_epoch: -0.032966166734695435






Entropy of this k_epoch: 0.32107049226760864
Average policy_loss of this k_epoch: -0.020096950232982635
KL Divergence Average Loss: 0.3982556462287903
Total Loss of this k_epoch: -0.03216791898012161


Entropy of this k_epoch: 0.3208690285682678
Average policy_loss of this k_epoch: -0.024628832936286926
KL Divergence Average Loss: 0.4145364761352539
Total Loss of this k_epoch: -0.03652691841125488


Entropy of this k_epoch: 0.27508264780044556
Average policy_loss of this k_epoch: -0.016621530055999756
KL Divergence Average Loss: 0.28813791275024414
Total Loss of this k_epoch: -0.027494283393025398






Entropy of this k_epoch: 0.27441298961639404
Average policy_loss of this k_epoch: -0.03398863226175308
KL Divergence Average Loss: 0.3202487826347351
Total Loss of this k_epoch: -0.04450679570436478


Entropy of this k_epoch: 0.291973352432251
Average policy_loss of this k_epoch: -0.033635251224040985
KL Divergence Average Loss: 0.3536064922809601
Total Loss of this k_epoch: -0.04469785466790199


Entropy of this k_epoch: 0.3221965432167053
Average policy_loss of this k_epoch: -0.02878057211637497
KL Divergence Average Loss: 0.4126027524471283
Total Loss of this k_epoch: -0.040764372795820236






Entropy of this k_epoch: 0.3659619092941284
Average policy_loss of this k_epoch: -0.02469911426305771
KL Divergence Average Loss: 0.580128014087677
Total Loss of this k_epoch: -0.037195928394794464


Entropy of this k_epoch: 0.3659577965736389
Average policy_loss of this k_epoch: -0.03416323661804199
KL Divergence Average Loss: 0.5806200504302979
Total Loss of this k_epoch: -0.046654924750328064


Entropy of this k_epoch: 0.3165420889854431
Average policy_loss of this k_epoch: -0.032320477068424225
KL Divergence Average Loss: 0.4282064437866211
Total Loss of this k_epoch: -0.043865516781806946






Entropy of this k_epoch: 0.3075994551181793
Average policy_loss of this k_epoch: -0.03166031092405319
KL Divergence Average Loss: 0.4459254741668701
Total Loss of this k_epoch: -0.04258102923631668


Entropy of this k_epoch: 0.30043959617614746
Average policy_loss of this k_epoch: -0.04043024778366089
KL Divergence Average Loss: 0.4322683811187744
Total Loss of this k_epoch: -0.0511295422911644


Entropy of this k_epoch: 0.2886151075363159
Average policy_loss of this k_epoch: -0.041156597435474396
KL Divergence Average Loss: 0.4052917957305908
Total Loss of this k_epoch: -0.05153443664312363






Entropy of this k_epoch: 0.3046623468399048
Average policy_loss of this k_epoch: -0.04306682199239731
KL Divergence Average Loss: 0.42714959383010864
Total Loss of this k_epoch: -0.05402844399213791


Entropy of this k_epoch: 0.3537285029888153
Average policy_loss of this k_epoch: -0.038138262927532196
KL Divergence Average Loss: 0.7179837822914124
Total Loss of this k_epoch: -0.04864484816789627


Entropy of this k_epoch: 0.37972205877304077
Average policy_loss of this k_epoch: -0.03394651412963867
KL Divergence Average Loss: 0.7267965078353882
Total Loss of this k_epoch: -0.045664653182029724






Entropy of this k_epoch: 0.38633453845977783
Average policy_loss of this k_epoch: -0.03865046054124832
KL Divergence Average Loss: 0.8777283430099487
Total Loss of this k_epoch: -0.04918990284204483


Entropy of this k_epoch: 0.3510326147079468
Average policy_loss of this k_epoch: -0.039890021085739136
KL Divergence Average Loss: 0.845167875289917
Total Loss of this k_epoch: -0.048989973962306976


Entropy of this k_epoch: 0.26606276631355286
Average policy_loss of this k_epoch: -0.04518597573041916
KL Divergence Average Loss: 0.47485023736953735
Total Loss of this k_epoch: -0.05374061316251755






Entropy of this k_epoch: 0.24950416386127472
Average policy_loss of this k_epoch: -0.045514948666095734
KL Divergence Average Loss: 0.427898108959198
Total Loss of this k_epoch: -0.0537111759185791


Entropy of this k_epoch: 0.2717346251010895
Average policy_loss of this k_epoch: -0.054641131311655045
KL Divergence Average Loss: 0.47784820199012756
Total Loss of this k_epoch: -0.06344938278198242


Entropy of this k_epoch: 0.31294459104537964
Average policy_loss of this k_epoch: -0.04559003934264183
KL Divergence Average Loss: 0.6559466123580933
Total Loss of this k_epoch: -0.0546778067946434






Entropy of this k_epoch: 0.3337191939353943
Average policy_loss of this k_epoch: -0.038461364805698395
KL Divergence Average Loss: 0.760064423084259
Total Loss of this k_epoch: -0.04754668101668358


Entropy of this k_epoch: 0.3184608817100525
Average policy_loss of this k_epoch: -0.05008421838283539
KL Divergence Average Loss: 0.7767547965049744
Total Loss of this k_epoch: -0.058239713311195374


Entropy of this k_epoch: 0.3002922534942627
Average policy_loss of this k_epoch: -0.04941119998693466
KL Divergence Average Loss: 0.7059643864631653
Total Loss of this k_epoch: -0.05736616998910904






Entropy of this k_epoch: 0.32963019609451294
Average policy_loss of this k_epoch: -0.05211484059691429
KL Divergence Average Loss: 0.8532727360725403
Total Loss of this k_epoch: -0.060063622891902924


Entropy of this k_epoch: 0.31199103593826294
Average policy_loss of this k_epoch: -0.056102413684129715
KL Divergence Average Loss: 0.7426989078521729
Total Loss of this k_epoch: -0.06427497416734695


Entropy of this k_epoch: 0.35819554328918457
Average policy_loss of this k_epoch: -0.054368969053030014
KL Divergence Average Loss: 0.9668872356414795
Total Loss of this k_epoch: -0.06260987371206284






Entropy of this k_epoch: 0.365884006023407
Average policy_loss of this k_epoch: -0.05477830767631531
KL Divergence Average Loss: 0.9867233037948608
Total Loss of this k_epoch: -0.06320527195930481


Entropy of this k_epoch: 0.32981401681900024
Average policy_loss of this k_epoch: -0.05646957457065582
KL Divergence Average Loss: 0.9227527976036072
Total Loss of this k_epoch: -0.06373274326324463


Entropy of this k_epoch: 0.29377830028533936
Average policy_loss of this k_epoch: -0.05796261131763458
KL Divergence Average Loss: 0.623371422290802
Total Loss of this k_epoch: -0.06641781330108643






Entropy of this k_epoch: 0.2687203884124756
Average policy_loss of this k_epoch: -0.059595007449388504
KL Divergence Average Loss: 0.5775431394577026
Total Loss of this k_epoch: -0.06725559383630753


Entropy of this k_epoch: 0.27629369497299194
Average policy_loss of this k_epoch: -0.06593120098114014
KL Divergence Average Loss: 0.562191367149353
Total Loss of this k_epoch: -0.07412397116422653


Entropy of this k_epoch: 0.3063027560710907
Average policy_loss of this k_epoch: -0.06770103424787521
KL Divergence Average Loss: 0.7751027345657349
Total Loss of this k_epoch: -0.07526514679193497




Epoch 1/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.51it/s]


Entropy of this k_epoch: 0.3238455355167389
Average policy_loss of this k_epoch: -0.06926068663597107
KL Divergence Average Loss: 0.8289653062820435
Total Loss of this k_epoch: -0.07716330885887146


Entropy of this k_epoch: 0.3409094214439392
Average policy_loss of this k_epoch: -0.06873251497745514
KL Divergence Average Loss: 0.8370336890220642
Total Loss of this k_epoch: -0.07740765064954758

Last k_epoch stats:
Loss: -0.0774077 | Ratio: 0.8204951 | Entropy Term: 0.3409094


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):   3%|▎         | 1/32 [00:05<02:46,  5.37s/it]

Entire Validation Dataset Accuracy: 0.5625| 108.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 1, 1, 1, 1, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 0., 0., 1., 0., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([ 0.6689,  0.6689,  0.6689,  0.6689,  0.6689, -1.4716, -1.4716,  0.6689,
        -1.4716,  0.6689], device='cuda:0')





Entropy of this k_epoch: 0.301899790763855
Average policy_loss of this k_epoch: 0.032637208700180054
KL Divergence Average Loss: 0.01908186823129654
Total Loss of this k_epoch: 0.01773303560912609






Entropy of this k_epoch: 0.3196268677711487
Average policy_loss of this k_epoch: -0.015128038823604584
KL Divergence Average Loss: 0.026176495477557182
Total Loss of this k_epoch: -0.030847618356347084






Entropy of this k_epoch: 0.34542641043663025
Average policy_loss of this k_epoch: -0.04018542170524597
KL Divergence Average Loss: 0.02790767326951027
Total Loss of this k_epoch: -0.05717766657471657


Entropy of this k_epoch: 0.3885279893875122
Average policy_loss of this k_epoch: -0.06214947998523712
KL Divergence Average Loss: 0.07168326526880264
Total Loss of this k_epoch: -0.08085905015468597


Entropy of this k_epoch: 0.40506672859191895
Average policy_loss of this k_epoch: -0.08512695133686066
KL Divergence Average Loss: 0.17762920260429382
Total Loss of this k_epoch: -0.10360399633646011






Entropy of this k_epoch: 0.4276287257671356
Average policy_loss of this k_epoch: -0.078646220266819
KL Divergence Average Loss: 0.21744388341903687
Total Loss of this k_epoch: -0.09785322099924088


Entropy of this k_epoch: 0.39257821440696716
Average policy_loss of this k_epoch: -0.08179431408643723
KL Divergence Average Loss: 0.2813974916934967
Total Loss of this k_epoch: -0.09860925376415253






Entropy of this k_epoch: 0.395425945520401
Average policy_loss of this k_epoch: -0.09211556613445282
KL Divergence Average Loss: 0.21893881261348724
Total Loss of this k_epoch: -0.10969747602939606







Entropy of this k_epoch: 0.3757258355617523
Average policy_loss of this k_epoch: -0.08582767844200134
KL Divergence Average Loss: 0.16172833740711212
Total Loss of this k_epoch: -0.10299669206142426


Entropy of this k_epoch: 0.3526565432548523
Average policy_loss of this k_epoch: -0.08602949976921082
KL Divergence Average Loss: 0.14187726378440857
Total Loss of this k_epoch: -0.10224355757236481



Epoch 2/32 (Inner K-Epochs):  16%|█▌        | 10/64 [00:00<00:04, 12.48it/s][A






Entropy of this k_epoch: 0.3657226860523224
Average policy_loss of this k_epoch: -0.0874711349606514
KL Divergence Average Loss: 0.13821615278720856
Total Loss of this k_epoch: -0.10437510907649994


Entropy of this k_epoch: 0.3693646490573883
Average policy_loss of this k_epoch: -0.09427350759506226
KL Divergence Average Loss: 0.20921295881271362
Total Loss of this k_epoch: -0.11064960807561874


Entropy of this k_epoch: 0.3654022812843323
Average policy_loss of this k_epoch: -0.0907532349228859
KL Divergence Average Loss: 0.2746548652648926
Total Loss of this k_epoch: -0.10627680271863937






Entropy of this k_epoch: 0.33458977937698364
Average policy_loss of this k_epoch: -0.09955774992704391
KL Divergence Average Loss: 0.23259535431861877
Total Loss of this k_epoch: -0.11396128684282303


Entropy of this k_epoch: 0.3368324637413025
Average policy_loss of this k_epoch: -0.09670361876487732
KL Divergence Average Loss: 0.17584189772605896
Total Loss of this k_epoch: -0.11178682744503021






Entropy of this k_epoch: 0.33324283361434937
Average policy_loss of this k_epoch: -0.09501104056835175
KL Divergence Average Loss: 0.12973333895206451
Total Loss of this k_epoch: -0.11037585139274597






Entropy of this k_epoch: 0.323616087436676
Average policy_loss of this k_epoch: -0.10940858721733093
KL Divergence Average Loss: 0.1733936369419098
Total Loss of this k_epoch: -0.12385545670986176


Entropy of this k_epoch: 0.32364171743392944
Average policy_loss of this k_epoch: -0.10783453285694122
KL Divergence Average Loss: 0.15115642547607422
Total Loss of this k_epoch: -0.1225050538778305


Entropy of this k_epoch: 0.28658443689346313
Average policy_loss of this k_epoch: -0.08249169588088989
KL Divergence Average Loss: 0.12617738544940948
Total Loss of this k_epoch: -0.09555914998054504






Entropy of this k_epoch: 0.2805593013763428
Average policy_loss of this k_epoch: -0.09886948764324188
KL Divergence Average Loss: 0.1527019739151001
Total Loss of this k_epoch: -0.11137043684720993


Entropy of this k_epoch: 0.27938199043273926
Average policy_loss of this k_epoch: -0.10468997061252594
KL Divergence Average Loss: 0.2543784976005554
Total Loss of this k_epoch: -0.11611528694629669






Entropy of this k_epoch: 0.27319788932800293
Average policy_loss of this k_epoch: -0.1035057082772255
KL Divergence Average Loss: 0.28072595596313477
Total Loss of this k_epoch: -0.1143583431839943






Entropy of this k_epoch: 0.24220064282417297
Average policy_loss of this k_epoch: -0.1037546768784523
KL Divergence Average Loss: 0.3116026520729065
Total Loss of this k_epoch: -0.11274868249893188


Entropy of this k_epoch: 0.2490597367286682
Average policy_loss of this k_epoch: -0.11171753704547882
KL Divergence Average Loss: 0.28415828943252563
Total Loss of this k_epoch: -0.1213289424777031


Entropy of this k_epoch: 0.2706878185272217
Average policy_loss of this k_epoch: -0.07701805233955383
KL Divergence Average Loss: 0.12233522534370422
Total Loss of this k_epoch: -0.08932908624410629






Entropy of this k_epoch: 0.2628498375415802
Average policy_loss of this k_epoch: -0.051714882254600525
KL Divergence Average Loss: 0.13233816623687744
Total Loss of this k_epoch: -0.06353399157524109


Entropy of this k_epoch: 0.24815401434898376
Average policy_loss of this k_epoch: -0.08197326213121414
KL Divergence Average Loss: 0.19343805313110352
Total Loss of this k_epoch: -0.09244658052921295






Entropy of this k_epoch: 0.257393479347229
Average policy_loss of this k_epoch: -0.09913232177495956
KL Divergence Average Loss: 0.21478694677352905
Total Loss of this k_epoch: -0.10985412448644638






Entropy of this k_epoch: 0.24988959729671478
Average policy_loss of this k_epoch: -0.10342216491699219
KL Divergence Average Loss: 0.3310824930667877
Total Loss of this k_epoch: -0.11260582506656647


Entropy of this k_epoch: 0.2538694143295288
Average policy_loss of this k_epoch: -0.08881746977567673
KL Divergence Average Loss: 0.46389347314834595
Total Loss of this k_epoch: -0.0968720093369484


Entropy of this k_epoch: 0.26529842615127563
Average policy_loss of this k_epoch: -0.08760327845811844
KL Divergence Average Loss: 0.4537227153778076
Total Loss of this k_epoch: -0.09633097797632217






Entropy of this k_epoch: 0.29533249139785767
Average policy_loss of this k_epoch: -0.06682909280061722
KL Divergence Average Loss: 0.5661027431488037
Total Loss of this k_epoch: -0.07593469321727753


Entropy of this k_epoch: 0.2480849325656891
Average policy_loss of this k_epoch: -0.09365077316761017
KL Divergence Average Loss: 0.43372613191604614
Total Loss of this k_epoch: -0.1017177626490593






Entropy of this k_epoch: 0.28200721740722656
Average policy_loss of this k_epoch: -0.09537408500909805
KL Divergence Average Loss: 0.3733053207397461
Total Loss of this k_epoch: -0.10574138909578323






Entropy of this k_epoch: 0.2729189395904541
Average policy_loss of this k_epoch: -0.10527656972408295
KL Divergence Average Loss: 0.2512548863887787
Total Loss of this k_epoch: -0.11640996485948563


Entropy of this k_epoch: 0.302143931388855
Average policy_loss of this k_epoch: -0.09870456159114838
KL Divergence Average Loss: 0.15344512462615967
Total Loss of this k_epoch: -0.1122773066163063


Entropy of this k_epoch: 0.304710328578949
Average policy_loss of this k_epoch: -0.1040089949965477
KL Divergence Average Loss: 0.09759758412837982
Total Loss of this k_epoch: -0.11826853454113007






Entropy of this k_epoch: 0.3102623224258423
Average policy_loss of this k_epoch: -0.09769807755947113
KL Divergence Average Loss: 0.0946328416466713
Total Loss of this k_epoch: -0.11226486414670944


Entropy of this k_epoch: 0.30567795038223267
Average policy_loss of this k_epoch: -0.10441090166568756
KL Divergence Average Loss: 0.11885116994380951
Total Loss of this k_epoch: -0.1185062900185585






Entropy of this k_epoch: 0.31018948554992676
Average policy_loss of this k_epoch: -0.10319728404283524
KL Divergence Average Loss: 0.12046058475971222
Total Loss of this k_epoch: -0.11750215291976929






Entropy of this k_epoch: 0.3209773898124695
Average policy_loss of this k_epoch: -0.10111541301012039
KL Divergence Average Loss: 0.19661235809326172
Total Loss of this k_epoch: -0.11519815772771835


Entropy of this k_epoch: 0.3112558126449585
Average policy_loss of this k_epoch: -0.10622813552618027
KL Divergence Average Loss: 0.21577757596969604
Total Loss of this k_epoch: -0.11963314563035965


Entropy of this k_epoch: 0.30257388949394226
Average policy_loss of this k_epoch: -0.1055445671081543
KL Divergence Average Loss: 0.2346702218055725
Total Loss of this k_epoch: -0.11832655966281891






Entropy of this k_epoch: 0.29723215103149414
Average policy_loss of this k_epoch: -0.11008249223232269
KL Divergence Average Loss: 0.2293168008327484
Total Loss of this k_epoch: -0.12265092879533768


Entropy of this k_epoch: 0.29335230588912964
Average policy_loss of this k_epoch: -0.11622089147567749
KL Divergence Average Loss: 0.21687065064907074
Total Loss of this k_epoch: -0.12871980667114258






Entropy of this k_epoch: 0.28012561798095703
Average policy_loss of this k_epoch: -0.10637138783931732
KL Divergence Average Loss: 0.1740042269229889
Total Loss of this k_epoch: -0.11863762140274048






Entropy of this k_epoch: 0.28242915868759155
Average policy_loss of this k_epoch: -0.10958503186702728
KL Divergence Average Loss: 0.1865575909614563
Total Loss of this k_epoch: -0.12184091657400131


Entropy of this k_epoch: 0.2850826680660248
Average policy_loss of this k_epoch: -0.10398633033037186
KL Divergence Average Loss: 0.1465861201286316
Total Loss of this k_epoch: -0.11677459627389908


Entropy of this k_epoch: 0.2802649736404419
Average policy_loss of this k_epoch: -0.06337485462427139
KL Divergence Average Loss: 0.12805616855621338
Total Loss of this k_epoch: -0.07610753923654556






Entropy of this k_epoch: 0.30787694454193115
Average policy_loss of this k_epoch: -0.11012165993452072
KL Divergence Average Loss: 0.18040955066680908
Total Loss of this k_epoch: -0.12371140718460083


Entropy of this k_epoch: 0.2913582921028137
Average policy_loss of this k_epoch: -0.11371645331382751
KL Divergence Average Loss: 0.2346196174621582
Total Loss of this k_epoch: -0.12593817710876465






Entropy of this k_epoch: 0.3050135672092438
Average policy_loss of this k_epoch: -0.11314426362514496
KL Divergence Average Loss: 0.23115995526313782
Total Loss of this k_epoch: -0.1260833442211151






Entropy of this k_epoch: 0.3251575827598572
Average policy_loss of this k_epoch: -0.10083329677581787
KL Divergence Average Loss: 0.2613394558429718
Total Loss of this k_epoch: -0.11447778344154358


Entropy of this k_epoch: 0.33742570877075195
Average policy_loss of this k_epoch: -0.10319886356592178
KL Divergence Average Loss: 0.2399265468120575
Total Loss of this k_epoch: -0.11767088621854782


Entropy of this k_epoch: 0.3201109766960144
Average policy_loss of this k_epoch: -0.09976451098918915
KL Divergence Average Loss: 0.2020152509212494
Total Loss of this k_epoch: -0.1137499064207077






Entropy of this k_epoch: 0.31765398383140564
Average policy_loss of this k_epoch: -0.11243601143360138
KL Divergence Average Loss: 0.1595449000597
Total Loss of this k_epoch: -0.1267232596874237


Entropy of this k_epoch: 0.32372426986694336
Average policy_loss of this k_epoch: -0.11380255222320557
KL Divergence Average Loss: 0.1457725614309311
Total Loss of this k_epoch: -0.12853103876113892






Entropy of this k_epoch: 0.32278549671173096
Average policy_loss of this k_epoch: -0.1133323535323143
KL Divergence Average Loss: 0.1550075262784958
Total Loss of this k_epoch: -0.12792155146598816






Entropy of this k_epoch: 0.32727986574172974
Average policy_loss of this k_epoch: -0.10347990691661835
KL Divergence Average Loss: 0.1326180100440979
Total Loss of this k_epoch: -0.11851771920919418


Entropy of this k_epoch: 0.32879918813705444
Average policy_loss of this k_epoch: -0.09445582330226898
KL Divergence Average Loss: 0.13044342398643494
Total Loss of this k_epoch: -0.10959134995937347


Entropy of this k_epoch: 0.31070542335510254
Average policy_loss of this k_epoch: -0.11312974244356155
KL Divergence Average Loss: 0.17498913407325745
Total Loss of this k_epoch: -0.1269151270389557






Entropy of this k_epoch: 0.3290156126022339
Average policy_loss of this k_epoch: -0.10818088799715042
KL Divergence Average Loss: 0.2204800546169281
Total Loss of this k_epoch: -0.12242686748504639


Entropy of this k_epoch: 0.3075184226036072
Average policy_loss of this k_epoch: -0.11678574979305267
KL Divergence Average Loss: 0.1758696734905243
Total Loss of this k_epoch: -0.13040298223495483




Epoch 2/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.48it/s]

Entropy of this k_epoch: 0.29339924454689026
Average policy_loss of this k_epoch: -0.1161312386393547
KL Divergence Average Loss: 0.2451922595500946
Total Loss of this k_epoch: -0.12834927439689636

Last k_epoch stats:
Loss: -0.1283493 | Ratio: 0.9999754 | Entropy Term: 0.2933992



>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):   6%|▋         | 2/32 [00:10<02:41,  5.38s/it]

Entire Validation Dataset Accuracy: 0.8854| 170.0 / 192.0 samples
old_predictions: 
tensor([1, 1, 1, 1, 1, 0, 1, 1, 0, 1], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([0., 0., 1., 1., 1., 1., 0., 1., 1., 0.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([-2.1778, -2.1778,  0.4520,  0.4520,  0.4520,  0.4520, -2.1778,  0.4520,
         0.4520, -2.1778], device='cuda:0')





Entropy of this k_epoch: 0.2995319664478302
Average policy_loss of this k_epoch: -0.010118745267391205
KL Divergence Average Loss: 0.01581469178199768
Total Loss of this k_epoch: -0.02493719756603241






Entropy of this k_epoch: 0.2967994213104248
Average policy_loss of this k_epoch: -0.05039278790354729
KL Divergence Average Loss: 0.07461133599281311
Total Loss of this k_epoch: -0.06448664516210556






Entropy of this k_epoch: 0.2749098539352417
Average policy_loss of this k_epoch: -0.06469015032052994
KL Divergence Average Loss: 0.03851782903075218
Total Loss of this k_epoch: -0.07805046439170837


Entropy of this k_epoch: 0.25882822275161743
Average policy_loss of this k_epoch: -0.0834452211856842
KL Divergence Average Loss: 0.06826615333557129
Total Loss of this k_epoch: -0.09570397436618805


Entropy of this k_epoch: 0.24999356269836426
Average policy_loss of this k_epoch: -0.08527219295501709
KL Divergence Average Loss: 0.11274732649326324
Total Loss of this k_epoch: -0.09664439409971237






Entropy of this k_epoch: 0.22011080384254456
Average policy_loss of this k_epoch: -0.0921335220336914
KL Divergence Average Loss: 0.07758526504039764
Total Loss of this k_epoch: -0.1023632138967514


Entropy of this k_epoch: 0.1920042484998703
Average policy_loss of this k_epoch: -0.09556746482849121
KL Divergence Average Loss: 0.0672718808054924
Total Loss of this k_epoch: -0.10449495911598206






Entropy of this k_epoch: 0.20846188068389893
Average policy_loss of this k_epoch: -0.08764591813087463
KL Divergence Average Loss: 0.038534343242645264
Total Loss of this k_epoch: -0.09768366813659668






Entropy of this k_epoch: 0.21038657426834106
Average policy_loss of this k_epoch: 0.12688744068145752
KL Divergence Average Loss: 0.07235804200172424
Total Loss of this k_epoch: 0.11709170043468475


Entropy of this k_epoch: 0.18795661628246307
Average policy_loss of this k_epoch: -0.013576425611972809
KL Divergence Average Loss: 0.05921114981174469
Total Loss of this k_epoch: -0.022382143884897232


Entropy of this k_epoch: 0.15799543261528015
Average policy_loss of this k_epoch: -0.07921964675188065
KL Divergence Average Loss: 0.08014478534460068
Total Loss of this k_epoch: -0.08631796389818192






Entropy of this k_epoch: 0.14357593655586243
Average policy_loss of this k_epoch: -0.08648274093866348
KL Divergence Average Loss: 0.09430744498968124
Total Loss of this k_epoch: -0.0927184671163559


Entropy of this k_epoch: 0.09023460745811462
Average policy_loss of this k_epoch: -0.09584898501634598
KL Divergence Average Loss: 0.15378567576408386
Total Loss of this k_epoch: -0.09882285445928574






Entropy of this k_epoch: 0.08322294056415558
Average policy_loss of this k_epoch: -0.09406483173370361
KL Divergence Average Loss: 0.17690835893154144
Total Loss of this k_epoch: -0.09645690023899078






Entropy of this k_epoch: 0.10634157061576843
Average policy_loss of this k_epoch: -0.08207012712955475
KL Divergence Average Loss: 0.2592897415161133
Total Loss of this k_epoch: -0.0847943052649498


Entropy of this k_epoch: 0.06628766655921936
Average policy_loss of this k_epoch: -0.09342833608388901
KL Divergence Average Loss: 0.20421698689460754
Total Loss of this k_epoch: -0.09470055252313614


Entropy of this k_epoch: 0.0916779637336731
Average policy_loss of this k_epoch: -0.0828857272863388
KL Divergence Average Loss: 0.3328837752342224
Total Loss of this k_epoch: -0.08414078503847122






Entropy of this k_epoch: 0.06366943567991257
Average policy_loss of this k_epoch: -0.09745386242866516
KL Divergence Average Loss: 0.16225752234458923
Total Loss of this k_epoch: -0.0990147590637207


Entropy of this k_epoch: 0.10174035280942917
Average policy_loss of this k_epoch: -0.09208817780017853
KL Divergence Average Loss: 0.11776450276374817
Total Loss of this k_epoch: -0.09599754959344864






Entropy of this k_epoch: 0.10393942892551422
Average policy_loss of this k_epoch: -0.09868136048316956
KL Divergence Average Loss: 0.1119050458073616
Total Loss of this k_epoch: -0.10275928676128387






Entropy of this k_epoch: 0.12861567735671997
Average policy_loss of this k_epoch: -0.06669256091117859
KL Divergence Average Loss: 0.09581521153450012
Total Loss of this k_epoch: -0.07216519117355347


Entropy of this k_epoch: 0.10049769282341003
Average policy_loss of this k_epoch: -0.08641573041677475
KL Divergence Average Loss: 0.11080610007047653
Total Loss of this k_epoch: -0.09033256024122238


Entropy of this k_epoch: 0.09727475792169571
Average policy_loss of this k_epoch: -0.09861893951892853
KL Divergence Average Loss: 0.12393803894519806
Total Loss of this k_epoch: -0.10224329680204391






Entropy of this k_epoch: 0.10084986686706543
Average policy_loss of this k_epoch: -0.0954282283782959
KL Divergence Average Loss: 0.1437111496925354
Total Loss of this k_epoch: -0.09903360903263092


Entropy of this k_epoch: 0.10185263305902481
Average policy_loss of this k_epoch: -0.09894786775112152
KL Divergence Average Loss: 0.1161608025431633
Total Loss of this k_epoch: -0.10287889093160629






Entropy of this k_epoch: 0.124539315700531
Average policy_loss of this k_epoch: -0.0984630137681961
KL Divergence Average Loss: 0.10300543904304504
Total Loss of this k_epoch: -0.10365992039442062






Entropy of this k_epoch: 0.1385425180196762
Average policy_loss of this k_epoch: -0.0982637032866478
KL Divergence Average Loss: 0.09617497026920319
Total Loss of this k_epoch: -0.10422907769680023


Entropy of this k_epoch: 0.16111455857753754
Average policy_loss of this k_epoch: -0.09800836443901062
KL Divergence Average Loss: 0.0753268301486969
Total Loss of this k_epoch: -0.1053108274936676


Entropy of this k_epoch: 0.19195210933685303
Average policy_loss of this k_epoch: -0.0682496726512909
KL Divergence Average Loss: 0.0498875230550766
Total Loss of this k_epoch: -0.07734840363264084






Entropy of this k_epoch: 0.2148319035768509
Average policy_loss of this k_epoch: -0.09112133085727692
KL Divergence Average Loss: 0.039124395698308945
Total Loss of this k_epoch: -0.1014716774225235


Entropy of this k_epoch: 0.22138711810112
Average policy_loss of this k_epoch: -0.09266166388988495
KL Divergence Average Loss: 0.033917222172021866
Total Loss of this k_epoch: -0.1033918485045433






Entropy of this k_epoch: 0.22197137773036957
Average policy_loss of this k_epoch: -0.09347368776798248
KL Divergence Average Loss: 0.03505472466349602
Total Loss of this k_epoch: -0.10422170907258987






Entropy of this k_epoch: 0.20984840393066406
Average policy_loss of this k_epoch: -0.09411840885877609
KL Divergence Average Loss: 0.05982702225446701
Total Loss of this k_epoch: -0.10401256382465363


Entropy of this k_epoch: 0.20375844836235046
Average policy_loss of this k_epoch: -0.08884796500205994
KL Divergence Average Loss: 0.07852017879486084
Total Loss of this k_epoch: -0.09825068712234497


Entropy of this k_epoch: 0.18159538507461548
Average policy_loss of this k_epoch: -0.09464365243911743
KL Divergence Average Loss: 0.07938159257173538
Total Loss of this k_epoch: -0.10292960703372955






Entropy of this k_epoch: 0.16891524195671082
Average policy_loss of this k_epoch: -0.09441331773996353
KL Divergence Average Loss: 0.09883402287960052
Total Loss of this k_epoch: -0.10187073796987534


Entropy of this k_epoch: 0.17171333730220795
Average policy_loss of this k_epoch: -0.09374895691871643
KL Divergence Average Loss: 0.06942765414714813
Total Loss of this k_epoch: -0.10164035111665726






Entropy of this k_epoch: 0.14705058932304382
Average policy_loss of this k_epoch: -0.09873797744512558
KL Divergence Average Loss: 0.08202214539051056
Total Loss of this k_epoch: -0.10527028888463974






Entropy of this k_epoch: 0.14915843307971954
Average policy_loss of this k_epoch: -0.09857556223869324
KL Divergence Average Loss: 0.08317604660987854
Total Loss of this k_epoch: -0.10520172119140625


Entropy of this k_epoch: 0.15356512367725372
Average policy_loss of this k_epoch: -0.09874740242958069
KL Divergence Average Loss: 0.07806643098592758
Total Loss of this k_epoch: -0.10564499348402023


Entropy of this k_epoch: 0.16315041482448578
Average policy_loss of this k_epoch: -0.09806500375270844
KL Divergence Average Loss: 0.07086692750453949
Total Loss of this k_epoch: -0.10551385581493378






Entropy of this k_epoch: 0.17846783995628357
Average policy_loss of this k_epoch: -0.09673602879047394
KL Divergence Average Loss: 0.06261245906352997
Total Loss of this k_epoch: -0.10503329336643219


Entropy of this k_epoch: 0.18920785188674927
Average policy_loss of this k_epoch: -0.0056131258606910706
KL Divergence Average Loss: 0.06817823648452759
Total Loss of this k_epoch: -0.014391736127436161






Entropy of this k_epoch: 0.1884053498506546
Average policy_loss of this k_epoch: -0.09613854438066483
KL Divergence Average Loss: 0.054009705781936646
Total Loss of this k_epoch: -0.10501871258020401






Entropy of this k_epoch: 0.18244148790836334
Average policy_loss of this k_epoch: -0.09783594310283661
KL Divergence Average Loss: 0.057064954191446304
Total Loss of this k_epoch: -0.10638736933469772


Entropy of this k_epoch: 0.1717894971370697
Average policy_loss of this k_epoch: -0.09830302745103836
KL Divergence Average Loss: 0.06582803279161453
Total Loss of this k_epoch: -0.10623422265052795


Entropy of this k_epoch: 0.1758117973804474
Average policy_loss of this k_epoch: -0.0981709361076355
KL Divergence Average Loss: 0.06252508610486984
Total Loss of this k_epoch: -0.10633627325296402






Entropy of this k_epoch: 0.18975746631622314
Average policy_loss of this k_epoch: -0.09767669439315796
KL Divergence Average Loss: 0.05737693980336189
Total Loss of this k_epoch: -0.10659079998731613


Entropy of this k_epoch: 0.20846252143383026
Average policy_loss of this k_epoch: -0.09310577809810638
KL Divergence Average Loss: 0.048955999314785004
Total Loss of this k_epoch: -0.10303933918476105






Entropy of this k_epoch: 0.21627286076545715
Average policy_loss of this k_epoch: -0.08974562585353851
KL Divergence Average Loss: 0.045545484870672226
Total Loss of this k_epoch: -0.10010381788015366






Entropy of this k_epoch: 0.21599751710891724
Average policy_loss of this k_epoch: -0.09345616400241852
KL Divergence Average Loss: 0.047767274081707
Total Loss of this k_epoch: -0.10377836972475052


Entropy of this k_epoch: 0.20028719305992126
Average policy_loss of this k_epoch: -0.09477206319570541
KL Divergence Average Loss: 0.0650075376033783
Total Loss of this k_epoch: -0.10413634777069092


Entropy of this k_epoch: 0.19472351670265198
Average policy_loss of this k_epoch: -0.09534262120723724
KL Divergence Average Loss: 0.0581577830016613
Total Loss of this k_epoch: -0.10449721664190292






Entropy of this k_epoch: 0.19757641851902008
Average policy_loss of this k_epoch: -0.0936025008559227
KL Divergence Average Loss: 0.07462123036384583
Total Loss of this k_epoch: -0.10273510962724686


Entropy of this k_epoch: 0.18311621248722076
Average policy_loss of this k_epoch: -0.09725650399923325
KL Divergence Average Loss: 0.056971896439790726
Total Loss of this k_epoch: -0.10584259778261185






Entropy of this k_epoch: 0.19532567262649536
Average policy_loss of this k_epoch: -0.09409818798303604
KL Divergence Average Loss: 0.05235764756798744
Total Loss of this k_epoch: -0.10334089398384094






Entropy of this k_epoch: 0.1997067928314209
Average policy_loss of this k_epoch: -0.004047825932502747
KL Divergence Average Loss: 0.053514011204242706
Total Loss of this k_epoch: -0.013498025946319103


Entropy of this k_epoch: 0.17383794486522675
Average policy_loss of this k_epoch: -0.09825742244720459
KL Divergence Average Loss: 0.06219838187098503
Total Loss of this k_epoch: -0.10632734000682831


Entropy of this k_epoch: 0.1853158324956894
Average policy_loss of this k_epoch: -0.033316101878881454
KL Divergence Average Loss: 0.06605218350887299
Total Loss of this k_epoch: -0.04192137345671654






Entropy of this k_epoch: 0.1575726866722107
Average policy_loss of this k_epoch: -0.09876863658428192
KL Divergence Average Loss: 0.07360520958900452
Total Loss of this k_epoch: -0.10591121762990952


Entropy of this k_epoch: 0.14906689524650574
Average policy_loss of this k_epoch: -0.0984460785984993
KL Divergence Average Loss: 0.08006350696086884
Total Loss of this k_epoch: -0.10509879142045975






Entropy of this k_epoch: 0.15946745872497559
Average policy_loss of this k_epoch: -0.0827239602804184
KL Divergence Average Loss: 0.07713056355714798
Total Loss of this k_epoch: -0.08992602676153183




Epoch 3/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.53it/s]


Entropy of this k_epoch: 0.13157354295253754
Average policy_loss of this k_epoch: -0.09831956028938293
KL Divergence Average Loss: 0.09663420170545578
Total Loss of this k_epoch: -0.10393189638853073


Entropy of this k_epoch: 0.15812695026397705
Average policy_loss of this k_epoch: -0.08903779089450836
KL Divergence Average Loss: 0.1956087052822113
Total Loss of this k_epoch: -0.09498804807662964

Last k_epoch stats:
Loss: -0.0949880 | Ratio: 0.9736908 | Entropy Term: 0.1581270


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):   9%|▉         | 3/32 [00:16<02:35,  5.37s/it]

Entire Validation Dataset Accuracy: 0.8854| 170.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200,
        0.2200], device='cuda:0')





Entropy of this k_epoch: 0.1301746964454651
Average policy_loss of this k_epoch: -0.033228203654289246
KL Divergence Average Loss: 0.017846370115876198
Total Loss of this k_epoch: -0.039558473974466324






Entropy of this k_epoch: 0.16347090899944305
Average policy_loss of this k_epoch: -0.028607536107301712
KL Divergence Average Loss: 0.14305487275123596
Total Loss of this k_epoch: -0.035350531339645386






Entropy of this k_epoch: 0.14816570281982422
Average policy_loss of this k_epoch: -0.030946416780352592
KL Divergence Average Loss: 0.027604609727859497
Total Loss of this k_epoch: -0.03807865455746651


Entropy of this k_epoch: 0.1563362181186676
Average policy_loss of this k_epoch: -0.01417478546500206
KL Divergence Average Loss: 0.08250539004802704
Total Loss of this k_epoch: -0.021166542544960976


Entropy of this k_epoch: 0.12832465767860413
Average policy_loss of this k_epoch: -0.03204391896724701
KL Divergence Average Loss: 0.04205356538295746
Total Loss of this k_epoch: -0.038039613515138626






Entropy of this k_epoch: 0.11646458506584167
Average policy_loss of this k_epoch: -0.0338028185069561
KL Divergence Average Loss: 0.038068678230047226
Total Loss of this k_epoch: -0.0392453595995903


Entropy of this k_epoch: 0.10774872452020645
Average policy_loss of this k_epoch: -0.03458211198449135
KL Divergence Average Loss: 0.014018493704497814
Total Loss of this k_epoch: -0.03982936218380928






Entropy of this k_epoch: 0.07912621647119522
Average policy_loss of this k_epoch: -0.036777764558792114
KL Divergence Average Loss: 0.02486012503504753
Total Loss of this k_epoch: -0.04048547521233559






Entropy of this k_epoch: 0.06959709525108337
Average policy_loss of this k_epoch: -0.037318795919418335
KL Divergence Average Loss: 0.04004332423210144
Total Loss of this k_epoch: -0.04039821773767471


Entropy of this k_epoch: 0.06947155296802521
Average policy_loss of this k_epoch: -0.03774527460336685
KL Divergence Average Loss: 0.02955060452222824
Total Loss of this k_epoch: -0.0409233458340168


Entropy of this k_epoch: 0.062361638993024826
Average policy_loss of this k_epoch: -0.03829381614923477
KL Divergence Average Loss: 0.031971871852874756
Total Loss of this k_epoch: -0.04109217971563339






Entropy of this k_epoch: 0.07002130895853043
Average policy_loss of this k_epoch: -0.037628453224897385
KL Divergence Average Loss: 0.02895408309996128
Total Loss of this k_epoch: -0.04083997756242752


Entropy of this k_epoch: 0.07481864839792252
Average policy_loss of this k_epoch: -0.037009723484516144
KL Divergence Average Loss: 0.03232507407665253
Total Loss of this k_epoch: -0.04042740538716316






Entropy of this k_epoch: 0.07760177552700043
Average policy_loss of this k_epoch: -0.03615590184926987
KL Divergence Average Loss: 0.03241066262125969
Total Loss of this k_epoch: -0.039711881428956985






Entropy of this k_epoch: 0.05323678255081177
Average policy_loss of this k_epoch: -0.03577053174376488
KL Divergence Average Loss: 0.07385797798633575
Total Loss of this k_epoch: -0.03769379109144211


Entropy of this k_epoch: 0.04670688882470131
Average policy_loss of this k_epoch: -0.03739651292562485
KL Divergence Average Loss: 0.0569668710231781
Total Loss of this k_epoch: -0.03916218876838684


Entropy of this k_epoch: 0.0371830016374588
Average policy_loss of this k_epoch: -0.03907804191112518
KL Divergence Average Loss: 0.0451650507748127
Total Loss of this k_epoch: -0.04048554226756096






Entropy of this k_epoch: 0.021120883524417877
Average policy_loss of this k_epoch: -0.03972216323018074
KL Divergence Average Loss: 0.05342836305499077
Total Loss of this k_epoch: -0.04024392366409302


Entropy of this k_epoch: 0.020031806081533432
Average policy_loss of this k_epoch: -0.039733223617076874
KL Divergence Average Loss: 0.055353786796331406
Total Loss of this k_epoch: -0.040181275457143784





Epoch 4/32 (Inner K-Epochs):  31%|███▏      | 20/64 [00:01<00:03, 12.44it/s]

Entropy of this k_epoch: 0.026063892990350723
Average policy_loss of this k_epoch: -0.03913237154483795
KL Divergence Average Loss: 0.05811291188001633
Total Loss of this k_epoch: -0.03985443711280823



[A


Entropy of this k_epoch: 0.018549630418419838
Average policy_loss of this k_epoch: -0.03966226056218147
KL Divergence Average Loss: 0.056693919003009796
Total Loss of this k_epoch: -0.040022801607847214


Entropy of this k_epoch: 0.024494320154190063
Average policy_loss of this k_epoch: -0.039400964975357056
KL Divergence Average Loss: 0.05708397552371025
Total Loss of this k_epoch: -0.040054839104413986






Entropy of this k_epoch: 0.018572110682725906
Average policy_loss of this k_epoch: -0.039631523191928864
KL Divergence Average Loss: 0.05812634527683258
Total Loss of this k_epoch: -0.039978865534067154


Entropy of this k_epoch: 0.030535470694303513
Average policy_loss of this k_epoch: -0.03885578736662865
KL Divergence Average Loss: 0.06927137076854706
Total Loss of this k_epoch: -0.03968984633684158


Entropy of this k_epoch: 0.039438389241695404
Average policy_loss of this k_epoch: -0.0371970608830452
KL Divergence Average Loss: 0.1383129358291626
Total Loss of this k_epoch: -0.0377858504652977






Entropy of this k_epoch: 0.02140328660607338
Average policy_loss of this k_epoch: -0.03974375128746033
KL Divergence Average Loss: 0.05385216325521469
Total Loss of this k_epoch: -0.040275394916534424


Entropy of this k_epoch: 0.027979562059044838
Average policy_loss of this k_epoch: -0.039297834038734436
KL Divergence Average Loss: 0.05329325050115585
Total Loss of this k_epoch: -0.040163878351449966


Entropy of this k_epoch: 0.02647377923130989
Average policy_loss of this k_epoch: -0.039644792675971985
KL Divergence Average Loss: 0.050838954746723175
Total Loss of this k_epoch: -0.04046009108424187






Entropy of this k_epoch: 0.04022056609392166
Average policy_loss of this k_epoch: -0.039191216230392456
KL Divergence Average Loss: 0.0431019589304924
Total Loss of this k_epoch: -0.04077122360467911


Entropy of this k_epoch: 0.049001291394233704
Average policy_loss of this k_epoch: -0.03838396817445755
KL Divergence Average Loss: 0.039139196276664734
Total Loss of this k_epoch: -0.04044264182448387


Entropy of this k_epoch: 0.05317951738834381
Average policy_loss of this k_epoch: -0.03835454210639
KL Divergence Average Loss: 0.04025238752365112
Total Loss of this k_epoch: -0.040610991418361664






Entropy of this k_epoch: 0.06621498614549637
Average policy_loss of this k_epoch: -0.03692695498466492
KL Divergence Average Loss: 0.04108646884560585
Total Loss of this k_epoch: -0.03982684016227722


Entropy of this k_epoch: 0.06005602702498436
Average policy_loss of this k_epoch: -0.0378476157784462
KL Divergence Average Loss: 0.03328332677483559
Total Loss of this k_epoch: -0.04051758348941803


Entropy of this k_epoch: 0.07799021154642105
Average policy_loss of this k_epoch: 0.014878783375024796
KL Divergence Average Loss: 0.02729342132806778
Total Loss of this k_epoch: 0.0112522067502141






Entropy of this k_epoch: 0.05941415950655937
Average policy_loss of this k_epoch: -0.03862768039107323
KL Divergence Average Loss: 0.0315459743142128
Total Loss of this k_epoch: -0.04128292575478554


Entropy of this k_epoch: 0.05149924382567406
Average policy_loss of this k_epoch: -0.038746729493141174
KL Divergence Average Loss: 0.03752448037266731
Total Loss of this k_epoch: -0.04094644635915756


Entropy of this k_epoch: 0.043893907219171524
Average policy_loss of this k_epoch: -0.039191484451293945
KL Divergence Average Loss: 0.03978646919131279
Total Loss of this k_epoch: -0.040988314896821976






Entropy of this k_epoch: 0.045680075883865356
Average policy_loss of this k_epoch: -0.03893487900495529
KL Divergence Average Loss: 0.03873085603117943
Total Loss of this k_epoch: -0.04083157703280449


Entropy of this k_epoch: 0.06305740773677826
Average policy_loss of this k_epoch: -0.03448828309774399
KL Divergence Average Loss: 0.2324444204568863
Total Loss of this k_epoch: -0.03531670942902565


Entropy of this k_epoch: 0.05239167809486389
Average policy_loss of this k_epoch: -0.03811575099825859
KL Divergence Average Loss: 0.03556349128484726
Total Loss of this k_epoch: -0.04037969931960106






Entropy of this k_epoch: 0.05226512998342514
Average policy_loss of this k_epoch: -0.038797635585069656
KL Divergence Average Loss: 0.035110168159008026
Total Loss of this k_epoch: -0.041059792041778564


Entropy of this k_epoch: 0.05215800926089287
Average policy_loss of this k_epoch: -0.03901180624961853
KL Divergence Average Loss: 0.034975774586200714
Total Loss of this k_epoch: -0.041269950568675995


Entropy of this k_epoch: 0.06193090230226517
Average policy_loss of this k_epoch: -0.03834659233689308
KL Divergence Average Loss: 0.03168638050556183
Total Loss of this k_epoch: -0.041126273572444916






Entropy of this k_epoch: 0.06381635367870331
Average policy_loss of this k_epoch: -0.03806173801422119
KL Divergence Average Loss: 0.029437169432640076
Total Loss of this k_epoch: -0.040958184748888016


Entropy of this k_epoch: 0.068479984998703
Average policy_loss of this k_epoch: -0.03798650950193405
KL Divergence Average Loss: 0.02820558100938797
Total Loss of this k_epoch: -0.041128452867269516


Entropy of this k_epoch: 0.07790683209896088
Average policy_loss of this k_epoch: -0.03697908669710159
KL Divergence Average Loss: 0.024186458438634872
Total Loss of this k_epoch: -0.04063256457448006






Entropy of this k_epoch: 0.06859692186117172
Average policy_loss of this k_epoch: -0.037634797394275665
KL Divergence Average Loss: 0.029378745704889297
Total Loss of this k_epoch: -0.04077085480093956


Entropy of this k_epoch: 0.07091503590345383
Average policy_loss of this k_epoch: -0.036933500319719315
KL Divergence Average Loss: 0.02710953913629055
Total Loss of this k_epoch: -0.040208153426647186


Entropy of this k_epoch: 0.06784185767173767
Average policy_loss of this k_epoch: -0.037896886467933655
KL Divergence Average Loss: 0.029084965586662292
Total Loss of this k_epoch: -0.04099813103675842






Entropy of this k_epoch: 0.05918075144290924
Average policy_loss of this k_epoch: -0.036311231553554535
KL Divergence Average Loss: 0.058787751942873
Total Loss of this k_epoch: -0.03868239372968674


Entropy of this k_epoch: 0.04680426046252251
Average policy_loss of this k_epoch: -0.03892282396554947
KL Divergence Average Loss: 0.04414917528629303
Total Loss of this k_epoch: -0.04082154482603073


Entropy of this k_epoch: 0.039726193994283676
Average policy_loss of this k_epoch: -0.03915175050497055
KL Divergence Average Loss: 0.04433497413992882
Total Loss of this k_epoch: -0.040694709867239






Entropy of this k_epoch: 0.03768894448876381
Average policy_loss of this k_epoch: -0.03907852619886398
KL Divergence Average Loss: 0.0492774173617363
Total Loss of this k_epoch: -0.040470197796821594


Entropy of this k_epoch: 0.04311061650514603
Average policy_loss of this k_epoch: -0.03786233440041542
KL Divergence Average Loss: 0.08687467873096466
Total Loss of this k_epoch: -0.039149120450019836


Entropy of this k_epoch: 0.03564191609621048
Average policy_loss of this k_epoch: -0.03918622434139252
KL Divergence Average Loss: 0.05164327472448349
Total Loss of this k_epoch: -0.040451887995004654






Entropy of this k_epoch: 0.036834798753261566
Average policy_loss of this k_epoch: -0.0391085147857666
KL Divergence Average Loss: 0.056619271636009216
Total Loss of this k_epoch: -0.04038406163454056


Entropy of this k_epoch: 0.036623284220695496
Average policy_loss of this k_epoch: -0.039186038076877594
KL Divergence Average Loss: 0.0454799123108387
Total Loss of this k_epoch: -0.040562402456998825


Entropy of this k_epoch: 0.03869873657822609
Average policy_loss of this k_epoch: -0.0391557514667511
KL Divergence Average Loss: 0.04372229427099228
Total Loss of this k_epoch: -0.04065346717834473






Entropy of this k_epoch: 0.04605376720428467
Average policy_loss of this k_epoch: -0.038832828402519226
KL Divergence Average Loss: 0.044644180685281754
Total Loss of this k_epoch: -0.040689073503017426


Entropy of this k_epoch: 0.05293744057416916
Average policy_loss of this k_epoch: -0.03866098076105118
KL Divergence Average Loss: 0.04059034585952759
Total Loss of this k_epoch: -0.040901947766542435


Entropy of this k_epoch: 0.05526750907301903
Average policy_loss of this k_epoch: -0.03856264054775238
KL Divergence Average Loss: 0.03505294770002365
Total Loss of this k_epoch: -0.04097548499703407




Epoch 4/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.47it/s]

Entropy of this k_epoch: 0.05729778856039047
Average policy_loss of this k_epoch: -0.038267191499471664
KL Divergence Average Loss: 0.03357912600040436
Total Loss of this k_epoch: -0.04079629108309746


Entropy of this k_epoch: 0.06157205253839493
Average policy_loss of this k_epoch: -0.0379597432911396
KL Divergence Average Loss: 0.032157063484191895
Total Loss of this k_epoch: -0.04071677476167679


Entropy of this k_epoch: 0.0675969049334526
Average policy_loss of this k_epoch: -0.03789331018924713
KL Divergence Average Loss: 0.028698259964585304
Total Loss of this k_epoch: -0.04098617285490036

Last k_epoch stats:
Loss: -0.0409862 | Ratio: 1.0276721 | Entropy Term: 0.0675969



>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  12%|█▎        | 4/32 [00:21<02:30,  5.38s/it]

Entire Validation Dataset Accuracy: 0.8958| 172.0 / 192.0 samples
old_predictions: 
tensor([0, 1, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 0., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([ 0.2562, -3.8426,  0.2562,  0.2562,  0.2562,  0.2562,  0.2562,  0.2562,
         0.2562,  0.2562], device='cuda:0')





Entropy of this k_epoch: 0.06993229687213898
Average policy_loss of this k_epoch: -0.0394413135945797
KL Divergence Average Loss: 0.005871832370758057
Total Loss of this k_epoch: -0.04287920892238617






Entropy of this k_epoch: 0.07998059689998627
Average policy_loss of this k_epoch: -0.007148057222366333
KL Divergence Average Loss: 0.003182754386216402
Total Loss of this k_epoch: -0.011115259490907192






Entropy of this k_epoch: 0.07574838399887085
Average policy_loss of this k_epoch: 0.04595537856221199
KL Divergence Average Loss: 0.005428951699286699
Total Loss of this k_epoch: 0.042222246527671814


Entropy of this k_epoch: 0.05387880280613899
Average policy_loss of this k_epoch: -0.04039204865694046
KL Divergence Average Loss: 0.009836314246058464
Total Loss of this k_epoch: -0.042987626045942307


Entropy of this k_epoch: 0.05093960464000702
Average policy_loss of this k_epoch: -0.04043595492839813
KL Divergence Average Loss: 0.011928927153348923
Total Loss of this k_epoch: -0.042863648384809494






Entropy of this k_epoch: 0.05147688835859299
Average policy_loss of this k_epoch: -0.03903663158416748
KL Divergence Average Loss: 0.08582238852977753
Total Loss of this k_epoch: -0.04075225070118904


Entropy of this k_epoch: 0.038614995777606964
Average policy_loss of this k_epoch: -0.04104935750365257
KL Divergence Average Loss: 0.02248981222510338
Total Loss of this k_epoch: -0.042755208909511566






Entropy of this k_epoch: 0.0340704470872879
Average policy_loss of this k_epoch: -0.04132479056715965
KL Divergence Average Loss: 0.021837040781974792
Total Loss of this k_epoch: -0.04280994459986687






Entropy of this k_epoch: 0.030504591763019562
Average policy_loss of this k_epoch: -0.04129994660615921
KL Divergence Average Loss: 0.017121799290180206
Total Loss of this k_epoch: -0.04265395924448967


Entropy of this k_epoch: 0.025295739993453026
Average policy_loss of this k_epoch: -0.04152938723564148
KL Divergence Average Loss: 0.020801903679966927
Total Loss of this k_epoch: -0.042586155235767365


Entropy of this k_epoch: 0.030836429446935654
Average policy_loss of this k_epoch: -0.0412575863301754
KL Divergence Average Loss: 0.018602216616272926
Total Loss of this k_epoch: -0.04261338710784912






Entropy of this k_epoch: 0.0293462835252285
Average policy_loss of this k_epoch: -0.04142380133271217
KL Divergence Average Loss: 0.018162932246923447
Total Loss of this k_epoch: -0.042709484696388245


Entropy of this k_epoch: 0.03278932720422745
Average policy_loss of this k_epoch: -0.04129894822835922
KL Divergence Average Loss: 0.017352212220430374
Total Loss of this k_epoch: -0.04276489466428757






Entropy of this k_epoch: 0.02652374468743801
Average policy_loss of this k_epoch: -0.04154064878821373
KL Divergence Average Loss: 0.01947181299328804
Total Loss of this k_epoch: -0.04267212003469467






Entropy of this k_epoch: 0.03351932764053345
Average policy_loss of this k_epoch: -0.041267018765211105
KL Divergence Average Loss: 0.020926786586642265
Total Loss of this k_epoch: -0.04273371770977974


Entropy of this k_epoch: 0.030849002301692963
Average policy_loss of this k_epoch: -0.041216470301151276
KL Divergence Average Loss: 0.023242928087711334
Total Loss of this k_epoch: -0.0425264909863472


Entropy of this k_epoch: 0.029036497697234154
Average policy_loss of this k_epoch: -0.04142719879746437
KL Divergence Average Loss: 0.01838758960366249
Total Loss of this k_epoch: -0.042695146054029465






Entropy of this k_epoch: 0.030737226828932762
Average policy_loss of this k_epoch: -0.04129910096526146
KL Divergence Average Loss: 0.017959563061594963
Total Loss of this k_epoch: -0.042656365782022476


Entropy of this k_epoch: 0.0312097929418087
Average policy_loss of this k_epoch: -0.04130736365914345
KL Divergence Average Loss: 0.017947867512702942
Total Loss of this k_epoch: -0.04268837720155716






Entropy of this k_epoch: 0.03717893734574318
Average policy_loss of this k_epoch: -0.04096696525812149
KL Divergence Average Loss: 0.015249370597302914
Total Loss of this k_epoch: -0.042673416435718536






Entropy of this k_epoch: 0.034877367317676544
Average policy_loss of this k_epoch: -0.04107191786170006
KL Divergence Average Loss: 0.017093485221266747
Total Loss of this k_epoch: -0.04264485090970993


Entropy of this k_epoch: 0.03892980515956879
Average policy_loss of this k_epoch: -0.04105687513947487
KL Divergence Average Loss: 0.014144821092486382
Total Loss of this k_epoch: -0.04286191612482071


Entropy of this k_epoch: 0.03625383600592613
Average policy_loss of this k_epoch: -0.04106968268752098
KL Divergence Average Loss: 0.015398266725242138
Total Loss of this k_epoch: -0.04272839426994324






Entropy of this k_epoch: 0.04514436423778534
Average policy_loss of this k_epoch: -0.0406988188624382
KL Divergence Average Loss: 0.01403200812637806
Total Loss of this k_epoch: -0.042815715074539185


Entropy of this k_epoch: 0.043079495429992676
Average policy_loss of this k_epoch: -0.04078906774520874
KL Divergence Average Loss: 0.012533450499176979
Total Loss of this k_epoch: -0.04281770810484886






Entropy of this k_epoch: 0.03929579257965088
Average policy_loss of this k_epoch: -0.04100162163376808
KL Divergence Average Loss: 0.013350753113627434
Total Loss of this k_epoch: -0.04283290356397629






Entropy of this k_epoch: 0.05416416376829147
Average policy_loss of this k_epoch: -0.03762880712747574
KL Divergence Average Loss: 0.0444224551320076
Total Loss of this k_epoch: -0.03989278897643089


Entropy of this k_epoch: 0.048899345099925995
Average policy_loss of this k_epoch: -0.03739502280950546
KL Divergence Average Loss: 0.010323021560907364
Total Loss of this k_epoch: -0.039736758917570114


Entropy of this k_epoch: 0.040727388113737106
Average policy_loss of this k_epoch: -0.040954090654850006
KL Divergence Average Loss: 0.01281499769538641
Total Loss of this k_epoch: -0.042862311005592346






Entropy of this k_epoch: 0.04047420993447304
Average policy_loss of this k_epoch: -0.040950432419776917
KL Divergence Average Loss: 0.01416882686316967
Total Loss of this k_epoch: -0.04283245652914047


Entropy of this k_epoch: 0.03875900059938431
Average policy_loss of this k_epoch: -0.04108981788158417
KL Divergence Average Loss: 0.015018372796475887
Total Loss of this k_epoch: -0.04287758469581604






Entropy of this k_epoch: 0.04057179391384125
Average policy_loss of this k_epoch: -0.03747716546058655
KL Divergence Average Loss: 0.16832265257835388
Total Loss of this k_epoch: -0.037822525948286057






Entropy of this k_epoch: 0.04482191801071167
Average policy_loss of this k_epoch: -0.040616199374198914
KL Divergence Average Loss: 0.02685738354921341
Total Loss of this k_epoch: -0.042588721960783005


Entropy of this k_epoch: 0.05392013490200043
Average policy_loss of this k_epoch: -0.03988267481327057
KL Divergence Average Loss: 0.058811116963624954
Total Loss of this k_epoch: -0.04199057072401047


Entropy of this k_epoch: 0.04021954536437988
Average policy_loss of this k_epoch: -0.04109695926308632
KL Divergence Average Loss: 0.014155430719256401
Total Loss of this k_epoch: -0.04296638444066048






Entropy of this k_epoch: 0.04420359432697296
Average policy_loss of this k_epoch: -0.0407310351729393
KL Divergence Average Loss: 0.012299297377467155
Total Loss of this k_epoch: -0.04281822219491005


Entropy of this k_epoch: 0.0482541024684906
Average policy_loss of this k_epoch: -0.04067257046699524
KL Divergence Average Loss: 0.010098814032971859
Total Loss of this k_epoch: -0.042984288185834885






Entropy of this k_epoch: 0.042830318212509155
Average policy_loss of this k_epoch: -0.0408933162689209
KL Divergence Average Loss: 0.011552717536687851
Total Loss of this k_epoch: -0.042919304221868515






Entropy of this k_epoch: 0.05204557627439499
Average policy_loss of this k_epoch: -0.040273867547512054
KL Divergence Average Loss: 0.009239697828888893
Total Loss of this k_epoch: -0.04278374835848808


Entropy of this k_epoch: 0.05900300294160843
Average policy_loss of this k_epoch: -0.0397876538336277
KL Divergence Average Loss: 0.008055014535784721
Total Loss of this k_epoch: -0.04265725240111351


Entropy of this k_epoch: 0.06295877695083618
Average policy_loss of this k_epoch: -0.039022114127874374
KL Divergence Average Loss: 0.01115074660629034
Total Loss of this k_epoch: -0.042058542370796204






Entropy of this k_epoch: 0.05214611440896988
Average policy_loss of this k_epoch: -0.0403265543282032
KL Divergence Average Loss: 0.012974822893738747
Total Loss of this k_epoch: -0.042804110795259476


Entropy of this k_epoch: 0.05398500710725784
Average policy_loss of this k_epoch: -0.04035113751888275
KL Divergence Average Loss: 0.009295695461332798
Total Loss of this k_epoch: -0.04295743256807327






Entropy of this k_epoch: 0.0570223368704319
Average policy_loss of this k_epoch: -0.036252617835998535
KL Divergence Average Loss: 0.06245286017656326
Total Loss of this k_epoch: -0.03847920522093773






Entropy of this k_epoch: 0.04431299492716789
Average policy_loss of this k_epoch: -0.04072726517915726
KL Divergence Average Loss: 0.011637609452009201
Total Loss of this k_epoch: -0.042826540768146515


Entropy of this k_epoch: 0.04219385236501694
Average policy_loss of this k_epoch: -0.04080570489168167
KL Divergence Average Loss: 0.012787354178726673
Total Loss of this k_epoch: -0.042787522077560425


Entropy of this k_epoch: 0.04403068870306015
Average policy_loss of this k_epoch: -0.040705692023038864
KL Divergence Average Loss: 0.012208941392600536
Total Loss of this k_epoch: -0.04278513789176941






Entropy of this k_epoch: 0.03768038749694824
Average policy_loss of this k_epoch: -0.04106489568948746
KL Divergence Average Loss: 0.01376304216682911
Total Loss of this k_epoch: -0.04281128570437431


Entropy of this k_epoch: 0.045823074877262115
Average policy_loss of this k_epoch: -0.040404051542282104
KL Divergence Average Loss: 0.016445064917206764
Total Loss of this k_epoch: -0.04253075644373894






Entropy of this k_epoch: 0.036024678498506546
Average policy_loss of this k_epoch: -0.041087277233600616
KL Divergence Average Loss: 0.01489631924778223
Total Loss of this k_epoch: -0.04273954778909683






Entropy of this k_epoch: 0.038650646805763245
Average policy_loss of this k_epoch: -0.04075135290622711
KL Divergence Average Loss: 0.017869506031274796
Total Loss of this k_epoch: -0.04250518977642059


Entropy of this k_epoch: 0.0433785505592823
Average policy_loss of this k_epoch: -0.038986556231975555
KL Divergence Average Loss: 0.1828802227973938
Total Loss of this k_epoch: -0.039326682686805725


Entropy of this k_epoch: 0.03614657372236252
Average policy_loss of this k_epoch: -0.0412336066365242
KL Divergence Average Loss: 0.01377074420452118
Total Loss of this k_epoch: -0.04290322586894035






Entropy of this k_epoch: 0.034271240234375
Average policy_loss of this k_epoch: -0.04139082506299019
KL Divergence Average Loss: 0.015050476416945457
Total Loss of this k_epoch: -0.04295388236641884


Entropy of this k_epoch: 0.04174960404634476
Average policy_loss of this k_epoch: -0.04089972749352455
KL Divergence Average Loss: 0.013835219666361809
Total Loss of this k_epoch: -0.0428488552570343






Entropy of this k_epoch: 0.04142126441001892
Average policy_loss of this k_epoch: -0.04090743139386177
KL Divergence Average Loss: 0.012889789417386055
Total Loss of this k_epoch: -0.042849596589803696






Entropy of this k_epoch: 0.056187957525253296
Average policy_loss of this k_epoch: -0.03844072297215462
KL Divergence Average Loss: 0.03616217151284218
Total Loss of this k_epoch: -0.04088849946856499


Entropy of this k_epoch: 0.044760189950466156
Average policy_loss of this k_epoch: -0.04055847227573395
KL Divergence Average Loss: 0.01062722411006689
Total Loss of this k_epoch: -0.042690210044384


Entropy of this k_epoch: 0.04111010208725929
Average policy_loss of this k_epoch: -0.04093196243047714
KL Divergence Average Loss: 0.01260465383529663
Total Loss of this k_epoch: -0.042861420661211014






Entropy of this k_epoch: 0.04611895978450775
Average policy_loss of this k_epoch: -0.04078661650419235
KL Divergence Average Loss: 0.012403716333210468
Total Loss of this k_epoch: -0.04296852648258209


Entropy of this k_epoch: 0.04034380614757538
Average policy_loss of this k_epoch: -0.04112865403294563
KL Divergence Average Loss: 0.011954545974731445
Total Loss of this k_epoch: -0.04302629828453064






Entropy of this k_epoch: 0.041659679263830185
Average policy_loss of this k_epoch: -0.040860701352357864
KL Divergence Average Loss: 0.01206972822546959
Total Loss of this k_epoch: -0.04282299056649208




Epoch 5/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.47it/s]


Entropy of this k_epoch: 0.04070349782705307
Average policy_loss of this k_epoch: -0.04113532602787018
KL Divergence Average Loss: 0.012969402596354485
Total Loss of this k_epoch: -0.04304080829024315


Entropy of this k_epoch: 0.04068286716938019
Average policy_loss of this k_epoch: -0.04063553363084793
KL Divergence Average Loss: 0.038023870438337326
Total Loss of this k_epoch: -0.04228943586349487

Last k_epoch stats:
Loss: -0.0422894 | Ratio: 0.9823074 | Entropy Term: 0.0406829


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  16%|█▌        | 5/32 [00:26<02:25,  5.38s/it]

Entire Validation Dataset Accuracy: 0.8802| 169.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200,
        0.2200], device='cuda:0')





Entropy of this k_epoch: 0.03382135182619095
Average policy_loss of this k_epoch: -0.0292573980987072
KL Divergence Average Loss: 0.0022678771056234837
Total Loss of this k_epoch: -0.03092578612267971






Entropy of this k_epoch: 0.04337381571531296
Average policy_loss of this k_epoch: -0.028807278722524643
KL Divergence Average Loss: 0.008811685256659985
Total Loss of this k_epoch: -0.030887853354215622






Entropy of this k_epoch: 0.038960929960012436
Average policy_loss of this k_epoch: -0.02879345417022705
KL Divergence Average Loss: 0.0014187663327902555
Total Loss of this k_epoch: -0.030727311968803406


Entropy of this k_epoch: 0.03890542313456535
Average policy_loss of this k_epoch: -0.028809022158384323
KL Divergence Average Loss: 0.0035031780134886503
Total Loss of this k_epoch: -0.03071926161646843


Entropy of this k_epoch: 0.03454349935054779
Average policy_loss of this k_epoch: -0.029206443578004837
KL Divergence Average Loss: 0.002994667738676071
Total Loss of this k_epoch: -0.03090367093682289






Entropy of this k_epoch: 0.029009606689214706
Average policy_loss of this k_epoch: -0.029583517462015152
KL Divergence Average Loss: 0.003092404454946518
Total Loss of this k_epoch: -0.0310030747205019


Entropy of this k_epoch: 0.0431404709815979
Average policy_loss of this k_epoch: -0.0279129296541214
KL Divergence Average Loss: 0.024616949260234833
Total Loss of this k_epoch: -0.02982378378510475






Entropy of this k_epoch: 0.027323726564645767
Average policy_loss of this k_epoch: -0.029523450881242752
KL Divergence Average Loss: 0.0031673163175582886
Total Loss of this k_epoch: -0.030857965350151062






Entropy of this k_epoch: 0.02423766441643238
Average policy_loss of this k_epoch: -0.029827222228050232
KL Divergence Average Loss: 0.004369727801531553
Total Loss of this k_epoch: -0.030995408073067665


Entropy of this k_epoch: 0.02695274166762829
Average policy_loss of this k_epoch: -0.029748454689979553
KL Divergence Average Loss: 0.008191373199224472
Total Loss of this k_epoch: -0.031014177948236465


Entropy of this k_epoch: 0.025269072502851486
Average policy_loss of this k_epoch: -0.029789485037326813
KL Divergence Average Loss: 0.005176381208002567
Total Loss of this k_epoch: -0.031001176685094833






Entropy of this k_epoch: 0.02205847203731537
Average policy_loss of this k_epoch: -0.029910050332546234
KL Divergence Average Loss: 0.012714024633169174
Total Loss of this k_epoch: -0.030885834246873856


Entropy of this k_epoch: 0.023538265377283096
Average policy_loss of this k_epoch: -0.029838301241397858
KL Divergence Average Loss: 0.023041879758238792
Total Loss of this k_epoch: -0.03078479692339897






Entropy of this k_epoch: 0.02012413926422596
Average policy_loss of this k_epoch: -0.030055183917284012
KL Divergence Average Loss: 0.006270579062402248
Total Loss of this k_epoch: -0.030998684465885162






Entropy of this k_epoch: 0.03347426652908325
Average policy_loss of this k_epoch: -0.0290139839053154
KL Divergence Average Loss: 0.06407538056373596
Total Loss of this k_epoch: -0.030046943575143814


Entropy of this k_epoch: 0.021934904158115387
Average policy_loss of this k_epoch: -0.02999712899327278
KL Divergence Average Loss: 0.008021140471100807
Total Loss of this k_epoch: -0.031013663858175278


Entropy of this k_epoch: 0.019992206245660782
Average policy_loss of this k_epoch: -0.030119910836219788
KL Divergence Average Loss: 0.007407492026686668
Total Loss of this k_epoch: -0.031045446172356606






Entropy of this k_epoch: 0.020690767094492912
Average policy_loss of this k_epoch: -0.0300455242395401
KL Divergence Average Loss: 0.005394972860813141
Total Loss of this k_epoch: -0.031026113778352737


Entropy of this k_epoch: 0.02495814859867096
Average policy_loss of this k_epoch: -0.029798489063978195
KL Divergence Average Loss: 0.009722745046019554
Total Loss of this k_epoch: -0.030949167907238007






Entropy of this k_epoch: 0.028768911957740784
Average policy_loss of this k_epoch: -0.029517732560634613
KL Divergence Average Loss: 0.03048754297196865
Total Loss of this k_epoch: -0.030651303008198738






Entropy of this k_epoch: 0.025864291936159134
Average policy_loss of this k_epoch: -0.029667437076568604
KL Divergence Average Loss: 0.00338504696264863
Total Loss of this k_epoch: -0.03092680126428604


Entropy of this k_epoch: 0.024495642632246017
Average policy_loss of this k_epoch: -0.0298064686357975
KL Divergence Average Loss: 0.004375034943223
Total Loss of this k_epoch: -0.03098750114440918


Entropy of this k_epoch: 0.028207551687955856
Average policy_loss of this k_epoch: -0.029643148183822632
KL Divergence Average Loss: 0.0035875122994184494
Total Loss of this k_epoch: -0.03101765178143978






Entropy of this k_epoch: 0.0275227390229702
Average policy_loss of this k_epoch: -0.02965853363275528
KL Divergence Average Loss: 0.0040838452987372875
Total Loss of this k_epoch: -0.030993832275271416


Entropy of this k_epoch: 0.02441265992820263
Average policy_loss of this k_epoch: -0.029848922044038773
KL Divergence Average Loss: 0.004733105655759573
Total Loss of this k_epoch: -0.031022222712635994






Entropy of this k_epoch: 0.026542795822024345
Average policy_loss of this k_epoch: -0.029762301594018936
KL Divergence Average Loss: 0.004178961738944054
Total Loss of this k_epoch: -0.031047651544213295






Entropy of this k_epoch: 0.024214912205934525
Average policy_loss of this k_epoch: -0.029877714812755585
KL Divergence Average Loss: 0.0045170290395617485
Total Loss of this k_epoch: -0.031043289229273796


Entropy of this k_epoch: 0.025187324732542038
Average policy_loss of this k_epoch: -0.029851388186216354
KL Divergence Average Loss: 0.004964099265635014
Total Loss of this k_epoch: -0.031061112880706787


Entropy of this k_epoch: 0.028383051976561546
Average policy_loss of this k_epoch: -0.029720764607191086
KL Divergence Average Loss: 0.004990209359675646
Total Loss of this k_epoch: -0.031090015545487404






Entropy of this k_epoch: 0.041299786418676376
Average policy_loss of this k_epoch: -0.025641117244958878
KL Divergence Average Loss: 0.08236249536275864
Total Loss of this k_epoch: -0.026882482692599297


Entropy of this k_epoch: 0.032836370170116425
Average policy_loss of this k_epoch: -0.029432162642478943
KL Divergence Average Loss: 0.0028390721417963505
Total Loss of this k_epoch: -0.031045591458678246







Entropy of this k_epoch: 0.03523046523332596
Average policy_loss of this k_epoch: -0.027002934366464615
KL Divergence Average Loss: 0.05037693679332733
Total Loss of this k_epoch: -0.028260689228773117



Epoch 6/32 (Inner K-Epochs):  50%|█████     | 32/64 [00:02<00:02, 12.43it/s][A


Entropy of this k_epoch: 0.028366733342409134
Average policy_loss of this k_epoch: -0.029634833335876465
KL Divergence Average Loss: 0.003722370369359851
Total Loss of this k_epoch: -0.031015947461128235


Entropy of this k_epoch: 0.02249089814722538
Average policy_loss of this k_epoch: -0.029952209442853928
KL Divergence Average Loss: 0.005122393369674683
Total Loss of this k_epoch: -0.03102552890777588






Entropy of this k_epoch: 0.024784216657280922
Average policy_loss of this k_epoch: -0.029813792556524277
KL Divergence Average Loss: 0.006851383484899998
Total Loss of this k_epoch: -0.030984489247202873


Entropy of this k_epoch: 0.022208966314792633
Average policy_loss of this k_epoch: -0.029950719326734543
KL Divergence Average Loss: 0.004937674384564161
Total Loss of this k_epoch: -0.03101179003715515


Entropy of this k_epoch: 0.023749157786369324
Average policy_loss of this k_epoch: -0.02982689067721367
KL Divergence Average Loss: 0.004293033853173256
Total Loss of this k_epoch: -0.03097141906619072






Entropy of this k_epoch: 0.022267844527959824
Average policy_loss of this k_epoch: -0.029949326068162918
KL Divergence Average Loss: 0.005049685016274452
Total Loss of this k_epoch: -0.031012222170829773


Entropy of this k_epoch: 0.02609119564294815
Average policy_loss of this k_epoch: -0.029690761119127274
KL Divergence Average Loss: 0.024965163320302963
Total Loss of this k_epoch: -0.030745668336749077


Entropy of this k_epoch: 0.02238941192626953
Average policy_loss of this k_epoch: -0.029979873448610306
KL Divergence Average Loss: 0.006142538972198963
Total Loss of this k_epoch: -0.031037917360663414






Entropy of this k_epoch: 0.030928730964660645
Average policy_loss of this k_epoch: -0.028715912252664566
KL Divergence Average Loss: 0.057994671165943146
Total Loss of this k_epoch: -0.029682403430342674


Entropy of this k_epoch: 0.025175191462039948
Average policy_loss of this k_epoch: -0.0297965370118618
KL Divergence Average Loss: 0.00565169844776392
Total Loss of this k_epoch: -0.030998779460787773


Entropy of this k_epoch: 0.02172759547829628
Average policy_loss of this k_epoch: -0.029946327209472656
KL Divergence Average Loss: 0.00501569639891386
Total Loss of this k_epoch: -0.030982550233602524






Entropy of this k_epoch: 0.028383970260620117
Average policy_loss of this k_epoch: -0.029688015580177307
KL Divergence Average Loss: 0.0038282442837953568
Total Loss of this k_epoch: -0.03106893040239811


Entropy of this k_epoch: 0.025683222338557243
Average policy_loss of this k_epoch: -0.029750384390354156
KL Divergence Average Loss: 0.004492098931223154
Total Loss of this k_epoch: -0.030989624559879303


Entropy of this k_epoch: 0.024947302415966988
Average policy_loss of this k_epoch: -0.029859215021133423
KL Divergence Average Loss: 0.004316580016165972
Total Loss of this k_epoch: -0.031063415110111237






Entropy of this k_epoch: 0.028450170531868935
Average policy_loss of this k_epoch: -0.029627934098243713
KL Divergence Average Loss: 0.0036361636593937874
Total Loss of this k_epoch: -0.031014079228043556


Entropy of this k_epoch: 0.03208411484956741
Average policy_loss of this k_epoch: -0.029461678117513657
KL Divergence Average Loss: 0.0025821286253631115
Total Loss of this k_epoch: -0.03104006126523018


Entropy of this k_epoch: 0.028639640659093857
Average policy_loss of this k_epoch: -0.029728684574365616
KL Divergence Average Loss: 0.004520656540989876
Total Loss of this k_epoch: -0.031115461140871048






Entropy of this k_epoch: 0.03090820647776127
Average policy_loss of this k_epoch: -0.029517434537410736
KL Divergence Average Loss: 0.002904625842347741
Total Loss of this k_epoch: -0.031033799052238464


Entropy of this k_epoch: 0.029249824583530426
Average policy_loss of this k_epoch: -0.029640547931194305
KL Divergence Average Loss: 0.0032655238173902035
Total Loss of this k_epoch: -0.03107038326561451


Entropy of this k_epoch: 0.02924017794430256
Average policy_loss of this k_epoch: -0.02966398000717163
KL Divergence Average Loss: 0.0037181731313467026
Total Loss of this k_epoch: -0.031088806688785553






Entropy of this k_epoch: 0.028622262179851532
Average policy_loss of this k_epoch: -0.029715988785028458
KL Divergence Average Loss: 0.003960225731134415
Total Loss of this k_epoch: -0.031107500195503235


Entropy of this k_epoch: 0.030377008020877838
Average policy_loss of this k_epoch: -0.029611606150865555
KL Divergence Average Loss: 0.003591171931475401
Total Loss of this k_epoch: -0.031094545498490334


Entropy of this k_epoch: 0.03748359903693199
Average policy_loss of this k_epoch: -0.029188938438892365
KL Divergence Average Loss: 0.0024815599899739027
Total Loss of this k_epoch: -0.031038302928209305






Entropy of this k_epoch: 0.036273226141929626
Average policy_loss of this k_epoch: -0.02921561524271965
KL Divergence Average Loss: 0.002616016659885645
Total Loss of this k_epoch: -0.031003115698695183


Entropy of this k_epoch: 0.03579413890838623
Average policy_loss of this k_epoch: -0.029287483543157578
KL Divergence Average Loss: 0.002516587032005191
Total Loss of this k_epoch: -0.03105202503502369


Entropy of this k_epoch: 0.03554036095738411
Average policy_loss of this k_epoch: 0.0463520884513855
KL Divergence Average Loss: 0.003081155940890312
Total Loss of this k_epoch: 0.04460588097572327






Entropy of this k_epoch: 0.026967884972691536
Average policy_loss of this k_epoch: -0.029836386442184448
KL Divergence Average Loss: 0.0048125311732292175
Total Loss of this k_epoch: -0.031136656180024147


Entropy of this k_epoch: 0.026242602616548538
Average policy_loss of this k_epoch: -0.02989291399717331
KL Divergence Average Loss: 0.0050474731251597404
Total Loss of this k_epoch: -0.0311545692384243


Entropy of this k_epoch: 0.024353519082069397
Average policy_loss of this k_epoch: -0.02991212159395218
KL Divergence Average Loss: 0.004945229273289442
Total Loss of this k_epoch: -0.031080346554517746




Epoch 6/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.41it/s]


Entropy of this k_epoch: 0.022937307134270668
Average policy_loss of this k_epoch: -0.030023161321878433
KL Divergence Average Loss: 0.005740017164498568
Total Loss of this k_epoch: -0.031112628057599068


Entropy of this k_epoch: 0.022899985313415527
Average policy_loss of this k_epoch: -0.0299549363553524
KL Divergence Average Loss: 0.005104250740259886
Total Loss of this k_epoch: -0.031048893928527832


Entropy of this k_epoch: 0.023560239002108574
Average policy_loss of this k_epoch: -0.03000291809439659
KL Divergence Average Loss: 0.006631878204643726
Total Loss of this k_epoch: -0.03111460991203785

Last k_epoch stats:
Loss: -0.0311146 | Ratio: 0.9932213 | Entropy Term: 0.0235602


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  19%|█▉        | 6/32 [00:32<02:20,  5.39s/it]

Entire Validation Dataset Accuracy: 0.8958| 172.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.021818529814481735
Average policy_loss of this k_epoch: -8.557736873626709e-05
KL Divergence Average Loss: 0.0021091466769576073
Total Loss of this k_epoch: -0.0011554124066606164






Entropy of this k_epoch: 0.01832927018404007
Average policy_loss of this k_epoch: -0.00016599521040916443
KL Divergence Average Loss: 0.00045796402264386415
Total Loss of this k_epoch: -0.0010778791038319468






Entropy of this k_epoch: 0.01910686306655407
Average policy_loss of this k_epoch: -0.0001515522599220276
KL Divergence Average Loss: 0.0003326239821035415
Total Loss of this k_epoch: -0.0011035691713914275


Entropy of this k_epoch: 0.019428756088018417
Average policy_loss of this k_epoch: -0.00013072043657302856
KL Divergence Average Loss: 0.0004324297187849879
Total Loss of this k_epoch: -0.0010978339705616236


Entropy of this k_epoch: 0.019591189920902252
Average policy_loss of this k_epoch: -0.00014030933380126953
KL Divergence Average Loss: 0.0003546254010871053
Total Loss of this k_epoch: -0.00111632258631289






Entropy of this k_epoch: 0.020585350692272186
Average policy_loss of this k_epoch: -0.00011803954839706421
KL Divergence Average Loss: 0.0005034462665207684
Total Loss of this k_epoch: -0.0011422726092860103


Entropy of this k_epoch: 0.020453378558158875
Average policy_loss of this k_epoch: -0.0001214742660522461
KL Divergence Average Loss: 0.0005084427539259195
Total Loss of this k_epoch: -0.001139058731496334






Entropy of this k_epoch: 0.025603491812944412
Average policy_loss of this k_epoch: 2.166256308555603e-05
KL Divergence Average Loss: 0.0005536211538128555
Total Loss of this k_epoch: -0.0012529757805168629






Entropy of this k_epoch: 0.02425464801490307
Average policy_loss of this k_epoch: -1.8924474716186523e-06
KL Divergence Average Loss: 0.001202181214466691
Total Loss of this k_epoch: -0.00120260298717767


Entropy of this k_epoch: 0.025479372590780258
Average policy_loss of this k_epoch: 4.011765122413635e-05
KL Divergence Average Loss: 0.0007139774970710278
Total Loss of this k_epoch: -0.0012267112033441663






Entropy of this k_epoch: 0.024489549919962883
Average policy_loss of this k_epoch: -1.2263655662536621e-05
KL Divergence Average Loss: 0.000471282284706831
Total Loss of this k_epoch: -0.0012320283567532897


Entropy of this k_epoch: 0.025672903284430504
Average policy_loss of this k_epoch: 2.9437243938446045e-05
KL Divergence Average Loss: 0.00026413711020722985
Total Loss of this k_epoch: -0.0012515665730461478


Entropy of this k_epoch: 0.028564248234033585
Average policy_loss of this k_epoch: 0.00010280311107635498
KL Divergence Average Loss: 0.0011965306475758553
Total Loss of this k_epoch: -0.0013134439941495657






Entropy of this k_epoch: 0.03221941739320755
Average policy_loss of this k_epoch: 0.0002103373408317566
KL Divergence Average Loss: 0.0008020417299121618
Total Loss of this k_epoch: -0.0013926131650805473


Entropy of this k_epoch: 0.037309564650058746
Average policy_loss of this k_epoch: 0.000396728515625
KL Divergence Average Loss: 0.0014425215777009726
Total Loss of this k_epoch: -0.0014543244615197182


Entropy of this k_epoch: 0.036626074463129044
Average policy_loss of this k_epoch: 0.00035378336906433105
KL Divergence Average Loss: 0.0011615564581006765
Total Loss of this k_epoch: -0.0014659047592431307






Entropy of this k_epoch: 0.03806976228952408
Average policy_loss of this k_epoch: 0.0004150979220867157
KL Divergence Average Loss: 0.0017062315018847585
Total Loss of this k_epoch: -0.0014713279670104384


Entropy of this k_epoch: 0.04285638779401779
Average policy_loss of this k_epoch: 0.0005563832819461823
KL Divergence Average Loss: 0.002282428555190563
Total Loss of this k_epoch: -0.0015636117896065116


Entropy of this k_epoch: 0.048766374588012695
Average policy_loss of this k_epoch: 0.000760342925786972
KL Divergence Average Loss: 0.00367339001968503
Total Loss of this k_epoch: -0.0016412418335676193






Entropy of this k_epoch: 0.05479483678936958
Average policy_loss of this k_epoch: 0.0009607598185539246
KL Divergence Average Loss: 0.005141142755746841
Total Loss of this k_epoch: -0.0017275706632062793


Entropy of this k_epoch: 0.059333037585020065
Average policy_loss of this k_epoch: 0.0012066960334777832
KL Divergence Average Loss: 0.007299310062080622
Total Loss of this k_epoch: -0.0016869627870619297


Entropy of this k_epoch: 0.06705611944198608
Average policy_loss of this k_epoch: 0.0014368966221809387
KL Divergence Average Loss: 0.009394900873303413
Total Loss of this k_epoch: -0.0018219603225588799






Entropy of this k_epoch: 0.07267485558986664
Average policy_loss of this k_epoch: 0.0016787052154541016
KL Divergence Average Loss: 0.012576696462929249
Total Loss of this k_epoch: -0.001829270739108324


Entropy of this k_epoch: 0.07914245128631592
Average policy_loss of this k_epoch: 0.0019319802522659302
KL Divergence Average Loss: 0.015416236594319344
Total Loss of this k_epoch: -0.0018709800206124783


Entropy of this k_epoch: 0.08089327067136765
Average policy_loss of this k_epoch: 0.00200803205370903
KL Divergence Average Loss: 0.016341855749487877
Total Loss of this k_epoch: -0.0018732130993157625






Entropy of this k_epoch: 0.08995263278484344
Average policy_loss of this k_epoch: 0.0023939572274684906
KL Divergence Average Loss: 0.02168389782309532
Total Loss of this k_epoch: -0.0018868357874453068


Entropy of this k_epoch: 0.09857437014579773
Average policy_loss of this k_epoch: 0.002789415419101715
KL Divergence Average Loss: 0.02723408117890358
Total Loss of this k_epoch: -0.0018669625278562307


Entropy of this k_epoch: 0.09702006727457047
Average policy_loss of this k_epoch: 0.0027121640741825104
KL Divergence Average Loss: 0.02617100067436695
Total Loss of this k_epoch: -0.0018771295435726643






Entropy of this k_epoch: 0.0994524210691452
Average policy_loss of this k_epoch: 0.0027516409754753113
KL Divergence Average Loss: 0.02764427289366722
Total Loss of this k_epoch: -0.001944537740200758


Entropy of this k_epoch: 0.11032803356647491
Average policy_loss of this k_epoch: 0.0033119656145572662
KL Divergence Average Loss: 0.035121530294418335
Total Loss of this k_epoch: -0.001853221096098423


Entropy of this k_epoch: 0.10339153558015823
Average policy_loss of this k_epoch: 0.0029806941747665405
KL Divergence Average Loss: 0.030381960794329643
Total Loss of this k_epoch: -0.0018850632477551699






Entropy of this k_epoch: 0.10901564359664917
Average policy_loss of this k_epoch: 0.003200128674507141
KL Divergence Average Loss: 0.03437548130750656
Total Loss of this k_epoch: -0.0019068988040089607


Entropy of this k_epoch: 0.1125396341085434
Average policy_loss of this k_epoch: 0.0034471601247787476
KL Divergence Average Loss: 0.03735172003507614
Total Loss of this k_epoch: -0.0018063043244183064


Entropy of this k_epoch: 0.10572799295186996
Average policy_loss of this k_epoch: 0.003083854913711548
KL Divergence Average Loss: 0.03249582648277283
Total Loss of this k_epoch: -0.0018775865901261568






Entropy of this k_epoch: 0.10436873137950897
Average policy_loss of this k_epoch: 0.0029640942811965942
KL Divergence Average Loss: 0.03070523962378502
Total Loss of this k_epoch: -0.0019472897984087467


Entropy of this k_epoch: 0.09892824292182922
Average policy_loss of this k_epoch: 0.0027622729539871216
KL Divergence Average Loss: 0.027611227706074715
Total Loss of this k_epoch: -0.0019080268684774637


Entropy of this k_epoch: 0.10005015879869461
Average policy_loss of this k_epoch: 0.002729937434196472
KL Divergence Average Loss: 0.027051471173763275
Total Loss of this k_epoch: -0.0020020557567477226






Entropy of this k_epoch: 0.09210962057113647
Average policy_loss of this k_epoch: 0.002434689551591873
KL Divergence Average Loss: 0.022471092641353607
Total Loss of this k_epoch: -0.0019460804760456085


Entropy of this k_epoch: 0.10145122557878494
Average policy_loss of this k_epoch: 0.0028279870748519897
KL Divergence Average Loss: 0.028752995654940605
Total Loss of this k_epoch: -0.0019570444710552692


Entropy of this k_epoch: 0.09552615880966187
Average policy_loss of this k_epoch: 0.0025811120867729187
KL Divergence Average Loss: 0.024238189682364464
Total Loss of this k_epoch: -0.0019528139382600784






Entropy of this k_epoch: 0.09583371132612228
Average policy_loss of this k_epoch: 0.0026107057929039
KL Divergence Average Loss: 0.025391511619091034
Total Loss of this k_epoch: -0.0019270649645477533


Entropy of this k_epoch: 0.09271855652332306
Average policy_loss of this k_epoch: 0.0024651363492012024
KL Divergence Average Loss: 0.023018475621938705
Total Loss of this k_epoch: -0.001940606627613306


Entropy of this k_epoch: 0.09627360105514526
Average policy_loss of this k_epoch: 0.002571236342191696
KL Divergence Average Loss: 0.030134066939353943
Total Loss of this k_epoch: -0.001941103022545576






Entropy of this k_epoch: 0.08949045836925507
Average policy_loss of this k_epoch: 0.0022770315408706665
KL Divergence Average Loss: 0.02070721425116062
Total Loss of this k_epoch: -0.0019904193468391895


Entropy of this k_epoch: 0.08721702545881271
Average policy_loss of this k_epoch: 0.0021977536380290985
KL Divergence Average Loss: 0.019650768488645554
Total Loss of this k_epoch: -0.0019665900617837906


Entropy of this k_epoch: 0.09442803263664246
Average policy_loss of this k_epoch: 0.0025270991027355194
KL Divergence Average Loss: 0.024136826395988464
Total Loss of this k_epoch: -0.0019529343117028475






Entropy of this k_epoch: 0.08904799818992615
Average policy_loss of this k_epoch: 0.0022909268736839294
KL Divergence Average Loss: 0.02070932649075985
Total Loss of this k_epoch: -0.0019543797243386507


Entropy of this k_epoch: 0.09021586179733276
Average policy_loss of this k_epoch: 0.0023470893502235413
KL Divergence Average Loss: 0.020996933802962303
Total Loss of this k_epoch: -0.0019537347834557295


Entropy of this k_epoch: 0.0844082161784172
Average policy_loss of this k_epoch: 0.002035725861787796
KL Divergence Average Loss: 0.02040785923600197
Total Loss of this k_epoch: -0.00198060623370111






Entropy of this k_epoch: 0.08775199204683304
Average policy_loss of this k_epoch: 0.0022037960588932037
KL Divergence Average Loss: 0.019768834114074707
Total Loss of this k_epoch: -0.0019861154723912477


Entropy of this k_epoch: 0.08555750548839569
Average policy_loss of this k_epoch: 0.0021503381431102753
KL Divergence Average Loss: 0.01820554956793785
Total Loss of this k_epoch: -0.0019454816356301308


Entropy of this k_epoch: 0.08832387626171112
Average policy_loss of this k_epoch: 0.0022552981972694397
KL Divergence Average Loss: 0.02001296356320381
Total Loss of this k_epoch: -0.0019607660360634327






Entropy of this k_epoch: 0.08686644583940506
Average policy_loss of this k_epoch: 0.0021825507283210754
KL Divergence Average Loss: 0.020529648289084435
Total Loss of this k_epoch: -0.001955475192517042


Entropy of this k_epoch: 0.08816908299922943
Average policy_loss of this k_epoch: 0.0022408589720726013
KL Divergence Average Loss: 0.01942068710923195
Total Loss of this k_epoch: -0.0019733882509171963


Entropy of this k_epoch: 0.0909837931394577
Average policy_loss of this k_epoch: 0.0023442134261131287
KL Divergence Average Loss: 0.02155325561761856
Total Loss of this k_epoch: -0.0019894440192729235






Entropy of this k_epoch: 0.09343580901622772
Average policy_loss of this k_epoch: 0.0024748854339122772
KL Divergence Average Loss: 0.023378141224384308
Total Loss of this k_epoch: -0.0019631236791610718


Entropy of this k_epoch: 0.08983197808265686
Average policy_loss of this k_epoch: 0.00230519101023674
KL Divergence Average Loss: 0.021250443533062935
Total Loss of this k_epoch: -0.0019739032723009586


Entropy of this k_epoch: 0.09378892183303833
Average policy_loss of this k_epoch: 0.00241243839263916
KL Divergence Average Loss: 0.02695794776082039
Total Loss of this k_epoch: -0.0020074283238500357






Entropy of this k_epoch: 0.09596407413482666
Average policy_loss of this k_epoch: 0.0024917535483837128
KL Divergence Average Loss: 0.027699563652276993
Total Loss of this k_epoch: -0.002029454568400979


Entropy of this k_epoch: 0.10158447921276093
Average policy_loss of this k_epoch: 0.002812996506690979
KL Divergence Average Loss: 0.029328681528568268
Total Loss of this k_epoch: -0.001972940983250737


Entropy of this k_epoch: 0.09795069694519043
Average policy_loss of this k_epoch: 0.0026790425181388855
KL Divergence Average Loss: 0.026360221207141876
Total Loss of this k_epoch: -0.00195489008910954




Epoch 7/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.21it/s]


Entropy of this k_epoch: 0.1003180742263794
Average policy_loss of this k_epoch: 0.0027750730514526367
KL Divergence Average Loss: 0.02739596739411354
Total Loss of this k_epoch: -0.001966870855540037


Entropy of this k_epoch: 0.09691819548606873
Average policy_loss of this k_epoch: 0.0025689974427223206
KL Divergence Average Loss: 0.02618362568318844
Total Loss of this k_epoch: -0.0020150761120021343


Entropy of this k_epoch: 0.0979691594839096
Average policy_loss of this k_epoch: 0.0025718584656715393
KL Divergence Average Loss: 0.029752636328339577
Total Loss of this k_epoch: -0.0020290734246373177

Last k_epoch stats:
Loss: -0.0020291 | Ratio: 0.9794246 | Entropy Term: 0.0979692


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  22%|██▏       | 7/32 [00:37<02:15,  5.43s/it]

Entire Validation Dataset Accuracy: 0.8958| 172.0 / 192.0 samples
old_predictions: 
tensor([1, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([-4.4739,  0.2200,  0.2200,  0.2200,  0.2200,  0.2200,  0.2200,  0.2200,
         0.2200,  0.2200], device='cuda:0')





Entropy of this k_epoch: 0.09733642637729645
Average policy_loss of this k_epoch: -0.028129899874329567
KL Divergence Average Loss: 0.0008944965666159987
Total Loss of this k_epoch: -0.03298777714371681






Entropy of this k_epoch: 0.090447798371315
Average policy_loss of this k_epoch: 0.0035835877060890198
KL Divergence Average Loss: 0.0015556002035737038
Total Loss of this k_epoch: -0.0009232463780790567






Entropy of this k_epoch: 0.08206403255462646
Average policy_loss of this k_epoch: -0.009541962295770645
KL Divergence Average Loss: 0.0014946607407182455
Total Loss of this k_epoch: -0.01363021694123745


Entropy of this k_epoch: 0.0721680074930191
Average policy_loss of this k_epoch: -0.029956981539726257
KL Divergence Average Loss: 0.0028628872241824865
Total Loss of this k_epoch: -0.03353675454854965


Entropy of this k_epoch: 0.061923034489154816
Average policy_loss of this k_epoch: -0.025136489421129227
KL Divergence Average Loss: 0.0046628438867628574
Total Loss of this k_epoch: -0.0281860139220953






Entropy of this k_epoch: 0.05005776882171631
Average policy_loss of this k_epoch: -0.03133920580148697
KL Divergence Average Loss: 0.007529708091169596
Total Loss of this k_epoch: -0.03376679867506027


Entropy of this k_epoch: 0.04158179461956024
Average policy_loss of this k_epoch: -0.03186890482902527
KL Divergence Average Loss: 0.009606420993804932
Total Loss of this k_epoch: -0.033851929008960724






Entropy of this k_epoch: 0.03326539695262909
Average policy_loss of this k_epoch: -0.03227715939283371
KL Divergence Average Loss: 0.014512830413877964
Total Loss of this k_epoch: -0.033795300871133804






Entropy of this k_epoch: 0.03172851353883743
Average policy_loss of this k_epoch: -0.03231651335954666
KL Divergence Average Loss: 0.019748875871300697
Total Loss of this k_epoch: -0.0337054505944252


Entropy of this k_epoch: 0.024225182831287384
Average policy_loss of this k_epoch: -0.03268484026193619
KL Divergence Average Loss: 0.015991458669304848
Total Loss of this k_epoch: -0.03373618423938751


Entropy of this k_epoch: 0.02392582967877388
Average policy_loss of this k_epoch: -0.0327230766415596
KL Divergence Average Loss: 0.01771298050880432
Total Loss of this k_epoch: -0.03374223783612251






Entropy of this k_epoch: 0.029479514807462692
Average policy_loss of this k_epoch: -0.030492709949612617
KL Divergence Average Loss: 0.10968240350484848
Total Loss of this k_epoch: -0.0308698620647192


Entropy of this k_epoch: 0.01858597621321678
Average policy_loss of this k_epoch: -0.03295977786183357
KL Divergence Average Loss: 0.017697660252451897
Total Loss of this k_epoch: -0.033712100237607956






Entropy of this k_epoch: 0.017361201345920563
Average policy_loss of this k_epoch: -0.03300363942980766
KL Divergence Average Loss: 0.018031291663646698
Total Loss of this k_epoch: -0.03369138762354851






Entropy of this k_epoch: 0.017394015565514565
Average policy_loss of this k_epoch: -0.03297127038240433
KL Divergence Average Loss: 0.018128668889403343
Total Loss of this k_epoch: -0.0336596854031086


Entropy of this k_epoch: 0.01990685425698757
Average policy_loss of this k_epoch: -0.029603995382785797
KL Divergence Average Loss: 0.061230193823575974
Total Loss of this k_epoch: -0.029987037181854248


Entropy of this k_epoch: 0.018303479999303818
Average policy_loss of this k_epoch: -0.032907042652368546
KL Divergence Average Loss: 0.01801854372024536
Total Loss of this k_epoch: -0.033642031252384186






Entropy of this k_epoch: 0.017943184822797775
Average policy_loss of this k_epoch: -0.03294728323817253
KL Divergence Average Loss: 0.018812164664268494
Total Loss of this k_epoch: -0.03365631774067879


Entropy of this k_epoch: 0.01568746566772461
Average policy_loss of this k_epoch: -0.0330791249871254
KL Divergence Average Loss: 0.01912277564406395
Total Loss of this k_epoch: -0.0336722731590271


Entropy of this k_epoch: 0.018747542053461075
Average policy_loss of this k_epoch: -0.032922469079494476
KL Divergence Average Loss: 0.017704350873827934
Total Loss of this k_epoch: -0.03368280082941055






Entropy of this k_epoch: 0.017209704965353012
Average policy_loss of this k_epoch: -0.0329558365046978
KL Divergence Average Loss: 0.018861429765820503
Total Loss of this k_epoch: -0.0336277075111866


Entropy of this k_epoch: 0.01739760860800743
Average policy_loss of this k_epoch: -0.03292059898376465
KL Divergence Average Loss: 0.018632767722010612
Total Loss of this k_epoch: -0.033604152500629425






Entropy of this k_epoch: 0.026586607098579407
Average policy_loss of this k_epoch: -0.03031797893345356
KL Divergence Average Loss: 0.04608089476823807
Total Loss of this k_epoch: -0.031186500564217567


Entropy of this k_epoch: 0.017469370737671852
Average policy_loss of this k_epoch: -0.03293706849217415
KL Divergence Average Loss: 0.01883493736386299
Total Loss of this k_epoch: -0.03362218663096428


Entropy of this k_epoch: 0.013266813941299915
Average policy_loss of this k_epoch: -0.033166639506816864
KL Divergence Average Loss: 0.01995111256837845
Total Loss of this k_epoch: -0.03363046795129776






Entropy of this k_epoch: 0.014753732830286026
Average policy_loss of this k_epoch: -0.033111974596977234
KL Divergence Average Loss: 0.01927785389125347
Total Loss of this k_epoch: -0.033656880259513855


Entropy of this k_epoch: 0.013886770233511925
Average policy_loss of this k_epoch: -0.0331573560833931
KL Divergence Average Loss: 0.019809026271104813
Total Loss of this k_epoch: -0.03365360572934151


Entropy of this k_epoch: 0.01530672237277031
Average policy_loss of this k_epoch: -0.03307521343231201
KL Divergence Average Loss: 0.022276336327195168
Total Loss of this k_epoch: -0.0336177833378315






Entropy of this k_epoch: 0.014485792256891727
Average policy_loss of this k_epoch: -0.0331258624792099
KL Divergence Average Loss: 0.01941739022731781
Total Loss of this k_epoch: -0.03365597873926163


Entropy of this k_epoch: 0.013821837492287159
Average policy_loss of this k_epoch: -0.03315456584095955
KL Divergence Average Loss: 0.020119722932577133
Total Loss of this k_epoch: -0.03364446386694908


Entropy of this k_epoch: 0.013390221633017063
Average policy_loss of this k_epoch: -0.03319418802857399
KL Divergence Average Loss: 0.02038602903485298
Total Loss of this k_epoch: -0.03365984186530113






Entropy of this k_epoch: 0.014697277918457985
Average policy_loss of this k_epoch: -0.03311225771903992
KL Divergence Average Loss: 0.019629616290330887
Total Loss of this k_epoch: -0.033650826662778854


Entropy of this k_epoch: 0.015469426289200783
Average policy_loss of this k_epoch: -0.03310322016477585
KL Divergence Average Loss: 0.018908556550741196
Total Loss of this k_epoch: -0.03368760645389557


Entropy of this k_epoch: 0.016141433268785477
Average policy_loss of this k_epoch: -0.03303518146276474
KL Divergence Average Loss: 0.018746189773082733
Total Loss of this k_epoch: -0.033654794096946716






Entropy of this k_epoch: 0.016257625073194504
Average policy_loss of this k_epoch: -0.033030591905117035
KL Divergence Average Loss: 0.01866954378783703
Total Loss of this k_epoch: -0.0336567759513855


Entropy of this k_epoch: 0.015543883666396141
Average policy_loss of this k_epoch: -0.033074092119932175
KL Divergence Average Loss: 0.018906116485595703
Total Loss of this k_epoch: -0.03366222232580185


Entropy of this k_epoch: 0.017079506069421768
Average policy_loss of this k_epoch: -0.033010803163051605
KL Divergence Average Loss: 0.018328648060560226
Total Loss of this k_epoch: -0.033681489527225494






Entropy of this k_epoch: 0.017735227942466736
Average policy_loss of this k_epoch: -0.032991062849760056
KL Divergence Average Loss: 0.017954332754015923
Total Loss of this k_epoch: -0.03369827941060066


Entropy of this k_epoch: 0.015936030074954033
Average policy_loss of this k_epoch: -0.0330631285905838
KL Divergence Average Loss: 0.01907433569431305
Total Loss of this k_epoch: -0.03366918861865997


Entropy of this k_epoch: 0.0178845152258873
Average policy_loss of this k_epoch: -0.032975032925605774
KL Divergence Average Loss: 0.01803971268236637
Total Loss of this k_epoch: -0.03368886187672615






Entropy of this k_epoch: 0.019018948078155518
Average policy_loss of this k_epoch: -0.03293183818459511
KL Divergence Average Loss: 0.017991235479712486
Total Loss of this k_epoch: -0.033702872693538666


Entropy of this k_epoch: 0.020248524844646454
Average policy_loss of this k_epoch: -0.03286643326282501
KL Divergence Average Loss: 0.019848298281431198
Total Loss of this k_epoch: -0.033680375665426254


Entropy of this k_epoch: 0.017462020739912987
Average policy_loss of this k_epoch: -0.032996974885463715
KL Divergence Average Loss: 0.018118469044566154
Total Loss of this k_epoch: -0.03368889167904854






Entropy of this k_epoch: 0.0184393972158432
Average policy_loss of this k_epoch: -0.03295747563242912
KL Divergence Average Loss: 0.017535235732793808
Total Loss of this k_epoch: -0.03370409086346626


Entropy of this k_epoch: 0.020047985017299652
Average policy_loss of this k_epoch: -0.032872408628463745
KL Divergence Average Loss: 0.018004123121500015
Total Loss of this k_epoch: -0.03369476646184921


Entropy of this k_epoch: 0.031189583241939545
Average policy_loss of this k_epoch: -0.031042689457535744
KL Divergence Average Loss: 0.0319841168820858
Total Loss of this k_epoch: -0.032282330095767975






Entropy of this k_epoch: 0.025103813037276268
Average policy_loss of this k_epoch: -0.032602373510599136
KL Divergence Average Loss: 0.016070734709501266
Total Loss of this k_epoch: -0.03369685634970665


Entropy of this k_epoch: 0.02140454202890396
Average policy_loss of this k_epoch: -0.03284193575382233
KL Divergence Average Loss: 0.016677726060152054
Total Loss of this k_epoch: -0.03374538570642471


Entropy of this k_epoch: 0.02564084343612194
Average policy_loss of this k_epoch: -0.0326356440782547
KL Divergence Average Loss: 0.023306166753172874
Total Loss of this k_epoch: -0.0336846262216568






Entropy of this k_epoch: 0.024337932467460632
Average policy_loss of this k_epoch: -0.032720379531383514
KL Divergence Average Loss: 0.018407640978693962
Total Loss of this k_epoch: -0.03375319764018059


Entropy of this k_epoch: 0.024713942781090736
Average policy_loss of this k_epoch: -0.03266122192144394
KL Divergence Average Loss: 0.021322129294276237
Total Loss of this k_epoch: -0.03368369862437248


Entropy of this k_epoch: 0.02826899290084839
Average policy_loss of this k_epoch: -0.03245079517364502
KL Divergence Average Loss: 0.027492012828588486
Total Loss of this k_epoch: -0.03358932584524155






Entropy of this k_epoch: 0.03842850774526596
Average policy_loss of this k_epoch: -0.031041344627738
KL Divergence Average Loss: 0.09279591590166092
Total Loss of this k_epoch: -0.03203481063246727


Entropy of this k_epoch: 0.028219982981681824
Average policy_loss of this k_epoch: -0.03254114091396332
KL Divergence Average Loss: 0.0218798965215683
Total Loss of this k_epoch: -0.033733341842889786


Entropy of this k_epoch: 0.028894519433379173
Average policy_loss of this k_epoch: -0.032521557062864304
KL Divergence Average Loss: 0.0179760605096817
Total Loss of this k_epoch: -0.03378652408719063






Entropy of this k_epoch: 0.027896162122488022
Average policy_loss of this k_epoch: -0.03254298120737076
KL Divergence Average Loss: 0.014260673895478249
Total Loss of this k_epoch: -0.033795181661844254


Entropy of this k_epoch: 0.03230784088373184
Average policy_loss of this k_epoch: -0.03235901892185211
KL Divergence Average Loss: 0.012662502005696297
Total Loss of this k_epoch: -0.033847786486148834


Entropy of this k_epoch: 0.03177160024642944
Average policy_loss of this k_epoch: -0.032361313700675964
KL Divergence Average Loss: 0.012418685480952263
Total Loss of this k_epoch: -0.03382570669054985






Entropy of this k_epoch: 0.03199997916817665
Average policy_loss of this k_epoch: -0.03230494633316994
KL Divergence Average Loss: 0.012400977313518524
Total Loss of this k_epoch: -0.03378093242645264


Entropy of this k_epoch: 0.042546674609184265
Average policy_loss of this k_epoch: -0.03161244839429855
KL Divergence Average Loss: 0.03591008484363556
Total Loss of this k_epoch: -0.03338068351149559


Entropy of this k_epoch: 0.035463109612464905
Average policy_loss of this k_epoch: -0.032172031700611115
KL Divergence Average Loss: 0.011022867634892464
Total Loss of this k_epoch: -0.03383496031165123




Epoch 8/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.24it/s]


Entropy of this k_epoch: 0.040513500571250916
Average policy_loss of this k_epoch: -0.03190966323018074
KL Divergence Average Loss: 0.009531756862998009
Total Loss of this k_epoch: -0.03384001925587654


Entropy of this k_epoch: 0.0391148142516613
Average policy_loss of this k_epoch: -0.03195667639374733
KL Divergence Average Loss: 0.010114472359418869
Total Loss of this k_epoch: -0.03381127119064331


Entropy of this k_epoch: 0.044438913464546204
Average policy_loss of this k_epoch: -0.03162476420402527
KL Divergence Average Loss: 0.010576377622783184
Total Loss of this k_epoch: -0.03374094516038895

Last k_epoch stats:
Loss: -0.0337409 | Ratio: 0.9952695 | Entropy Term: 0.0444389


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  25%|██▌       | 8/32 [00:43<02:10,  5.45s/it]

Entire Validation Dataset Accuracy: 0.9010| 173.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 1, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 0., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([ 0.1782,  0.1782,  0.1782,  0.1782,  0.1782,  0.1782, -5.5241,  0.1782,
         0.1782,  0.1782], device='cuda:0')





Entropy of this k_epoch: 0.04569840431213379
Average policy_loss of this k_epoch: -0.017131727188825607
KL Divergence Average Loss: 0.0009813241194933653
Total Loss of this k_epoch: -0.019406834617257118






Entropy of this k_epoch: 0.04668886587023735
Average policy_loss of this k_epoch: -0.01708325557410717
KL Divergence Average Loss: 0.0015719959046691656
Total Loss of this k_epoch: -0.019401978701353073






Entropy of this k_epoch: 0.046574290841817856
Average policy_loss of this k_epoch: -0.004406120628118515
KL Divergence Average Loss: 0.0004349196969997138
Total Loss of this k_epoch: -0.006730485707521439


Entropy of this k_epoch: 0.04387036710977554
Average policy_loss of this k_epoch: -0.01725015975534916
KL Divergence Average Loss: 0.0004027055110782385
Total Loss of this k_epoch: -0.01943965069949627


Entropy of this k_epoch: 0.04322905093431473
Average policy_loss of this k_epoch: -0.017258167266845703
KL Divergence Average Loss: 0.0004993389011360705
Total Loss of this k_epoch: -0.01941462606191635






Entropy of this k_epoch: 0.042994387447834015
Average policy_loss of this k_epoch: -0.017270267009735107
KL Divergence Average Loss: 0.0004346319183241576
Total Loss of this k_epoch: -0.019415641203522682


Entropy of this k_epoch: 0.04403729736804962
Average policy_loss of this k_epoch: 0.007165372371673584
KL Divergence Average Loss: 0.00031311518978327513
Total Loss of this k_epoch: 0.004966638516634703






Entropy of this k_epoch: 0.04313017800450325
Average policy_loss of this k_epoch: -0.017169075086712837
KL Divergence Average Loss: 0.002915767952799797
Total Loss of this k_epoch: -0.019296426326036453






Entropy of this k_epoch: 0.040043167769908905
Average policy_loss of this k_epoch: -0.01735619828104973
KL Divergence Average Loss: 0.0016100832726806402
Total Loss of this k_epoch: -0.01934225670993328


Entropy of this k_epoch: 0.042047981172800064
Average policy_loss of this k_epoch: -0.017032193019986153
KL Divergence Average Loss: 0.008303855545818806
Total Loss of this k_epoch: -0.019051553681492805


Entropy of this k_epoch: 0.03115634247660637
Average policy_loss of this k_epoch: -0.01775244250893593
KL Divergence Average Loss: 0.0011105745797976851
Total Loss of this k_epoch: -0.019299155101180077






Entropy of this k_epoch: 0.03277670219540596
Average policy_loss of this k_epoch: -0.017687730491161346
KL Divergence Average Loss: 0.0011968327453359962
Total Loss of this k_epoch: -0.01931459829211235


Entropy of this k_epoch: 0.0283881314098835
Average policy_loss of this k_epoch: -0.017908718436956406
KL Divergence Average Loss: 0.0012825874146074057
Total Loss of this k_epoch: -0.01931529864668846







Entropy of this k_epoch: 0.028100907802581787
Average policy_loss of this k_epoch: -0.017920108512043953
KL Divergence Average Loss: 0.0011845262488350272
Total Loss of this k_epoch: -0.019313309341669083



Epoch 9/32 (Inner K-Epochs):  22%|██▏       | 14/64 [00:01<00:04, 12.16it/s][A


Entropy of this k_epoch: 0.02694178931415081
Average policy_loss of this k_epoch: -0.017950158566236496
KL Divergence Average Loss: 0.001521154073998332
Total Loss of this k_epoch: -0.019282035529613495


Entropy of this k_epoch: 0.02912076935172081
Average policy_loss of this k_epoch: -0.017829280346632004
KL Divergence Average Loss: 0.0012914328835904598
Total Loss of this k_epoch: -0.01927240565419197






Entropy of this k_epoch: 0.02596147172152996
Average policy_loss of this k_epoch: -0.017983824014663696
KL Divergence Average Loss: 0.0016584076220169663
Total Loss of this k_epoch: -0.0192653127014637


Entropy of this k_epoch: 0.02727888524532318
Average policy_loss of this k_epoch: -0.017915897071361542
KL Divergence Average Loss: 0.0014290688559412956
Total Loss of this k_epoch: -0.0192655511200428


Entropy of this k_epoch: 0.02821403183043003
Average policy_loss of this k_epoch: -0.01789093017578125
KL Divergence Average Loss: 0.0017574576195329428
Total Loss of this k_epoch: -0.01928405836224556






Entropy of this k_epoch: 0.0275561586022377
Average policy_loss of this k_epoch: -0.01790470816195011
KL Divergence Average Loss: 0.0013986367266625166
Total Loss of this k_epoch: -0.01926852948963642


Entropy of this k_epoch: 0.026511259377002716
Average policy_loss of this k_epoch: -0.01795751415193081
KL Divergence Average Loss: 0.0014499082462862134
Total Loss of this k_epoch: -0.0192685779184103


Entropy of this k_epoch: 0.02907024882733822
Average policy_loss of this k_epoch: -0.017840996384620667
KL Divergence Average Loss: 0.0012880819849669933
Total Loss of this k_epoch: -0.019281629472970963






Entropy of this k_epoch: 0.030373845249414444
Average policy_loss of this k_epoch: -0.01783222332596779
KL Divergence Average Loss: 0.0010318667627871037
Total Loss of this k_epoch: -0.019340597093105316


Entropy of this k_epoch: 0.03332975506782532
Average policy_loss of this k_epoch: -0.01768547296524048
KL Divergence Average Loss: 0.0008538412512280047
Total Loss of this k_epoch: -0.0193434227257967


Entropy of this k_epoch: 0.032335925847291946
Average policy_loss of this k_epoch: -0.017718330025672913
KL Divergence Average Loss: 0.0008664776105433702
Total Loss of this k_epoch: -0.019326461479067802






Entropy of this k_epoch: 0.03315811604261398
Average policy_loss of this k_epoch: -0.0176730714738369
KL Divergence Average Loss: 0.0008890572935342789
Total Loss of this k_epoch: -0.01932208612561226


Entropy of this k_epoch: 0.03448166325688362
Average policy_loss of this k_epoch: -0.01761702634394169
KL Divergence Average Loss: 0.0008563531446270645
Total Loss of this k_epoch: -0.01933254487812519


Entropy of this k_epoch: 0.039305925369262695
Average policy_loss of this k_epoch: 0.007492218166589737
KL Divergence Average Loss: 0.0005872322944924235
Total Loss of this k_epoch: 0.0055327946320176125






Entropy of this k_epoch: 0.03707592189311981
Average policy_loss of this k_epoch: -0.01755734533071518
KL Divergence Average Loss: 0.0011081346310675144
Total Loss of this k_epoch: -0.019400060176849365


Entropy of this k_epoch: 0.033256515860557556
Average policy_loss of this k_epoch: -0.017674166709184647
KL Divergence Average Loss: 0.0008886769064702094
Total Loss of this k_epoch: -0.019328106194734573


Entropy of this k_epoch: 0.03349517285823822
Average policy_loss of this k_epoch: -0.017684143036603928
KL Divergence Average Loss: 0.0009676475310698152
Total Loss of this k_epoch: -0.019349224865436554






Entropy of this k_epoch: 0.03262726962566376
Average policy_loss of this k_epoch: -0.017722880467772484
KL Divergence Average Loss: 0.0010859258472919464
Total Loss of this k_epoch: -0.019343385472893715


Entropy of this k_epoch: 0.03065529465675354
Average policy_loss of this k_epoch: -0.017799096181988716
KL Divergence Average Loss: 0.0011452916078269482
Total Loss of this k_epoch: -0.019320407882332802


Entropy of this k_epoch: 0.03071024641394615
Average policy_loss of this k_epoch: -0.017810478806495667
KL Divergence Average Loss: 0.000998976407572627
Total Loss of this k_epoch: -0.019336001947522163






Entropy of this k_epoch: 0.03168568015098572
Average policy_loss of this k_epoch: -0.01775413751602173
KL Divergence Average Loss: 0.0009826039895415306
Total Loss of this k_epoch: -0.01932859607040882


Entropy of this k_epoch: 0.036906227469444275
Average policy_loss of this k_epoch: -0.017489496618509293
KL Divergence Average Loss: 0.0014259271556511521
Total Loss of this k_epoch: -0.019320549443364143


Entropy of this k_epoch: 0.03329150378704071
Average policy_loss of this k_epoch: -0.017675263807177544
KL Divergence Average Loss: 0.001996175618842244
Total Loss of this k_epoch: -0.01931987702846527






Entropy of this k_epoch: 0.04178914800286293
Average policy_loss of this k_epoch: -0.016149774193763733
KL Divergence Average Loss: 0.028986044228076935
Total Loss of this k_epoch: -0.01794937066733837


Entropy of this k_epoch: 0.03300963342189789
Average policy_loss of this k_epoch: -0.017672069370746613
KL Divergence Average Loss: 0.0010707078035920858
Total Loss of this k_epoch: -0.019311845302581787


Entropy of this k_epoch: 0.033123262226581573
Average policy_loss of this k_epoch: -0.01769513450562954
KL Divergence Average Loss: 0.0010583302937448025
Total Loss of this k_epoch: -0.019340714439749718






Entropy of this k_epoch: 0.03761710599064827
Average policy_loss of this k_epoch: -0.017518572509288788
KL Divergence Average Loss: 0.000767609803006053
Total Loss of this k_epoch: -0.019391752779483795


Entropy of this k_epoch: 0.03694305568933487
Average policy_loss of this k_epoch: -0.017557846382260323
KL Divergence Average Loss: 0.0011920969700440764
Total Loss of this k_epoch: -0.019393078982830048


Entropy of this k_epoch: 0.03699745982885361
Average policy_loss of this k_epoch: -0.017552582547068596
KL Divergence Average Loss: 0.007145490497350693
Total Loss of this k_epoch: -0.01933100074529648






Entropy of this k_epoch: 0.06567224860191345
Average policy_loss of this k_epoch: -0.013085786253213882
KL Divergence Average Loss: 0.4131682515144348
Total Loss of this k_epoch: -0.01223771646618843


Entropy of this k_epoch: 0.03737654536962509
Average policy_loss of this k_epoch: -0.01751496084034443
KL Divergence Average Loss: 0.001374698942527175
Total Loss of this k_epoch: -0.01937004178762436


Entropy of this k_epoch: 0.03933596611022949
Average policy_loss of this k_epoch: -0.017440320923924446
KL Divergence Average Loss: 0.0006770379841327667
Total Loss of this k_epoch: -0.019400348886847496






Entropy of this k_epoch: 0.04329417645931244
Average policy_loss of this k_epoch: -0.01025039330124855
KL Divergence Average Loss: 0.0008136293618008494
Total Loss of this k_epoch: -0.012406965717673302


Entropy of this k_epoch: 0.04476860165596008
Average policy_loss of this k_epoch: -0.006652291864156723
KL Divergence Average Loss: 0.00036287272814661264
Total Loss of this k_epoch: -0.008887093514204025


Entropy of this k_epoch: 0.04536793380975723
Average policy_loss of this k_epoch: -0.01719525456428528
KL Divergence Average Loss: 0.00036862900014966726
Total Loss of this k_epoch: -0.019459964707493782






Entropy of this k_epoch: 0.041318509727716446
Average policy_loss of this k_epoch: -0.017355458810925484
KL Divergence Average Loss: 0.00046214961912482977
Total Loss of this k_epoch: -0.01941676251590252


Entropy of this k_epoch: 0.04683789238333702
Average policy_loss of this k_epoch: -0.01589914783835411
KL Divergence Average Loss: 0.029680481180548668
Total Loss of this k_epoch: -0.01794423721730709


Entropy of this k_epoch: 0.03983188048005104
Average policy_loss of this k_epoch: -0.01741919293999672
KL Divergence Average Loss: 0.0005263620987534523
Total Loss of this k_epoch: -0.01940552331507206






Entropy of this k_epoch: 0.04435236379504204
Average policy_loss of this k_epoch: -0.01683146506547928
KL Divergence Average Loss: 0.010058808140456676
Total Loss of this k_epoch: -0.018948495388031006


Entropy of this k_epoch: 0.03698121756315231
Average policy_loss of this k_epoch: -0.01750972308218479
KL Divergence Average Loss: 0.0009600762859918177
Total Loss of this k_epoch: -0.01934918388724327


Entropy of this k_epoch: 0.036885492503643036
Average policy_loss of this k_epoch: -0.017517544329166412
KL Divergence Average Loss: 0.0011546346358954906
Total Loss of this k_epoch: -0.019350271672010422





Epoch 9/32 (Inner K-Epochs):  91%|█████████ | 58/64 [00:04<00:00, 12.13it/s]

Entropy of this k_epoch: 0.03661278635263443
Average policy_loss of this k_epoch: -0.017564013600349426
KL Divergence Average Loss: 0.0006361414561979473
Total Loss of this k_epoch: -0.019388291984796524


Entropy of this k_epoch: 0.03233261778950691
Average policy_loss of this k_epoch: -0.017718158662319183
KL Divergence Average Loss: 0.0009442133014090359
Total Loss of this k_epoch: -0.019325347617268562


Entropy of this k_epoch: 0.0402347557246685
Average policy_loss of this k_epoch: -0.015545960515737534
KL Divergence Average Loss: 0.04522149637341499
Total Loss of this k_epoch: -0.017105482518672943



[A


Entropy of this k_epoch: 0.032775893807411194
Average policy_loss of this k_epoch: -0.017693694680929184
KL Divergence Average Loss: 0.001017227303236723
Total Loss of this k_epoch: -0.019322317093610764


Entropy of this k_epoch: 0.03268607705831528
Average policy_loss of this k_epoch: -0.01772332191467285
KL Divergence Average Loss: 0.0010962795931845903
Total Loss of this k_epoch: -0.019346661865711212






Entropy of this k_epoch: 0.0302888173609972
Average policy_loss of this k_epoch: -0.017828108742833138
KL Divergence Average Loss: 0.0012096508871763945
Total Loss of this k_epoch: -0.019330453127622604


Entropy of this k_epoch: 0.032894283533096313
Average policy_loss of this k_epoch: -0.017710531130433083
KL Divergence Average Loss: 0.0009544899221509695
Total Loss of this k_epoch: -0.01934570074081421


Entropy of this k_epoch: 0.033183030784130096
Average policy_loss of this k_epoch: -0.017704181373119354
KL Divergence Average Loss: 0.005009982734918594
Total Loss of this k_epoch: -0.019313232973217964




Epoch 9/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.24it/s]
>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  28%|██▊       | 9/32 [00:48<02:05,  5.46s/it]

Entropy of this k_epoch: 0.03214985132217407
Average policy_loss of this k_epoch: -0.017748519778251648
KL Divergence Average Loss: 0.0023016787599772215
Total Loss of this k_epoch: -0.019332995638251305

Last k_epoch stats:
Loss: -0.0193330 | Ratio: 0.9930965 | Entropy Term: 0.0321499
Entire Validation Dataset Accuracy: 0.9167| 176.0 / 192.0 samples
old_predictions: 
tensor([0, 1, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 0., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([ 0.1782, -5.5241,  0.1782,  0.1782,  0.1782,  0.1782,  0.1782,  0.1782,
         0.1782,  0.1782], device='cuda:0')





Entropy of this k_epoch: 0.0355338454246521
Average policy_loss of this k_epoch: 0.010299883782863617
KL Divergence Average Loss: 0.003140217624604702
Total Loss of this k_epoch: 0.008554593659937382


Entropy of this k_epoch: 0.03640194237232208
Average policy_loss of this k_epoch: 0.0035922788083553314
KL Divergence Average Loss: 0.005472919903695583
Total Loss of this k_epoch: 0.0018269108841195703






Entropy of this k_epoch: 0.03465653955936432
Average policy_loss of this k_epoch: -0.005953826010227203
KL Divergence Average Loss: 0.013343995437026024
Total Loss of this k_epoch: -0.0075532132759690285


Entropy of this k_epoch: 0.024856023490428925
Average policy_loss of this k_epoch: -0.017524201422929764
KL Divergence Average Loss: 0.0006550358375534415
Total Loss of this k_epoch: -0.018760452046990395


Entropy of this k_epoch: 0.03464944660663605
Average policy_loss of this k_epoch: -0.016683664172887802
KL Divergence Average Loss: 0.04720119386911392
Total Loss of this k_epoch: -0.017944125458598137






Entropy of this k_epoch: 0.033061683177948
Average policy_loss of this k_epoch: -0.015535615384578705
KL Divergence Average Loss: 0.11647430062294006
Total Loss of this k_epoch: -0.01602395623922348


Entropy of this k_epoch: 0.015680957585573196
Average policy_loss of this k_epoch: -0.01785150356590748
KL Divergence Average Loss: 0.0019505050731822848
Total Loss of this k_epoch: -0.018616046756505966


Entropy of this k_epoch: 0.014684938825666904
Average policy_loss of this k_epoch: -0.017885763198137283
KL Divergence Average Loss: 0.0020926077850162983
Total Loss of this k_epoch: -0.01859908364713192






Entropy of this k_epoch: 0.013944300822913647
Average policy_loss of this k_epoch: -0.017903480678796768
KL Divergence Average Loss: 0.0019311161013320088
Total Loss of this k_epoch: -0.018581382930278778


Entropy of this k_epoch: 0.013351812027394772
Average policy_loss of this k_epoch: -0.017922107130289078
KL Divergence Average Loss: 0.0017858613282442093
Total Loss of this k_epoch: -0.01857183873653412


Entropy of this k_epoch: 0.013773346319794655
Average policy_loss of this k_epoch: -0.017887748777866364
KL Divergence Average Loss: 0.0021356544457376003
Total Loss of this k_epoch: -0.018555058166384697






Entropy of this k_epoch: 0.011718695983290672
Average policy_loss of this k_epoch: -0.01797439157962799
KL Divergence Average Loss: 0.0021412561181932688
Total Loss of this k_epoch: -0.018538912758231163


Entropy of this k_epoch: 0.011766770854592323
Average policy_loss of this k_epoch: -0.017974747344851494
KL Divergence Average Loss: 0.002912025200203061
Total Loss of this k_epoch: -0.018533965572714806


Entropy of this k_epoch: 0.011763865128159523
Average policy_loss of this k_epoch: -0.017963986843824387
KL Divergence Average Loss: 0.002072045812383294
Total Loss of this k_epoch: -0.01853146031498909






Entropy of this k_epoch: 0.011729741469025612
Average policy_loss of this k_epoch: -0.017968226224184036
KL Divergence Average Loss: 0.0020714993588626385
Total Loss of this k_epoch: -0.018533999100327492


Entropy of this k_epoch: 0.012184630148112774
Average policy_loss of this k_epoch: -0.01796155795454979
KL Divergence Average Loss: 0.0019690310582518578
Total Loss of this k_epoch: -0.01855110004544258


Entropy of this k_epoch: 0.013890796341001987
Average policy_loss of this k_epoch: -0.017888939008116722
KL Divergence Average Loss: 0.0020026634447276592
Total Loss of this k_epoch: -0.01856345124542713






Entropy of this k_epoch: 0.013222780078649521
Average policy_loss of this k_epoch: -0.017921796068549156
KL Divergence Average Loss: 0.001807656604796648
Total Loss of this k_epoch: -0.0185648575425148


Entropy of this k_epoch: 0.014346900396049023
Average policy_loss of this k_epoch: -0.017868369817733765
KL Divergence Average Loss: 0.0019775615073740482
Total Loss of this k_epoch: -0.018565939739346504


Entropy of this k_epoch: 0.015144494362175465
Average policy_loss of this k_epoch: -0.017842113971710205
KL Divergence Average Loss: 0.0016394376289099455
Total Loss of this k_epoch: -0.018582943826913834






Entropy of this k_epoch: 0.015008124522864819
Average policy_loss of this k_epoch: -0.017861660569906235
KL Divergence Average Loss: 0.0014713730197399855
Total Loss of this k_epoch: -0.018597353249788284


Entropy of this k_epoch: 0.016132138669490814
Average policy_loss of this k_epoch: -0.017823729664087296
KL Divergence Average Loss: 0.001302662887610495
Total Loss of this k_epoch: -0.018617309629917145


Entropy of this k_epoch: 0.013770285062491894
Average policy_loss of this k_epoch: -0.017909565940499306
KL Divergence Average Loss: 0.0017709387466311455
Total Loss of this k_epoch: -0.018580369651317596






Entropy of this k_epoch: 0.015233641490340233
Average policy_loss of this k_epoch: -0.01785680092871189
KL Divergence Average Loss: 0.001426386064849794
Total Loss of this k_epoch: -0.01860421895980835


Entropy of this k_epoch: 0.026516996324062347
Average policy_loss of this k_epoch: -0.015845876187086105
KL Divergence Average Loss: 0.04033639654517174
Total Loss of this k_epoch: -0.016768362373113632


Entropy of this k_epoch: 0.019223742187023163
Average policy_loss of this k_epoch: -0.017704060301184654
KL Divergence Average Loss: 0.0014013536274433136
Total Loss of this k_epoch: -0.018651233986020088






Entropy of this k_epoch: 0.01592225581407547
Average policy_loss of this k_epoch: -0.017833417281508446
KL Divergence Average Loss: 0.001488494803197682
Total Loss of this k_epoch: -0.018614646047353745


Entropy of this k_epoch: 0.015223845839500427
Average policy_loss of this k_epoch: -0.017859037965536118
KL Divergence Average Loss: 0.0015731063904240727
Total Loss of this k_epoch: -0.018604498356580734


Entropy of this k_epoch: 0.017641210928559303
Average policy_loss of this k_epoch: 0.004146609455347061
KL Divergence Average Loss: 0.0011605030158534646
Total Loss of this k_epoch: 0.003276154166087508






Entropy of this k_epoch: 0.015864457935094833
Average policy_loss of this k_epoch: -0.017833538353443146
KL Divergence Average Loss: 0.001426337636075914
Total Loss of this k_epoch: -0.018612496554851532


Entropy of this k_epoch: 0.015725813806056976
Average policy_loss of this k_epoch: -0.017827870324254036
KL Divergence Average Loss: 0.0013824841007590294
Total Loss of this k_epoch: -0.018600337207317352


Entropy of this k_epoch: 0.014174235984683037
Average policy_loss of this k_epoch: -0.017884191125631332
KL Divergence Average Loss: 0.001666179159656167
Total Loss of this k_epoch: -0.0185762420296669






Entropy of this k_epoch: 0.021825112402439117
Average policy_loss of this k_epoch: -0.017470693215727806
KL Divergence Average Loss: 0.020332299172878265
Total Loss of this k_epoch: -0.01835862547159195


Entropy of this k_epoch: 0.015080221928656101
Average policy_loss of this k_epoch: -0.017862945795059204
KL Divergence Average Loss: 0.0020857430063188076
Total Loss of this k_epoch: -0.018596099689602852


Entropy of this k_epoch: 0.01375395618379116
Average policy_loss of this k_epoch: -0.01790112815797329
KL Divergence Average Loss: 0.001988733885809779
Total Loss of this k_epoch: -0.018568938598036766






Entropy of this k_epoch: 0.01438837219029665
Average policy_loss of this k_epoch: -0.017883405089378357
KL Divergence Average Loss: 0.0020749710965901613
Total Loss of this k_epoch: -0.018582073971629143


Entropy of this k_epoch: 0.014888941310346127
Average policy_loss of this k_epoch: -0.017878131940960884
KL Divergence Average Loss: 0.0021982828620821238
Total Loss of this k_epoch: -0.018600596114993095


Entropy of this k_epoch: 0.012727423571050167
Average policy_loss of this k_epoch: -0.01793690025806427
KL Divergence Average Loss: 0.001930266385897994
Total Loss of this k_epoch: -0.018553968518972397






Entropy of this k_epoch: 0.015076600015163422
Average policy_loss of this k_epoch: -0.0178680382668972
KL Divergence Average Loss: 0.0016779025318101048
Total Loss of this k_epoch: -0.01860508881509304


Entropy of this k_epoch: 0.014908592216670513
Average policy_loss of this k_epoch: -0.01787576824426651
KL Divergence Average Loss: 0.0019488747930154204
Total Loss of this k_epoch: -0.018601708114147186


Entropy of this k_epoch: 0.024264931678771973
Average policy_loss of this k_epoch: -0.01729062758386135
KL Divergence Average Loss: 0.043752674013376236
Total Loss of this k_epoch: -0.018066348508000374






Entropy of this k_epoch: 0.01677168905735016
Average policy_loss of this k_epoch: -0.017803503200411797
KL Divergence Average Loss: 0.0013912374852225184
Total Loss of this k_epoch: -0.018628176301717758


Entropy of this k_epoch: 0.024820908904075623
Average policy_loss of this k_epoch: -0.017121102660894394
KL Divergence Average Loss: 0.045230235904455185
Total Loss of this k_epoch: -0.01790984533727169


Entropy of this k_epoch: 0.019099224358797073
Average policy_loss of this k_epoch: -0.017726343125104904
KL Divergence Average Loss: 0.001302248565480113
Total Loss of this k_epoch: -0.018668282777071






Entropy of this k_epoch: 0.018423616886138916
Average policy_loss of this k_epoch: -0.017755594104528427
KL Divergence Average Loss: 0.0010301065631210804
Total Loss of this k_epoch: -0.018666474148631096


Entropy of this k_epoch: 0.01771063730120659
Average policy_loss of this k_epoch: -0.017756950110197067
KL Divergence Average Loss: 0.0011605345644056797
Total Loss of this k_epoch: -0.018630875274538994


Entropy of this k_epoch: 0.01763347163796425
Average policy_loss of this k_epoch: -0.017778627574443817
KL Divergence Average Loss: 0.0010598235530778766
Total Loss of this k_epoch: -0.01864970289170742






Entropy of this k_epoch: 0.01962289772927761
Average policy_loss of this k_epoch: -0.017704375088214874
KL Divergence Average Loss: 0.0008028321317397058
Total Loss of this k_epoch: -0.018677491694688797


Entropy of this k_epoch: 0.022158654406666756
Average policy_loss of this k_epoch: -0.017614053562283516
KL Divergence Average Loss: 0.0006413448136299849
Total Loss of this k_epoch: -0.018715573474764824


Entropy of this k_epoch: 0.02446441911160946
Average policy_loss of this k_epoch: -0.01752130500972271
KL Divergence Average Loss: 0.0006180736236274242
Total Loss of this k_epoch: -0.01873834617435932






Entropy of this k_epoch: 0.0288643017411232
Average policy_loss of this k_epoch: -0.01019129529595375
KL Divergence Average Loss: 0.00033024343429133296
Total Loss of this k_epoch: -0.011631207540631294


Entropy of this k_epoch: 0.026254715397953987
Average policy_loss of this k_epoch: -0.01746397465467453
KL Divergence Average Loss: 0.00040038995211943984
Total Loss of this k_epoch: -0.018772706389427185


Entropy of this k_epoch: 0.026315394788980484
Average policy_loss of this k_epoch: -0.017458831891417503
KL Divergence Average Loss: 0.0005265469662845135
Total Loss of this k_epoch: -0.018769335001707077






Entropy of this k_epoch: 0.025486290454864502
Average policy_loss of this k_epoch: -0.017481788992881775
KL Divergence Average Loss: 0.0006194745656102896
Total Loss of this k_epoch: -0.018749907612800598


Entropy of this k_epoch: 0.027119114995002747
Average policy_loss of this k_epoch: -0.017371786758303642
KL Divergence Average Loss: 0.0015043432358652353
Total Loss of this k_epoch: -0.01871269941329956


Entropy of this k_epoch: 0.02574983984231949
Average policy_loss of this k_epoch: -0.017446082085371017
KL Divergence Average Loss: 0.0013976923655718565
Total Loss of this k_epoch: -0.018719596788287163






Entropy of this k_epoch: 0.022539079189300537
Average policy_loss of this k_epoch: -0.01759381592273712
KL Divergence Average Loss: 0.0007052330183796585
Total Loss of this k_epoch: -0.01871371828019619


Entropy of this k_epoch: 0.026295289397239685
Average policy_loss of this k_epoch: -0.01742476224899292
KL Divergence Average Loss: 0.0010803707409650087
Total Loss of this k_epoch: -0.018728723749518394


Entropy of this k_epoch: 0.022347547113895416
Average policy_loss of this k_epoch: -0.01760719157755375
KL Divergence Average Loss: 0.0006219959468580782
Total Loss of this k_epoch: -0.018718348816037178






Entropy of this k_epoch: 0.025290315970778465
Average policy_loss of this k_epoch: -0.017472043633461
KL Divergence Average Loss: 0.0008551785140298307
Total Loss of this k_epoch: -0.01872800849378109


Entropy of this k_epoch: 0.035556744784116745
Average policy_loss of this k_epoch: -0.016836784780025482
KL Divergence Average Loss: 0.0065583400428295135
Total Loss of this k_epoch: -0.018549038097262383


Entropy of this k_epoch: 0.029149942100048065
Average policy_loss of this k_epoch: -0.017201390117406845
KL Divergence Average Loss: 0.0037829428911209106
Total Loss of this k_epoch: -0.018621057271957397




Epoch 10/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.32it/s]


Entropy of this k_epoch: 0.026866797357797623
Average policy_loss of this k_epoch: -0.01736743375658989
KL Divergence Average Loss: 0.0019218596862629056
Total Loss of this k_epoch: -0.01869155466556549


Entropy of this k_epoch: 0.024462008848786354
Average policy_loss of this k_epoch: -0.01749883033335209
KL Divergence Average Loss: 0.0012010355712845922
Total Loss of this k_epoch: -0.018709920346736908

Last k_epoch stats:
Loss: -0.0187099 | Ratio: 0.9956797 | Entropy Term: 0.0244620


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  31%|███▏      | 10/32 [00:54<02:00,  5.46s/it]

Entire Validation Dataset Accuracy: 0.9010| 173.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.028778519481420517
Average policy_loss of this k_epoch: 0.00019174069166183472
KL Divergence Average Loss: 0.0009995087748393416
Total Loss of this k_epoch: -0.001237190212123096






Entropy of this k_epoch: 0.029465386644005775
Average policy_loss of this k_epoch: 0.00024318695068359375
KL Divergence Average Loss: 0.0018069387879222631
Total Loss of this k_epoch: -0.001212012954056263






Entropy of this k_epoch: 0.02574511431157589
Average policy_loss of this k_epoch: 9.962916374206543e-05
KL Divergence Average Loss: 0.0006698785582557321
Total Loss of this k_epoch: -0.0011809278512373567


Entropy of this k_epoch: 0.03477080538868904
Average policy_loss of this k_epoch: 0.0007606334984302521
KL Divergence Average Loss: 0.01583053730428219
Total Loss of this k_epoch: -0.0008196013513952494


Entropy of this k_epoch: 0.03282180801033974
Average policy_loss of this k_epoch: 0.000365234911441803
KL Divergence Average Loss: 0.003517423290759325
Total Loss of this k_epoch: -0.0012406812747940421






Entropy of this k_epoch: 0.03055909276008606
Average policy_loss of this k_epoch: 0.00023295730352401733
KL Divergence Average Loss: 0.0006671063019894063
Total Loss of this k_epoch: -0.001288326340727508


Entropy of this k_epoch: 0.03867913410067558
Average policy_loss of this k_epoch: 0.0008224993944168091
KL Divergence Average Loss: 0.017182154580950737
Total Loss of this k_epoch: -0.0009396357927471399






Entropy of this k_epoch: 0.031195804476737976
Average policy_loss of this k_epoch: 0.0002487488090991974
KL Divergence Average Loss: 0.0009762399131432176
Total Loss of this k_epoch: -0.0013012790586799383






Entropy of this k_epoch: 0.03273628652095795
Average policy_loss of this k_epoch: 0.0003199540078639984
KL Divergence Average Loss: 0.0013029013061895967
Total Loss of this k_epoch: -0.001303831348195672


Entropy of this k_epoch: 0.03488394245505333
Average policy_loss of this k_epoch: 0.00035515427589416504
KL Divergence Average Loss: 0.0013241537380963564
Total Loss of this k_epoch: -0.0013758012792095542


Entropy of this k_epoch: 0.03911152854561806
Average policy_loss of this k_epoch: 0.00048279017210006714
KL Divergence Average Loss: 0.002444910816848278
Total Loss of this k_epoch: -0.001448337221518159






Entropy of this k_epoch: 0.039323195815086365
Average policy_loss of this k_epoch: 0.0005039647221565247
KL Divergence Average Loss: 0.001905719400383532
Total Loss of this k_epoch: -0.0014431378804147243


Entropy of this k_epoch: 0.042456015944480896
Average policy_loss of this k_epoch: 0.000588003545999527
KL Divergence Average Loss: 0.002555273473262787
Total Loss of this k_epoch: -0.001509244553744793






Entropy of this k_epoch: 0.045022428035736084
Average policy_loss of this k_epoch: 0.0006614141166210175
KL Divergence Average Loss: 0.0038932496681809425
Total Loss of this k_epoch: -0.0015507747884839773






Entropy of this k_epoch: 0.04906667023897171
Average policy_loss of this k_epoch: 0.0008075535297393799
KL Divergence Average Loss: 0.004903197288513184
Total Loss of this k_epoch: -0.0015967478975653648


Entropy of this k_epoch: 0.05448836833238602
Average policy_loss of this k_epoch: 0.0009662844240665436
KL Divergence Average Loss: 0.007081965915858746
Total Loss of this k_epoch: -0.001687314361333847


Entropy of this k_epoch: 0.06110810488462448
Average policy_loss of this k_epoch: 0.0012090504169464111
KL Divergence Average Loss: 0.008263442665338516
Total Loss of this k_epoch: -0.0017637205310165882






Entropy of this k_epoch: 0.0628785714507103
Average policy_loss of this k_epoch: 0.0012784115970134735
KL Divergence Average Loss: 0.009240971878170967
Total Loss of this k_epoch: -0.0017731074476614594


Entropy of this k_epoch: 0.0689304918050766
Average policy_loss of this k_epoch: 0.0014852508902549744
KL Divergence Average Loss: 0.01219528540968895
Total Loss of this k_epoch: -0.0018393209902569652






Entropy of this k_epoch: 0.08447302132844925
Average policy_loss of this k_epoch: 0.0021124184131622314
KL Divergence Average Loss: 0.0342031866312027
Total Loss of this k_epoch: -0.001769200898706913






Entropy of this k_epoch: 0.08697247505187988
Average policy_loss of this k_epoch: 0.0022673606872558594
KL Divergence Average Loss: 0.022163860499858856
Total Loss of this k_epoch: -0.0018596246372908354


Entropy of this k_epoch: 0.092979297041893
Average policy_loss of this k_epoch: 0.0024312660098075867
KL Divergence Average Loss: 0.02604619786143303
Total Loss of this k_epoch: -0.0019572367891669273


Entropy of this k_epoch: 0.10083618760108948
Average policy_loss of this k_epoch: 0.0027945563197135925
KL Divergence Average Loss: 0.02948850765824318
Total Loss of this k_epoch: -0.0019523678347468376






Entropy of this k_epoch: 0.1074877604842186
Average policy_loss of this k_epoch: 0.0030792728066444397
KL Divergence Average Loss: 0.03535762429237366
Total Loss of this k_epoch: -0.0019415393471717834


Entropy of this k_epoch: 0.1110852062702179
Average policy_loss of this k_epoch: 0.0032555386424064636
KL Divergence Average Loss: 0.03717253357172012
Total Loss of this k_epoch: -0.0019269962795078754






Entropy of this k_epoch: 0.11741485446691513
Average policy_loss of this k_epoch: 0.003525465726852417
KL Divergence Average Loss: 0.04388154298067093
Total Loss of this k_epoch: -0.0019064615480601788






Entropy of this k_epoch: 0.11353480070829391
Average policy_loss of this k_epoch: 0.003381691873073578
KL Divergence Average Loss: 0.039396170526742935
Total Loss of this k_epoch: -0.0019010866526514292


Entropy of this k_epoch: 0.11907228827476501
Average policy_loss of this k_epoch: 0.0036656633019447327
KL Divergence Average Loss: 0.04382418841123581
Total Loss of this k_epoch: -0.0018497095443308353


Entropy of this k_epoch: 0.12110567837953568
Average policy_loss of this k_epoch: 0.0037212446331977844
KL Divergence Average Loss: 0.04688825458288193
Total Loss of this k_epoch: -0.0018651564605534077






Entropy of this k_epoch: 0.12023194134235382
Average policy_loss of this k_epoch: 0.0036827698349952698
KL Divergence Average Loss: 0.04490943253040314
Total Loss of this k_epoch: -0.0018797330558300018


Entropy of this k_epoch: 0.11419367790222168
Average policy_loss of this k_epoch: 0.003403548151254654
KL Divergence Average Loss: 0.0398971326649189
Total Loss of this k_epoch: -0.0019071644637733698






Entropy of this k_epoch: 0.12389548867940903
Average policy_loss of this k_epoch: 0.0038996413350105286
KL Divergence Average Loss: 0.049741510301828384
Total Loss of this k_epoch: -0.001797717995941639






Entropy of this k_epoch: 0.10793730616569519
Average policy_loss of this k_epoch: 0.0031106993556022644
KL Divergence Average Loss: 0.034871626645326614
Total Loss of this k_epoch: -0.0019374496769160032


Entropy of this k_epoch: 0.10766567289829254
Average policy_loss of this k_epoch: 0.0031055063009262085
KL Divergence Average Loss: 0.034821510314941406
Total Loss of this k_epoch: -0.0019295625388622284


Entropy of this k_epoch: 0.10111861675977707
Average policy_loss of this k_epoch: 0.0028126388788223267
KL Divergence Average Loss: 0.029546912759542465
Total Loss of this k_epoch: -0.001947822980582714






Entropy of this k_epoch: 0.09792768210172653
Average policy_loss of this k_epoch: 0.0026562176644802094
KL Divergence Average Loss: 0.027668245136737823
Total Loss of this k_epoch: -0.001963484100997448


Entropy of this k_epoch: 0.0946938693523407
Average policy_loss of this k_epoch: 0.0025028176605701447
KL Divergence Average Loss: 0.02644972689449787
Total Loss of this k_epoch: -0.0019673786591738462






Entropy of this k_epoch: 0.0936431735754013
Average policy_loss of this k_epoch: 0.0024389028549194336
KL Divergence Average Loss: 0.032977934926748276
Total Loss of this k_epoch: -0.0019134762696921825






Entropy of this k_epoch: 0.08915112912654877
Average policy_loss of this k_epoch: 0.0022634975612163544
KL Divergence Average Loss: 0.02754943072795868
Total Loss of this k_epoch: -0.0019185647834092379


Entropy of this k_epoch: 0.0866524875164032
Average policy_loss of this k_epoch: 0.002147085964679718
KL Divergence Average Loss: 0.023714080452919006
Total Loss of this k_epoch: -0.0019483976066112518


Entropy of this k_epoch: 0.08668816834688187
Average policy_loss of this k_epoch: 0.0021863579750061035
KL Divergence Average Loss: 0.021472519263625145
Total Loss of this k_epoch: -0.001933325082063675






Entropy of this k_epoch: 0.08197976648807526
Average policy_loss of this k_epoch: 0.001998312771320343
KL Divergence Average Loss: 0.018367785960435867
Total Loss of this k_epoch: -0.001916998066008091


Entropy of this k_epoch: 0.08559581637382507
Average policy_loss of this k_epoch: 0.002111252397298813
KL Divergence Average Loss: 0.021613284945487976
Total Loss of this k_epoch: -0.0019524055533111095






Entropy of this k_epoch: 0.08501571416854858
Average policy_loss of this k_epoch: 0.0020908862352371216
KL Divergence Average Loss: 0.02138960175216198
Total Loss of this k_epoch: -0.0019460036419332027






Entropy of this k_epoch: 0.08767732232809067
Average policy_loss of this k_epoch: 0.0022185109555721283
KL Divergence Average Loss: 0.022448409348726273
Total Loss of this k_epoch: -0.0019408711232244968


Entropy of this k_epoch: 0.09098529815673828
Average policy_loss of this k_epoch: 0.0023362860083580017
KL Divergence Average Loss: 0.04129143804311752
Total Loss of this k_epoch: -0.0018000644631683826


Entropy of this k_epoch: 0.08509602397680283
Average policy_loss of this k_epoch: 0.0021162889897823334
KL Divergence Average Loss: 0.01951189897954464
Total Loss of this k_epoch: -0.0019433931447565556






Entropy of this k_epoch: 0.08493344485759735
Average policy_loss of this k_epoch: 0.002090863883495331
KL Divergence Average Loss: 0.02088962122797966
Total Loss of this k_epoch: -0.0019469121471047401


Entropy of this k_epoch: 0.09534488618373871
Average policy_loss of this k_epoch: 0.0025978609919548035
KL Divergence Average Loss: 0.05944327637553215
Total Loss of this k_epoch: -0.0015749505255371332






Entropy of this k_epoch: 0.0957517996430397
Average policy_loss of this k_epoch: 0.002576395869255066
KL Divergence Average Loss: 0.0259491428732872
Total Loss of this k_epoch: -0.0019517026375979185






Entropy of this k_epoch: 0.09672848880290985
Average policy_loss of this k_epoch: 0.0026082098484039307
KL Divergence Average Loss: 0.027106808498501778
Total Loss of this k_epoch: -0.001957146916538477


Entropy of this k_epoch: 0.1013931855559349
Average policy_loss of this k_epoch: 0.0028067491948604584
KL Divergence Average Loss: 0.030115874484181404
Total Loss of this k_epoch: -0.0019617516081780195


Entropy of this k_epoch: 0.0990195944905281
Average policy_loss of this k_epoch: 0.0027234330773353577
KL Divergence Average Loss: 0.029063036665320396
Total Loss of this k_epoch: -0.0019369162619113922






Entropy of this k_epoch: 0.10532722622156143
Average policy_loss of this k_epoch: 0.002996779978275299
KL Divergence Average Loss: 0.033389415591955185
Total Loss of this k_epoch: -0.001935687381774187


Entropy of this k_epoch: 0.10527336597442627
Average policy_loss of this k_epoch: 0.003011941909790039
KL Divergence Average Loss: 0.03283266723155975
Total Loss of this k_epoch: -0.0019233999773859978






Entropy of this k_epoch: 0.11110937595367432
Average policy_loss of this k_epoch: 0.0032884925603866577
KL Divergence Average Loss: 0.03724426031112671
Total Loss of this k_epoch: -0.0018945338670164347






Entropy of this k_epoch: 0.10786029696464539
Average policy_loss of this k_epoch: 0.003109131008386612
KL Divergence Average Loss: 0.03501135855913162
Total Loss of this k_epoch: -0.0019337702542543411


Entropy of this k_epoch: 0.1075153797864914
Average policy_loss of this k_epoch: 0.0031058937311172485
KL Divergence Average Loss: 0.034422315657138824
Total Loss of this k_epoch: -0.0019256521482020617


Entropy of this k_epoch: 0.10720611363649368
Average policy_loss of this k_epoch: 0.0030888989567756653
KL Divergence Average Loss: 0.03449653089046478
Total Loss of this k_epoch: -0.0019264414440840483






Entropy of this k_epoch: 0.11161208152770996
Average policy_loss of this k_epoch: 0.0034602321684360504
KL Divergence Average Loss: 0.04468271881341934
Total Loss of this k_epoch: -0.0016735447570681572


Entropy of this k_epoch: 0.10020262748003006
Average policy_loss of this k_epoch: 0.0027793794870376587
KL Divergence Average Loss: 0.029904639348387718
Total Loss of this k_epoch: -0.0019317055121064186






Entropy of this k_epoch: 0.09409746527671814
Average policy_loss of this k_epoch: 0.0025100931525230408
KL Divergence Average Loss: 0.025099800899624825
Total Loss of this k_epoch: -0.0019437824375927448




Epoch 11/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.28it/s]


Entropy of this k_epoch: 0.10247041285037994
Average policy_loss of this k_epoch: 0.0029102228581905365
KL Divergence Average Loss: 0.031096624210476875
Total Loss of this k_epoch: -0.0019023315981030464


Entropy of this k_epoch: 0.09457245469093323
Average policy_loss of this k_epoch: 0.0025217533111572266
KL Divergence Average Loss: 0.026059284806251526
Total Loss of this k_epoch: -0.001946276519447565

Last k_epoch stats:
Loss: -0.0019463 | Ratio: 0.9798260 | Entropy Term: 0.0945725


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  34%|███▍      | 11/32 [00:59<01:54,  5.46s/it]

Entire Validation Dataset Accuracy: 0.9219| 177.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782,
        0.1782], device='cuda:0')





Entropy of this k_epoch: 0.09326379746198654
Average policy_loss of this k_epoch: -0.017039960250258446
KL Divergence Average Loss: 0.0008231846732087433
Total Loss of this k_epoch: -0.02169491909444332






Entropy of this k_epoch: 0.09022331982851028
Average policy_loss of this k_epoch: 0.01236267015337944
KL Divergence Average Loss: 0.0007586331339552999
Total Loss of this k_epoch: 0.007859090343117714






Entropy of this k_epoch: 0.08021856099367142
Average policy_loss of this k_epoch: -0.009618356823921204
KL Divergence Average Loss: 0.0006533367559313774
Total Loss of this k_epoch: -0.013622751459479332


Entropy of this k_epoch: 0.0658426359295845
Average policy_loss of this k_epoch: -0.018581144511699677
KL Divergence Average Loss: 0.002077838871628046
Total Loss of this k_epoch: -0.02185249887406826


Entropy of this k_epoch: 0.050250813364982605
Average policy_loss of this k_epoch: -0.019448591396212578
KL Divergence Average Loss: 0.004505297634750605
Total Loss of this k_epoch: -0.02191607840359211






Entropy of this k_epoch: 0.041737399995326996
Average policy_loss of this k_epoch: -0.019812295213341713
KL Divergence Average Loss: 0.0067453691735863686
Total Loss of this k_epoch: -0.021831711754202843






Entropy of this k_epoch: 0.03279343992471695
Average policy_loss of this k_epoch: -0.02020891010761261
KL Divergence Average Loss: 0.008981999009847641
Total Loss of this k_epoch: -0.021758761256933212


Entropy of this k_epoch: 0.026764364913105965
Average policy_loss of this k_epoch: -0.020463377237319946
KL Divergence Average Loss: 0.01099146343767643
Total Loss of this k_epoch: -0.02169167995452881


Entropy of this k_epoch: 0.02477792277932167
Average policy_loss of this k_epoch: -0.0205545574426651
KL Divergence Average Loss: 0.014978533610701561
Total Loss of this k_epoch: -0.02164366841316223






Entropy of this k_epoch: 0.020362891256809235
Average policy_loss of this k_epoch: -0.020715799182653427
KL Divergence Average Loss: 0.01334950141608715
Total Loss of this k_epoch: -0.021600447595119476


Entropy of this k_epoch: 0.018745092675089836
Average policy_loss of this k_epoch: -0.020764240995049477
KL Divergence Average Loss: 0.01532670482993126
Total Loss of this k_epoch: -0.021548228338360786






Entropy of this k_epoch: 0.014379382133483887
Average policy_loss of this k_epoch: -0.020913338288664818
KL Divergence Average Loss: 0.015694722533226013
Total Loss of this k_epoch: -0.02147536166012287






Entropy of this k_epoch: 0.015722451731562614
Average policy_loss of this k_epoch: -0.02086477167904377
KL Divergence Average Loss: 0.017236817628145218
Total Loss of this k_epoch: -0.021478526294231415


Entropy of this k_epoch: 0.015131738968193531
Average policy_loss of this k_epoch: -0.02088409662246704
KL Divergence Average Loss: 0.02305884286761284
Total Loss of this k_epoch: -0.021410096436738968


Entropy of this k_epoch: 0.012252476066350937
Average policy_loss of this k_epoch: -0.020986752584576607
KL Divergence Average Loss: 0.01670316979289055
Total Loss of this k_epoch: -0.02143234573304653






Entropy of this k_epoch: 0.013018365949392319
Average policy_loss of this k_epoch: -0.020962610840797424
KL Divergence Average Loss: 0.01645231992006302
Total Loss of this k_epoch: -0.021449005231261253


Entropy of this k_epoch: 0.01374884508550167
Average policy_loss of this k_epoch: -0.02094009518623352
KL Divergence Average Loss: 0.021009746938943863
Total Loss of this k_epoch: -0.021417440846562386






Entropy of this k_epoch: 0.013543534092605114
Average policy_loss of this k_epoch: -0.02092546597123146
KL Divergence Average Loss: 0.02240532822906971
Total Loss of this k_epoch: -0.021378587931394577






Entropy of this k_epoch: 0.013093268498778343
Average policy_loss of this k_epoch: -0.02096978761255741
KL Divergence Average Loss: 0.01841035485267639
Total Loss of this k_epoch: -0.021440347656607628


Entropy of this k_epoch: 0.012188130989670753
Average policy_loss of this k_epoch: -0.02099836803972721
KL Divergence Average Loss: 0.019247978925704956
Total Loss of this k_epoch: -0.02141529507935047


Entropy of this k_epoch: 0.0168153028935194
Average policy_loss of this k_epoch: -0.020692165940999985
KL Divergence Average Loss: 0.03628550469875336
Total Loss of this k_epoch: -0.021170075982809067






Entropy of this k_epoch: 0.012713638134300709
Average policy_loss of this k_epoch: -0.02098095417022705
KL Divergence Average Loss: 0.01656225696206093
Total Loss of this k_epoch: -0.021451013162732124


Entropy of this k_epoch: 0.011857878416776657
Average policy_loss of this k_epoch: -0.021010123193264008
KL Divergence Average Loss: 0.017348535358905792
Total Loss of this k_epoch: -0.021429531276226044






Entropy of this k_epoch: 0.012335389852523804
Average policy_loss of this k_epoch: -0.020993035286664963
KL Divergence Average Loss: 0.01662345975637436
Total Loss of this k_epoch: -0.021443570032715797






Entropy of this k_epoch: 0.012757349759340286
Average policy_loss of this k_epoch: -0.02097105048596859
KL Divergence Average Loss: 0.016994193196296692
Total Loss of this k_epoch: -0.021438974887132645


Entropy of this k_epoch: 0.012815643101930618
Average policy_loss of this k_epoch: -0.020963512361049652
KL Divergence Average Loss: 0.01689307764172554
Total Loss of this k_epoch: -0.021435363218188286


Entropy of this k_epoch: 0.013242698274552822
Average policy_loss of this k_epoch: -0.020957333967089653
KL Divergence Average Loss: 0.01610148325562477
Total Loss of this k_epoch: -0.021458454430103302






Entropy of this k_epoch: 0.022612426429986954
Average policy_loss of this k_epoch: -0.020161446183919907
KL Divergence Average Loss: 0.08942870050668716
Total Loss of this k_epoch: -0.02039778046309948


Entropy of this k_epoch: 0.014177326112985611
Average policy_loss of this k_epoch: -0.020922694355249405
KL Divergence Average Loss: 0.015761643648147583
Total Loss of this k_epoch: -0.021473944187164307






Entropy of this k_epoch: 0.014916344545781612
Average policy_loss of this k_epoch: -0.020900120958685875
KL Divergence Average Loss: 0.015467526391148567
Total Loss of this k_epoch: -0.021491263061761856






Entropy of this k_epoch: 0.01451327744871378
Average policy_loss of this k_epoch: -0.020915323868393898
KL Divergence Average Loss: 0.015566268004477024
Total Loss of this k_epoch: -0.021485324949026108


Entropy of this k_epoch: 0.015074247494339943
Average policy_loss of this k_epoch: -0.020886994898319244
KL Divergence Average Loss: 0.015345394611358643
Total Loss of this k_epoch: -0.021487252786755562


Entropy of this k_epoch: 0.01862507313489914
Average policy_loss of this k_epoch: -0.0207575261592865
KL Divergence Average Loss: 0.013895375654101372
Total Loss of this k_epoch: -0.021549826487898827






Entropy of this k_epoch: 0.01796916499733925
Average policy_loss of this k_epoch: -0.020773548632860184
KL Divergence Average Loss: 0.01418006420135498
Total Loss of this k_epoch: -0.021530205383896828


Entropy of this k_epoch: 0.02040674537420273
Average policy_loss of this k_epoch: -0.020697476342320442
KL Divergence Average Loss: 0.013215549290180206
Total Loss of this k_epoch: -0.021585658192634583






Entropy of this k_epoch: 0.03077707812190056
Average policy_loss of this k_epoch: -0.019535576924681664
KL Divergence Average Loss: 0.019282257184386253
Total Loss of this k_epoch: -0.02088160812854767






Entropy of this k_epoch: 0.0283956378698349
Average policy_loss of this k_epoch: -0.020218683406710625
KL Divergence Average Loss: 0.01227779034525156
Total Loss of this k_epoch: -0.02151568792760372


Entropy of this k_epoch: 0.025642329826951027
Average policy_loss of this k_epoch: -0.020376842468976974
KL Divergence Average Loss: 0.012625416740775108
Total Loss of this k_epoch: -0.021532705053687096


Entropy of this k_epoch: 0.02600790746510029
Average policy_loss of this k_epoch: -0.020472979173064232
KL Divergence Average Loss: 0.011123066768050194
Total Loss of this k_epoch: -0.021662142127752304






Entropy of this k_epoch: 0.024929072707891464
Average policy_loss of this k_epoch: -0.02052343636751175
KL Divergence Average Loss: 0.01172979362308979
Total Loss of this k_epoch: -0.021652592346072197


Entropy of this k_epoch: 0.026425635442137718
Average policy_loss of this k_epoch: -0.02045661211013794
KL Divergence Average Loss: 0.011084767058491707
Total Loss of this k_epoch: -0.02166704647243023






Entropy of this k_epoch: 0.02878638356924057
Average policy_loss of this k_epoch: -0.02035963162779808
KL Divergence Average Loss: 0.010787980630993843
Total Loss of this k_epoch: -0.02169107086956501







Entropy of this k_epoch: 0.02935143932700157
Average policy_loss of this k_epoch: -0.020330719649791718
KL Divergence Average Loss: 0.010007752105593681
Total Loss of this k_epoch: -0.02169821411371231


Entropy of this k_epoch: 0.030292555689811707
Average policy_loss of this k_epoch: -0.02031627856194973
KL Divergence Average Loss: 0.009750930592417717
Total Loss of this k_epoch: -0.021733397617936134



Epoch 12/32 (Inner K-Epochs):  69%|██████▉   | 44/64 [00:03<00:01, 12.07it/s][A


Entropy of this k_epoch: 0.03136082738637924
Average policy_loss of this k_epoch: -0.0202596765011549
KL Divergence Average Loss: 0.009479381144046783
Total Loss of this k_epoch: -0.02173292450606823






Entropy of this k_epoch: 0.0355139821767807
Average policy_loss of this k_epoch: -0.020087463781237602
KL Divergence Average Loss: 0.008269419893622398
Total Loss of this k_epoch: -0.02178046852350235






Entropy of this k_epoch: 0.03625834360718727
Average policy_loss of this k_epoch: -0.020067885518074036
KL Divergence Average Loss: 0.008010497316718102
Total Loss of this k_epoch: -0.021800696849822998


Entropy of this k_epoch: 0.03666330873966217
Average policy_loss of this k_epoch: -0.02003513090312481
KL Divergence Average Loss: 0.007811469957232475
Total Loss of this k_epoch: -0.02179018035531044


Entropy of this k_epoch: 0.039986059069633484
Average policy_loss of this k_epoch: -0.0199259165674448
KL Divergence Average Loss: 0.007020344026386738
Total Loss of this k_epoch: -0.02185501717031002






Entropy of this k_epoch: 0.04187643527984619
Average policy_loss of this k_epoch: -0.019837718456983566
KL Divergence Average Loss: 0.006444783415645361
Total Loss of this k_epoch: -0.021867092698812485


Entropy of this k_epoch: 0.04912635684013367
Average policy_loss of this k_epoch: -0.019422931596636772
KL Divergence Average Loss: 0.005795017350465059
Total Loss of this k_epoch: -0.02182129956781864






Entropy of this k_epoch: 0.05074933543801308
Average policy_loss of this k_epoch: -0.01939455419778824
KL Divergence Average Loss: 0.004626809619367123
Total Loss of this k_epoch: -0.02188575267791748






Entropy of this k_epoch: 0.04779540374875069
Average policy_loss of this k_epoch: -0.01956777088344097
KL Divergence Average Loss: 0.005426640156656504
Total Loss of this k_epoch: -0.021903274580836296


Entropy of this k_epoch: 0.05272318050265312
Average policy_loss of this k_epoch: -0.019332628697156906
KL Divergence Average Loss: 0.004353110678493977
Total Loss of this k_epoch: -0.02192525565624237


Entropy of this k_epoch: 0.05636787787079811
Average policy_loss of this k_epoch: -0.01914455182850361
KL Divergence Average Loss: 0.003515138290822506
Total Loss of this k_epoch: -0.021927794441580772






Entropy of this k_epoch: 0.056148335337638855
Average policy_loss of this k_epoch: -0.01911872811615467
KL Divergence Average Loss: 0.0037560921628028154
Total Loss of this k_epoch: -0.02188858389854431


Entropy of this k_epoch: 0.05867653340101242
Average policy_loss of this k_epoch: -0.0190371572971344
KL Divergence Average Loss: 0.003003763034939766
Total Loss of this k_epoch: -0.021940946578979492






Entropy of this k_epoch: 0.06126738712191582
Average policy_loss of this k_epoch: -0.018918264657258987
KL Divergence Average Loss: 0.0027414734940975904
Total Loss of this k_epoch: -0.021954219788312912






Entropy of this k_epoch: 0.06219369173049927
Average policy_loss of this k_epoch: -0.01884174346923828
KL Divergence Average Loss: 0.0025127434637397528
Total Loss of this k_epoch: -0.02192630060017109


Entropy of this k_epoch: 0.06497160345315933
Average policy_loss of this k_epoch: -0.018722709268331528
KL Divergence Average Loss: 0.002722822828218341
Total Loss of this k_epoch: -0.021944060921669006


Entropy of this k_epoch: 0.07152935862541199
Average policy_loss of this k_epoch: -0.018324870616197586
KL Divergence Average Loss: 0.0013224759604781866
Total Loss of this k_epoch: -0.021888114511966705






Entropy of this k_epoch: 0.07100879400968552
Average policy_loss of this k_epoch: -0.01836811751127243
KL Divergence Average Loss: 0.0020627162884920835
Total Loss of this k_epoch: -0.021897930651903152


Entropy of this k_epoch: 0.0680338591337204
Average policy_loss of this k_epoch: -0.015647627413272858
KL Divergence Average Loss: 0.0019111630972474813
Total Loss of this k_epoch: -0.019030209630727768




Epoch 12/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.28it/s]

Entropy of this k_epoch: 0.06790046393871307
Average policy_loss of this k_epoch: -0.01854548789560795
KL Divergence Average Loss: 0.001730085932649672
Total Loss of this k_epoch: -0.021923210471868515

Last k_epoch stats:
Loss: -0.0219232 | Ratio: 1.0019259 | Entropy Term: 0.0679005



>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  38%|███▊      | 12/32 [01:05<01:49,  5.47s/it]

Entire Validation Dataset Accuracy: 0.9271| 178.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.06299386918544769
Average policy_loss of this k_epoch: 0.000262383371591568
KL Divergence Average Loss: 0.0007136576459743083
Total Loss of this k_epoch: -0.002880173735320568






Entropy of this k_epoch: 0.05971212312579155
Average policy_loss of this k_epoch: 0.00015176460146903992
KL Divergence Average Loss: 0.0007749145734123886
Total Loss of this k_epoch: -0.002826092531904578






Entropy of this k_epoch: 0.05813206732273102
Average policy_loss of this k_epoch: 9.328126907348633e-05
KL Divergence Average Loss: 0.0006929044611752033
Total Loss of this k_epoch: -0.0028063931968063116


Entropy of this k_epoch: 0.0563286617398262
Average policy_loss of this k_epoch: 2.3253262042999268e-05
KL Divergence Average Loss: 0.00035379105247557163
Total Loss of this k_epoch: -0.0027896419633179903


Entropy of this k_epoch: 0.060693442821502686
Average policy_loss of this k_epoch: 0.00016854703426361084
KL Divergence Average Loss: 0.0011066407896578312
Total Loss of this k_epoch: -0.0028550587594509125






Entropy of this k_epoch: 0.05685632675886154
Average policy_loss of this k_epoch: 3.7629157304763794e-05
KL Divergence Average Loss: 0.0004182743141427636
Total Loss of this k_epoch: -0.002801004331558943


Entropy of this k_epoch: 0.05664663761854172
Average policy_loss of this k_epoch: 3.3035874366760254e-05
KL Divergence Average Loss: 0.00033045397140085697
Total Loss of this k_epoch: -0.002795991487801075






Entropy of this k_epoch: 0.06008383631706238
Average policy_loss of this k_epoch: 0.00016187503933906555
KL Divergence Average Loss: 0.0005891952314414084
Total Loss of this k_epoch: -0.002836424857378006






Entropy of this k_epoch: 0.06419974565505981
Average policy_loss of this k_epoch: 0.00032030045986175537
KL Divergence Average Loss: 0.0011557884281501174
Total Loss of this k_epoch: -0.002878129016608


Entropy of this k_epoch: 0.0712389349937439
Average policy_loss of this k_epoch: 0.0005625709891319275
KL Divergence Average Loss: 0.001225972082465887
Total Loss of this k_epoch: -0.0029871161095798016


Entropy of this k_epoch: 0.07161378860473633
Average policy_loss of this k_epoch: 0.0005809739232063293
KL Divergence Average Loss: 0.0010809857631102204
Total Loss of this k_epoch: -0.002988905645906925






Entropy of this k_epoch: 0.07769230008125305
Average policy_loss of this k_epoch: 0.0008238404989242554
KL Divergence Average Loss: 0.0020892133470624685
Total Loss of this k_epoch: -0.003039882518351078


Entropy of this k_epoch: 0.08080172538757324
Average policy_loss of this k_epoch: 0.0009464547038078308
KL Divergence Average Loss: 0.0027094220276921988
Total Loss of this k_epoch: -0.0030665372032672167






Entropy of this k_epoch: 0.0838923528790474
Average policy_loss of this k_epoch: 0.0010717958211898804
KL Divergence Average Loss: 0.0029520865064114332
Total Loss of this k_epoch: -0.003093301085755229






Entropy of this k_epoch: 0.09008001536130905
Average policy_loss of this k_epoch: 0.0013281777501106262
KL Divergence Average Loss: 0.004853073973208666
Total Loss of this k_epoch: -0.003127292264252901


Entropy of this k_epoch: 0.09821268171072006
Average policy_loss of this k_epoch: 0.0016812942922115326
KL Divergence Average Loss: 0.00643410999327898
Total Loss of this k_epoch: -0.0031649989541620016


Entropy of this k_epoch: 0.10680336505174637
Average policy_loss of this k_epoch: 0.002074059098958969
KL Divergence Average Loss: 0.009084003046154976
Total Loss of this k_epoch: -0.003175269113853574






Entropy of this k_epoch: 0.11051715910434723
Average policy_loss of this k_epoch: 0.00224250927567482
KL Divergence Average Loss: 0.010064487345516682
Total Loss of this k_epoch: -0.003182704094797373


Entropy of this k_epoch: 0.11451055109500885
Average policy_loss of this k_epoch: 0.002422962337732315
KL Divergence Average Loss: 0.01127629354596138
Total Loss of this k_epoch: -0.0031898024026304483






Entropy of this k_epoch: 0.1194261759519577
Average policy_loss of this k_epoch: 0.00266236811876297
KL Divergence Average Loss: 0.01416093297302723
Total Loss of this k_epoch: -0.003167331451550126






Entropy of this k_epoch: 0.1244371309876442
Average policy_loss of this k_epoch: 0.0029118992388248444
KL Divergence Average Loss: 0.016920601949095726
Total Loss of this k_epoch: -0.0031407512724399567


Entropy of this k_epoch: 0.12379959225654602
Average policy_loss of this k_epoch: 0.002871498465538025
KL Divergence Average Loss: 0.015580300241708755
Total Loss of this k_epoch: -0.00316267809830606


Entropy of this k_epoch: 0.1253969818353653
Average policy_loss of this k_epoch: 0.002965029329061508
KL Divergence Average Loss: 0.01596185564994812
Total Loss of this k_epoch: -0.0031452015973627567






Entropy of this k_epoch: 0.12350696325302124
Average policy_loss of this k_epoch: 0.0028750598430633545
KL Divergence Average Loss: 0.014931073412299156
Total Loss of this k_epoch: -0.0031509774271398783


Entropy of this k_epoch: 0.12146944552659988
Average policy_loss of this k_epoch: 0.002750314772129059
KL Divergence Average Loss: 0.01403922587633133
Total Loss of this k_epoch: -0.0031827655620872974






Entropy of this k_epoch: 0.1263718158006668
Average policy_loss of this k_epoch: 0.003016039729118347
KL Divergence Average Loss: 0.01684035360813141
Total Loss of this k_epoch: -0.0031341477297246456






Entropy of this k_epoch: 0.1251170039176941
Average policy_loss of this k_epoch: 0.002969413995742798
KL Divergence Average Loss: 0.01611706241965294
Total Loss of this k_epoch: -0.003125265473499894


Entropy of this k_epoch: 0.1234467625617981
Average policy_loss of this k_epoch: 0.0028687268495559692
KL Divergence Average Loss: 0.014889840967953205
Total Loss of this k_epoch: -0.0031547127291560173


Entropy of this k_epoch: 0.12314361333847046
Average policy_loss of this k_epoch: 0.0028461292386054993
KL Divergence Average Loss: 0.015726592391729355
Total Loss of this k_epoch: -0.0031537855975329876






Entropy of this k_epoch: 0.11422425508499146
Average policy_loss of this k_epoch: 0.002437952905893326
KL Divergence Average Loss: 0.012624119408428669
Total Loss of this k_epoch: -0.0031470186077058315


Entropy of this k_epoch: 0.11525262892246246
Average policy_loss of this k_epoch: 0.002447068691253662
KL Divergence Average Loss: 0.01432688906788826
Total Loss of this k_epoch: -0.0031722940038889647






Entropy of this k_epoch: 0.11596834659576416
Average policy_loss of this k_epoch: 0.0024616271257400513
KL Divergence Average Loss: 0.013781149871647358
Total Loss of this k_epoch: -0.0031989789567887783






Entropy of this k_epoch: 0.1059194803237915
Average policy_loss of this k_epoch: 0.002033524215221405
KL Divergence Average Loss: 0.008494430221617222
Total Loss of this k_epoch: -0.003177505685016513


Entropy of this k_epoch: 0.11238497495651245
Average policy_loss of this k_epoch: 0.002338021993637085
KL Divergence Average Loss: 0.010874178260564804
Total Loss of this k_epoch: -0.0031724851578474045


Entropy of this k_epoch: 0.10371090471744537
Average policy_loss of this k_epoch: 0.0019398331642150879
KL Divergence Average Loss: 0.008154688403010368
Total Loss of this k_epoch: -0.0031641654204577208






Entropy of this k_epoch: 0.10307066142559052
Average policy_loss of this k_epoch: 0.0019024163484573364
KL Divergence Average Loss: 0.00804236438125372
Total Loss of this k_epoch: -0.0031706930603832006


Entropy of this k_epoch: 0.10203316807746887
Average policy_loss of this k_epoch: 0.0018551051616668701
KL Divergence Average Loss: 0.007386251352727413
Total Loss of this k_epoch: -0.003172690747305751





Epoch 13/32 (Inner K-Epochs):  59%|█████▉    | 38/64 [00:03<00:02, 12.43it/s]

Entropy of this k_epoch: 0.09509049355983734
Average policy_loss of this k_epoch: 0.0015452578663825989
KL Divergence Average Loss: 0.005438884720206261
Total Loss of this k_epoch: -0.0031548780389130116



[A


Entropy of this k_epoch: 0.09859791398048401
Average policy_loss of this k_epoch: 0.0017022937536239624
KL Divergence Average Loss: 0.006416769698262215
Total Loss of this k_epoch: -0.0031634345650672913


Entropy of this k_epoch: 0.0986269861459732
Average policy_loss of this k_epoch: 0.0017102137207984924
KL Divergence Average Loss: 0.00673300214111805
Total Loss of this k_epoch: -0.003153805620968342






Entropy of this k_epoch: 0.10094139724969864
Average policy_loss of this k_epoch: 0.0017919912934303284
KL Divergence Average Loss: 0.007032675668597221
Total Loss of this k_epoch: -0.0031847518403083086


Entropy of this k_epoch: 0.10661062598228455
Average policy_loss of this k_epoch: 0.0020656436681747437
KL Divergence Average Loss: 0.009120079688727856
Total Loss of this k_epoch: -0.0031736870296299458


Entropy of this k_epoch: 0.09918542206287384
Average policy_loss of this k_epoch: 0.0017349347472190857
KL Divergence Average Loss: 0.007247858215123415
Total Loss of this k_epoch: -0.003151857992634177






Entropy of this k_epoch: 0.09769658744335175
Average policy_loss of this k_epoch: 0.0016222819685935974
KL Divergence Average Loss: 0.009418636560440063
Total Loss of this k_epoch: -0.00316836079582572


Entropy of this k_epoch: 0.09998096525669098
Average policy_loss of this k_epoch: 0.0017535723745822906
KL Divergence Average Loss: 0.006703646387904882
Total Loss of this k_epoch: -0.0031784395687282085


Entropy of this k_epoch: 0.10602081567049026
Average policy_loss of this k_epoch: 0.002030428498983383
KL Divergence Average Loss: 0.009388989768922329
Total Loss of this k_epoch: -0.003176722675561905






Entropy of this k_epoch: 0.10466332733631134
Average policy_loss of this k_epoch: 0.0019751787185668945
KL Divergence Average Loss: 0.008237511850893497
Total Loss of this k_epoch: -0.0031756125390529633


Entropy of this k_epoch: 0.10899931192398071
Average policy_loss of this k_epoch: 0.002140343189239502
KL Divergence Average Loss: 0.012886525131762028
Total Loss of this k_epoch: -0.003180757164955139


Entropy of this k_epoch: 0.10632478445768356
Average policy_loss of this k_epoch: 0.00203530490398407
KL Divergence Average Loss: 0.008897094987332821
Total Loss of this k_epoch: -0.0031919635366648436






Entropy of this k_epoch: 0.11095750331878662
Average policy_loss of this k_epoch: 0.002262309193611145
KL Divergence Average Loss: 0.010227017104625702
Total Loss of this k_epoch: -0.0031832957174628973


Entropy of this k_epoch: 0.11114437878131866
Average policy_loss of this k_epoch: 0.0022705383598804474
KL Divergence Average Loss: 0.010570058599114418
Total Loss of this k_epoch: -0.003180979983881116


Entropy of this k_epoch: 0.11341573297977448
Average policy_loss of this k_epoch: 0.002377476543188095
KL Divergence Average Loss: 0.011137431487441063
Total Loss of this k_epoch: -0.0031819359865039587






Entropy of this k_epoch: 0.11584679037332535
Average policy_loss of this k_epoch: 0.002486869692802429
KL Divergence Average Loss: 0.01173861138522625
Total Loss of this k_epoch: -0.003188083879649639


Entropy of this k_epoch: 0.11639884114265442
Average policy_loss of this k_epoch: 0.0025238245725631714
KL Divergence Average Loss: 0.012269800528883934
Total Loss of this k_epoch: -0.003173419740051031


Entropy of this k_epoch: 0.1128876805305481
Average policy_loss of this k_epoch: 0.002353966236114502
KL Divergence Average Loss: 0.010809902101755142
Total Loss of this k_epoch: -0.0031823189929127693






Entropy of this k_epoch: 0.11387240141630173
Average policy_loss of this k_epoch: 0.002414606511592865
KL Divergence Average Loss: 0.011536739766597748
Total Loss of this k_epoch: -0.0031636462081223726


Entropy of this k_epoch: 0.116202712059021
Average policy_loss of this k_epoch: 0.002485714852809906
KL Divergence Average Loss: 0.013716286979615688
Total Loss of this k_epoch: -0.003187257796525955


Entropy of this k_epoch: 0.11605605483055115
Average policy_loss of this k_epoch: 0.002490706741809845
KL Divergence Average Loss: 0.01232954766601324
Total Loss of this k_epoch: -0.0031888005323708057






Entropy of this k_epoch: 0.11163301020860672
Average policy_loss of this k_epoch: 0.0023025497794151306
KL Divergence Average Loss: 0.010774383321404457
Total Loss of this k_epoch: -0.003171356860548258


Entropy of this k_epoch: 0.11496986448764801
Average policy_loss of this k_epoch: 0.0024490319192409515
KL Divergence Average Loss: 0.011604086495935917
Total Loss of this k_epoch: -0.003183420281857252


Entropy of this k_epoch: 0.10927268117666245
Average policy_loss of this k_epoch: 0.0022062882781028748
KL Divergence Average Loss: 0.010163419879972935
Total Loss of this k_epoch: -0.0031557115726172924




Epoch 13/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.30it/s]


Entropy of this k_epoch: 0.10802190005779266
Average policy_loss of this k_epoch: 0.002124480903148651
KL Divergence Average Loss: 0.009540373459458351
Total Loss of this k_epoch: -0.0031812102533876896


Entropy of this k_epoch: 0.11033575236797333
Average policy_loss of this k_epoch: 0.0022143274545669556
KL Divergence Average Loss: 0.011363585479557514
Total Loss of this k_epoch: -0.0031888242810964584


Entropy of this k_epoch: 0.1073041707277298
Average policy_loss of this k_epoch: 0.002082154154777527
KL Divergence Average Loss: 0.00966955628246069
Total Loss of this k_epoch: -0.0031863590702414513

Last k_epoch stats:
Loss: -0.0031864 | Ratio: 0.9833428 | Entropy Term: 0.1073042


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  41%|████      | 13/32 [01:10<01:43,  5.46s/it]

Entire Validation Dataset Accuracy: 0.9271| 178.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.3191, 0.3191, 0.3191, 0.3191, 0.3191, 0.3191, 0.3191, 0.3191, 0.3191,
        0.3191], device='cuda:0')





Entropy of this k_epoch: 0.10893481224775314
Average policy_loss of this k_epoch: 0.0613565631210804
KL Divergence Average Loss: 0.0008513919310644269
Total Loss of this k_epoch: 0.055918335914611816






Entropy of this k_epoch: 0.10135054588317871
Average policy_loss of this k_epoch: 0.0005942955613136292
KL Divergence Average Loss: 0.0008834673790261149
Total Loss of this k_epoch: -0.004464397206902504






Entropy of this k_epoch: 0.08443303406238556
Average policy_loss of this k_epoch: -0.021752309054136276
KL Divergence Average Loss: 0.0019340433645993471
Total Loss of this k_epoch: -0.02595462091267109


Entropy of this k_epoch: 0.07403993606567383
Average policy_loss of this k_epoch: -0.04361618682742119
KL Divergence Average Loss: 0.0025969892740249634
Total Loss of this k_epoch: -0.04729221388697624


Entropy of this k_epoch: 0.057532720267772675
Average policy_loss of this k_epoch: -0.05170409008860588
KL Divergence Average Loss: 0.0067972457036376
Total Loss of this k_epoch: -0.05451275408267975






Entropy of this k_epoch: 0.044387564063072205
Average policy_loss of this k_epoch: -0.052692390978336334
KL Divergence Average Loss: 0.009923725388944149
Total Loss of this k_epoch: -0.05481253191828728


Entropy of this k_epoch: 0.03324264660477638
Average policy_loss of this k_epoch: -0.0533914715051651
KL Divergence Average Loss: 0.01102506648749113
Total Loss of this k_epoch: -0.054943352937698364


Entropy of this k_epoch: 0.029634933918714523
Average policy_loss of this k_epoch: -0.0536474734544754
KL Divergence Average Loss: 0.014688889496028423




Total Loss of this k_epoch: -0.05498233065009117


Entropy of this k_epoch: 0.025294233113527298
Average policy_loss of this k_epoch: -0.05385780707001686
KL Divergence Average Loss: 0.020686093717813492
Total Loss of this k_epoch: -0.0549156554043293


Entropy of this k_epoch: 0.025941234081983566
Average policy_loss of this k_epoch: -0.05369732528924942
KL Divergence Average Loss: 0.030194176360964775
Total Loss of this k_epoch: -0.05469244346022606






Entropy of this k_epoch: 0.014424087479710579
Average policy_loss of this k_epoch: -0.0545547753572464
KL Divergence Average Loss: 0.018059134483337402
Total Loss of this k_epoch: -0.05509538948535919


Entropy of this k_epoch: 0.01383041962981224
Average policy_loss of this k_epoch: -0.05457785353064537
KL Divergence Average Loss: 0.02024814486503601
Total Loss of this k_epoch: -0.05506689473986626


Entropy of this k_epoch: 0.015553588047623634
Average policy_loss of this k_epoch: -0.05435921996831894
KL Divergence Average Loss: 0.02885909378528595
Total Loss of this k_epoch: -0.054848309606313705






Entropy of this k_epoch: 0.010453985072672367
Average policy_loss of this k_epoch: -0.05475056543946266
KL Divergence Average Loss: 0.01990703120827675
Total Loss of this k_epoch: -0.055074192583560944


Entropy of this k_epoch: 0.00966074038296938
Average policy_loss of this k_epoch: -0.05477225035429001
KL Divergence Average Loss: 0.020385518670082092
Total Loss of this k_epoch: -0.05505143105983734


Entropy of this k_epoch: 0.008838094770908356
Average policy_loss of this k_epoch: -0.054823730140924454
KL Divergence Average Loss: 0.020827103406190872
Total Loss of this k_epoch: -0.05505736544728279






Entropy of this k_epoch: 0.00805133581161499
Average policy_loss of this k_epoch: -0.05487912893295288
KL Divergence Average Loss: 0.02111901342868805
Total Loss of this k_epoch: -0.055070504546165466


Entropy of this k_epoch: 0.007704997435212135
Average policy_loss of this k_epoch: -0.05488213524222374
KL Divergence Average Loss: 0.021314173936843872
Total Loss of this k_epoch: -0.05505424365401268


Entropy of this k_epoch: 0.007128838449716568
Average policy_loss of this k_epoch: -0.05490880459547043
KL Divergence Average Loss: 0.02170710638165474
Total Loss of this k_epoch: -0.055048175156116486






Entropy of this k_epoch: 0.0069585321471095085
Average policy_loss of this k_epoch: -0.05490875989198685
KL Divergence Average Loss: 0.021710917353630066
Total Loss of this k_epoch: -0.055039577186107635


Entropy of this k_epoch: 0.006832887884229422
Average policy_loss of this k_epoch: -0.054913174360990524
KL Divergence Average Loss: 0.02178674377501011
Total Loss of this k_epoch: -0.05503695085644722


Entropy of this k_epoch: 0.006711151450872421
Average policy_loss of this k_epoch: -0.05491285026073456
KL Divergence Average Loss: 0.021885987371206284
Total Loss of this k_epoch: -0.055029548704624176






Entropy of this k_epoch: 0.006266653537750244
Average policy_loss of this k_epoch: -0.05495665222406387
KL Divergence Average Loss: 0.0220627523958683
Total Loss of this k_epoch: -0.055049359798431396


Entropy of this k_epoch: 0.0062465788796544075
Average policy_loss of this k_epoch: -0.05495935305953026
KL Divergence Average Loss: 0.022093530744314194
Total Loss of this k_epoch: -0.055050745606422424


Entropy of this k_epoch: 0.006020377855747938
Average policy_loss of this k_epoch: -0.05495410040020943
KL Divergence Average Loss: 0.0221997257322073
Total Loss of this k_epoch: -0.0550331212580204






Entropy of this k_epoch: 0.013105467893183231
Average policy_loss of this k_epoch: -0.054002635180950165
KL Divergence Average Loss: 0.02346719801425934
Total Loss of this k_epoch: -0.05442323908209801


Entropy of this k_epoch: 0.005370550788938999
Average policy_loss of this k_epoch: -0.05499300733208656
KL Divergence Average Loss: 0.022547658532857895
Total Loss of this k_epoch: -0.05503605678677559


Entropy of this k_epoch: 0.006870402954518795
Average policy_loss of this k_epoch: -0.054899346083402634
KL Divergence Average Loss: 0.021899934858083725
Total Loss of this k_epoch: -0.05502386763691902






Entropy of this k_epoch: 0.005340250208973885
Average policy_loss of this k_epoch: -0.05498987436294556
KL Divergence Average Loss: 0.02258182317018509
Total Loss of this k_epoch: -0.05503106862306595


Entropy of this k_epoch: 0.004852039273828268
Average policy_loss of this k_epoch: -0.055014677345752716
KL Divergence Average Loss: 0.022839367389678955
Total Loss of this k_epoch: -0.05502888560295105


Entropy of this k_epoch: 0.004982680547982454
Average policy_loss of this k_epoch: -0.055003825575113297
KL Divergence Average Loss: 0.02277064509689808
Total Loss of this k_epoch: -0.05502525344491005






Entropy of this k_epoch: 0.004900174215435982
Average policy_loss of this k_epoch: -0.0550115704536438
KL Divergence Average Loss: 0.02281758561730385
Total Loss of this k_epoch: -0.05502840504050255


Entropy of this k_epoch: 0.00468077976256609
Average policy_loss of this k_epoch: -0.05502636358141899
KL Divergence Average Loss: 0.02292320877313614
Total Loss of this k_epoch: -0.05503116920590401


Entropy of this k_epoch: 0.004606420639902353
Average policy_loss of this k_epoch: -0.05503752827644348
KL Divergence Average Loss: 0.023001089692115784
Total Loss of this k_epoch: -0.055037837475538254






Entropy of this k_epoch: 0.004441522061824799
Average policy_loss of this k_epoch: -0.055026568472385406
KL Divergence Average Loss: 0.023070933297276497
Total Loss of this k_epoch: -0.05501793324947357


Entropy of this k_epoch: 0.0054881274700164795
Average policy_loss of this k_epoch: -0.05499577522277832
KL Divergence Average Loss: 0.023577362298965454
Total Loss of this k_epoch: -0.05503440648317337


Entropy of this k_epoch: 0.0046957144513726234
Average policy_loss of this k_epoch: -0.0550207756459713
KL Divergence Average Loss: 0.02292393520474434
Total Loss of this k_epoch: -0.05502632260322571






Entropy of this k_epoch: 0.0045273625291883945
Average policy_loss of this k_epoch: -0.0550360269844532
KL Divergence Average Loss: 0.023091455921530724
Total Loss of this k_epoch: -0.05503147840499878


Entropy of this k_epoch: 0.004633991979062557
Average policy_loss of this k_epoch: -0.05502172186970711
KL Divergence Average Loss: 0.02295791730284691
Total Loss of this k_epoch: -0.05502384155988693


Entropy of this k_epoch: 0.009138140827417374
Average policy_loss of this k_epoch: -0.05468052998185158
KL Divergence Average Loss: 0.037898872047662735
Total Loss of this k_epoch: -0.05475844815373421






Entropy of this k_epoch: 0.0043543013744056225
Average policy_loss of this k_epoch: -0.05503075569868088
KL Divergence Average Loss: 0.023123621940612793
Total Loss of this k_epoch: -0.05501723289489746


Entropy of this k_epoch: 0.004364487715065479
Average policy_loss of this k_epoch: -0.05503012239933014
KL Divergence Average Loss: 0.023122966289520264
Total Loss of this k_epoch: -0.05501711741089821


Entropy of this k_epoch: 0.0044319964945316315
Average policy_loss of this k_epoch: -0.05502871796488762
KL Divergence Average Loss: 0.02309294044971466
Total Loss of this k_epoch: -0.05501938611268997






Entropy of this k_epoch: 0.004652873147279024
Average policy_loss of this k_epoch: -0.05501972883939743
KL Divergence Average Loss: 0.02301819622516632
Total Loss of this k_epoch: -0.055022191256284714


Entropy of this k_epoch: 0.004625639412552118
Average policy_loss of this k_epoch: -0.05502605438232422
KL Divergence Average Loss: 0.023076307028532028
Total Loss of this k_epoch: -0.055026572197675705


Entropy of this k_epoch: 0.004610637202858925
Average policy_loss of this k_epoch: -0.05502602458000183
KL Divergence Average Loss: 0.022973045706748962
Total Loss of this k_epoch: -0.055026825517416






Entropy of this k_epoch: 0.004938289523124695
Average policy_loss of this k_epoch: -0.055011335760354996
KL Divergence Average Loss: 0.022783270105719566
Total Loss of this k_epoch: -0.055030420422554016


Entropy of this k_epoch: 0.004944751970469952
Average policy_loss of this k_epoch: -0.05501234531402588
KL Divergence Average Loss: 0.02277994528412819
Total Loss of this k_epoch: -0.05503178387880325


Entropy of this k_epoch: 0.005880745127797127
Average policy_loss of this k_epoch: -0.0549614354968071
KL Divergence Average Loss: 0.02227814309298992
Total Loss of this k_epoch: -0.05503269284963608






Entropy of this k_epoch: 0.004952660296112299
Average policy_loss of this k_epoch: -0.05500603839755058
KL Divergence Average Loss: 0.022785622626543045
Total Loss of this k_epoch: -0.05502581223845482


Entropy of this k_epoch: 0.00582992983981967
Average policy_loss of this k_epoch: -0.054950833320617676
KL Divergence Average Loss: 0.02237807586789131
Total Loss of this k_epoch: -0.05501854792237282


Entropy of this k_epoch: 0.005314936861395836
Average policy_loss of this k_epoch: -0.05499252304434776
KL Divergence Average Loss: 0.022582244127988815
Total Loss of this k_epoch: -0.05503244698047638






Entropy of this k_epoch: 0.005350959021598101
Average policy_loss of this k_epoch: -0.054992157965898514
KL Divergence Average Loss: 0.02259005606174469
Total Loss of this k_epoch: -0.05503380298614502


Entropy of this k_epoch: 0.006148114334791899
Average policy_loss of this k_epoch: -0.054961126297712326
KL Divergence Average Loss: 0.02212352678179741
Total Loss of this k_epoch: -0.05504729971289635


Entropy of this k_epoch: 0.005790911614894867
Average policy_loss of this k_epoch: -0.05498296394944191
KL Divergence Average Loss: 0.02233288809657097
Total Loss of this k_epoch: -0.05504918098449707






Entropy of this k_epoch: 0.005421762354671955
Average policy_loss of this k_epoch: -0.0549880787730217
KL Divergence Average Loss: 0.022548478096723557
Total Loss of this k_epoch: -0.05503368377685547


Entropy of this k_epoch: 0.006404370069503784
Average policy_loss of this k_epoch: -0.054936733096838
KL Divergence Average Loss: 0.022014113143086433
Total Loss of this k_epoch: -0.05503680929541588


Entropy of this k_epoch: 0.005285046994686127
Average policy_loss of this k_epoch: -0.054985709488391876
KL Divergence Average Loss: 0.022611182183027267
Total Loss of this k_epoch: -0.05502385273575783






Entropy of this k_epoch: 0.005930604413151741
Average policy_loss of this k_epoch: -0.05497455969452858
KL Divergence Average Loss: 0.022250935435295105
Total Loss of this k_epoch: -0.05504858121275902


Entropy of this k_epoch: 0.006470461376011372
Average policy_loss of this k_epoch: -0.054941803216934204
KL Divergence Average Loss: 0.021958831697702408
Total Loss of this k_epoch: -0.05504573881626129


Entropy of this k_epoch: 0.0060823713429272175
Average policy_loss of this k_epoch: -0.05495978146791458
KL Divergence Average Loss: 0.02215588092803955
Total Loss of this k_epoch: -0.055042341351509094




Epoch 14/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.21it/s]


Entropy of this k_epoch: 0.006555542349815369
Average policy_loss of this k_epoch: -0.05492876097559929
KL Divergence Average Loss: 0.021922437474131584
Total Loss of this k_epoch: -0.05503731220960617


Entropy of this k_epoch: 0.006836090702563524
Average policy_loss of this k_epoch: -0.054912954568862915
KL Divergence Average Loss: 0.021780090406537056
Total Loss of this k_epoch: -0.05503695830702782


Entropy of this k_epoch: 0.006662630941718817
Average policy_loss of this k_epoch: -0.05493050441145897
KL Divergence Average Loss: 0.021866459399461746
Total Loss of this k_epoch: -0.05504497140645981

Last k_epoch stats:
Loss: -0.0550450 | Ratio: 0.9454715 | Entropy Term: 0.0066626


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  44%|████▍     | 14/32 [01:16<01:38,  5.47s/it]

Entire Validation Dataset Accuracy: 0.9271| 178.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.006333640310913324
Average policy_loss of this k_epoch: 1.3902783393859863e-05
KL Divergence Average Loss: 6.069946175557561e-05
Total Loss of this k_epoch: -0.0003021722368430346






Entropy of this k_epoch: 0.007081881165504456
Average policy_loss of this k_epoch: 2.894178032875061e-05
KL Divergence Average Loss: 7.596638897666708e-05
Total Loss of this k_epoch: -0.0003243926039431244






Entropy of this k_epoch: 0.007313713431358337
Average policy_loss of this k_epoch: 3.3624470233917236e-05
KL Divergence Average Loss: 0.00010692865907913074
Total Loss of this k_epoch: -0.00033099192660301924


Entropy of this k_epoch: 0.007353984285145998
Average policy_loss of this k_epoch: 3.56137752532959e-05
KL Divergence Average Loss: 0.00016371492529287934
Total Loss of this k_epoch: -0.0003304482961539179


Entropy of this k_epoch: 0.007598581723868847
Average policy_loss of this k_epoch: 3.912299871444702e-05
KL Divergence Average Loss: 0.00010071269934996963
Total Loss of this k_epoch: -0.00033979894942604005






Entropy of this k_epoch: 0.0072571467608213425
Average policy_loss of this k_epoch: 3.25813889503479e-05
KL Divergence Average Loss: 0.00012782459089066833
Total Loss of this k_epoch: -0.0003289977030362934


Entropy of this k_epoch: 0.008467692881822586
Average policy_loss of this k_epoch: 5.7559460401535034e-05
KL Divergence Average Loss: 0.00018672685837373137
Total Loss of this k_epoch: -0.0003639579226728529






Entropy of this k_epoch: 0.00828978605568409
Average policy_loss of this k_epoch: 5.513429641723633e-05
KL Divergence Average Loss: 0.00020817773474846035
Total Loss of this k_epoch: -0.00035727323847822845






Entropy of this k_epoch: 0.007658872753381729
Average policy_loss of this k_epoch: 4.158914089202881e-05
KL Divergence Average Loss: 0.00017256847058888525
Total Loss of this k_epoch: -0.00033962880843319


Entropy of this k_epoch: 0.0081082284450531
Average policy_loss of this k_epoch: 5.094707012176514e-05
KL Divergence Average Loss: 0.00020632865198422223
Total Loss of this k_epoch: -0.0003524010826367885


Entropy of this k_epoch: 0.008798874914646149
Average policy_loss of this k_epoch: 6.607919931411743e-05
KL Divergence Average Loss: 0.000275844126008451
Total Loss of this k_epoch: -0.00037110611447133124






Entropy of this k_epoch: 0.010364902205765247
Average policy_loss of this k_epoch: 9.877979755401611e-05
KL Divergence Average Loss: 0.00040308618918061256
Total Loss of this k_epoch: -0.0004154344496782869


Entropy of this k_epoch: 0.009596742689609528
Average policy_loss of this k_epoch: 8.227676153182983e-05
KL Divergence Average Loss: 0.00030437158420681953
Total Loss of this k_epoch: -0.00039451668271794915






Entropy of this k_epoch: 0.01179926935583353
Average policy_loss of this k_epoch: 0.00013156980276107788
KL Divergence Average Loss: 0.0006461470038630068
Total Loss of this k_epoch: -0.0004519321955740452






Entropy of this k_epoch: 0.009340195916593075
Average policy_loss of this k_epoch: 7.682293653488159e-05
KL Divergence Average Loss: 0.00033367096330039203
Total Loss of this k_epoch: -0.00038685015169903636


Entropy of this k_epoch: 0.011148229241371155
Average policy_loss of this k_epoch: 0.00011804699897766113
KL Divergence Average Loss: 0.000578778563067317
Total Loss of this k_epoch: -0.00043357667163945735


Entropy of this k_epoch: 0.012953167781233788
Average policy_loss of this k_epoch: 0.00016017258167266846
KL Divergence Average Loss: 0.0009355012443847954
Total Loss of this k_epoch: -0.0004781308234669268






Entropy of this k_epoch: 0.013659438118338585
Average policy_loss of this k_epoch: 0.00017438828945159912
KL Divergence Average Loss: 0.0009995319414883852
Total Loss of this k_epoch: -0.0004985883133485913


Entropy of this k_epoch: 0.01338717620819807
Average policy_loss of this k_epoch: 0.00016754493117332458
KL Divergence Average Loss: 0.0009661903022788465
Total Loss of this k_epoch: -0.0004921520012430847






Entropy of this k_epoch: 0.01467070635408163
Average policy_loss of this k_epoch: 0.00019714981317520142
KL Divergence Average Loss: 0.0011205249466001987
Total Loss of this k_epoch: -0.0005251802504062653





Epoch 15/32 (Inner K-Epochs):  34%|███▍      | 22/64 [00:01<00:03, 12.39it/s]

Entropy of this k_epoch: 0.014798816293478012
Average policy_loss of this k_epoch: 0.00020103156566619873
KL Divergence Average Loss: 0.001206174842081964
Total Loss of this k_epoch: -0.0005268475506454706


Entropy of this k_epoch: 0.016514591872692108
Average policy_loss of this k_epoch: 0.00024376064538955688
KL Divergence Average Loss: 0.0017206217162311077
Total Loss of this k_epoch: -0.0005647627403959632



[A


Entropy of this k_epoch: 0.020772110670804977
Average policy_loss of this k_epoch: 0.0003497302532196045
KL Divergence Average Loss: 0.0029374300502240658
Total Loss of this k_epoch: -0.0006595010636374354






Entropy of this k_epoch: 0.01724480837583542
Average policy_loss of this k_epoch: 0.00025987252593040466
KL Divergence Average Loss: 0.001813221606425941
Total Loss of this k_epoch: -0.0005842357059009373






Entropy of this k_epoch: 0.021313369274139404
Average policy_loss of this k_epoch: 0.00036627426743507385
KL Divergence Average Loss: 0.0032607719767838717
Total Loss of this k_epoch: -0.0006667865673080087


Entropy of this k_epoch: 0.02126334235072136
Average policy_loss of this k_epoch: 0.00037001073360443115
KL Divergence Average Loss: 0.0036414996720850468
Total Loss of this k_epoch: -0.000656741380225867


Entropy of this k_epoch: 0.024794019758701324
Average policy_loss of this k_epoch: 0.000462900847196579
KL Divergence Average Loss: 0.004820326343178749
Total Loss of this k_epoch: -0.0007285969331860542






Entropy of this k_epoch: 0.025238841772079468
Average policy_loss of this k_epoch: 0.00046811625361442566
KL Divergence Average Loss: 0.004502519965171814
Total Loss of this k_epoch: -0.000748800637666136


Entropy of this k_epoch: 0.02768954634666443
Average policy_loss of this k_epoch: 0.0005303956568241119
KL Divergence Average Loss: 0.005367044359445572
Total Loss of this k_epoch: -0.0008004112751223147






Entropy of this k_epoch: 0.03209080547094345
Average policy_loss of this k_epoch: 0.0006648041307926178
KL Divergence Average Loss: 0.007758375722914934
Total Loss of this k_epoch: -0.0008621523738838732






Entropy of this k_epoch: 0.03308925777673721
Average policy_loss of this k_epoch: 0.0006959736347198486
KL Divergence Average Loss: 0.00824684277176857
Total Loss of this k_epoch: -0.0008760208147577941


Entropy of this k_epoch: 0.03481828048825264
Average policy_loss of this k_epoch: 0.0007455423474311829
KL Divergence Average Loss: 0.009471174329519272
Total Loss of this k_epoch: -0.0009006600012071431


Entropy of this k_epoch: 0.035466358065605164
Average policy_loss of this k_epoch: 0.0007589235901832581
KL Divergence Average Loss: 0.009286785498261452
Total Loss of this k_epoch: -0.0009215264581143856






Entropy of this k_epoch: 0.047149740159511566
Average policy_loss of this k_epoch: 0.0011354237794876099
KL Divergence Average Loss: 0.016813917085528374
Total Loss of this k_epoch: -0.0010539242066442966


Entropy of this k_epoch: 0.04408707097172737
Average policy_loss of this k_epoch: 0.001025654375553131
KL Divergence Average Loss: 0.014658017084002495
Total Loss of this k_epoch: -0.0010321191512048244






Entropy of this k_epoch: 0.050398506224155426
Average policy_loss of this k_epoch: 0.0012417994439601898
KL Divergence Average Loss: 0.019137125462293625
Total Loss of this k_epoch: -0.0010867547243833542






Entropy of this k_epoch: 0.0567731112241745
Average policy_loss of this k_epoch: 0.0014550760388374329
KL Divergence Average Loss: 0.023795107379555702
Total Loss of this k_epoch: -0.0011456285137683153


Entropy of this k_epoch: 0.0617946982383728
Average policy_loss of this k_epoch: 0.0016601644456386566
KL Divergence Average Loss: 0.029383085668087006
Total Loss of this k_epoch: -0.0011357397306710482


Entropy of this k_epoch: 0.06926130503416061
Average policy_loss of this k_epoch: 0.0019284337759017944
KL Divergence Average Loss: 0.035325273871421814
Total Loss of this k_epoch: -0.0011813787277787924






Entropy of this k_epoch: 0.0696185827255249
Average policy_loss of this k_epoch: 0.0019453614950180054
KL Divergence Average Loss: 0.03607029467821121
Total Loss of this k_epoch: -0.001174864824861288


Entropy of this k_epoch: 0.07550510764122009
Average policy_loss of this k_epoch: 0.002163223922252655
KL Divergence Average Loss: 0.04140026122331619
Total Loss of this k_epoch: -0.001198028912767768






Entropy of this k_epoch: 0.08442576229572296
Average policy_loss of this k_epoch: 0.0025745928287506104
KL Divergence Average Loss: 0.05426090583205223
Total Loss of this k_epoch: -0.001104086171835661






Entropy of this k_epoch: 0.08644161373376846
Average policy_loss of this k_epoch: 0.0026621446013450623
KL Divergence Average Loss: 0.05630316585302353
Total Loss of this k_epoch: -0.001096904743462801


Entropy of this k_epoch: 0.0897713303565979
Average policy_loss of this k_epoch: 0.002744566649198532
KL Divergence Average Loss: 0.05734441429376602
Total Loss of this k_epoch: -0.0011705555953085423


Entropy of this k_epoch: 0.09054838865995407
Average policy_loss of this k_epoch: 0.002803131937980652
KL Divergence Average Loss: 0.05983022600412369
Total Loss of this k_epoch: -0.0011259852908551693






Entropy of this k_epoch: 0.09010634571313858
Average policy_loss of this k_epoch: 0.002819344401359558
KL Divergence Average Loss: 0.06088406965136528
Total Loss of this k_epoch: -0.0010771320667117834


Entropy of this k_epoch: 0.08606836199760437
Average policy_loss of this k_epoch: 0.002585574984550476
KL Divergence Average Loss: 0.0524381622672081
Total Loss of this k_epoch: -0.00119346147403121






Entropy of this k_epoch: 0.10092990845441818
Average policy_loss of this k_epoch: 0.003434695303440094
KL Divergence Average Loss: 0.08088237792253494
Total Loss of this k_epoch: -0.0008029765449464321






Entropy of this k_epoch: 0.08667787909507751
Average policy_loss of this k_epoch: 0.0026122108101844788
KL Divergence Average Loss: 0.0538521483540535
Total Loss of this k_epoch: -0.0011831619776785374


Entropy of this k_epoch: 0.0777527317404747
Average policy_loss of this k_epoch: 0.0022173449397087097
KL Divergence Average Loss: 0.042086053639650345
Total Loss of this k_epoch: -0.0012494311667978764


Entropy of this k_epoch: 0.07420916855335236
Average policy_loss of this k_epoch: 0.002100624144077301
KL Divergence Average Loss: 0.039564866572618484
Total Loss of this k_epoch: -0.0012141857296228409






Entropy of this k_epoch: 0.07473506778478622
Average policy_loss of this k_epoch: 0.002110213041305542
KL Divergence Average Loss: 0.039684638381004333
Total Loss of this k_epoch: -0.0012296941131353378


Entropy of this k_epoch: 0.07429542392492294
Average policy_loss of this k_epoch: 0.002080138772726059
KL Divergence Average Loss: 0.038687560707330704
Total Loss of this k_epoch: -0.0012477568816393614







Entropy of this k_epoch: 0.07716228067874908
Average policy_loss of this k_epoch: 0.0024187006056308746
KL Divergence Average Loss: 0.05277138948440552
Total Loss of this k_epoch: -0.0009116996079683304



Epoch 15/32 (Inner K-Epochs):  84%|████████▍ | 54/64 [00:04<00:00, 12.22it/s][A


Entropy of this k_epoch: 0.07062952220439911
Average policy_loss of this k_epoch: 0.0019424110651016235
KL Divergence Average Loss: 0.03511348366737366
Total Loss of this k_epoch: -0.0012379302643239498


Entropy of this k_epoch: 0.0670323297381401
Average policy_loss of this k_epoch: 0.0018235817551612854
KL Divergence Average Loss: 0.032483961433172226
Total Loss of this k_epoch: -0.00120319495908916






Entropy of this k_epoch: 0.06668932735919952
Average policy_loss of this k_epoch: 0.0017918199300765991
KL Divergence Average Loss: 0.031295426189899445
Total Loss of this k_epoch: -0.0012296922504901886


Entropy of this k_epoch: 0.06539797782897949
Average policy_loss of this k_epoch: 0.0017472244799137115
KL Divergence Average Loss: 0.03041214868426323
Total Loss of this k_epoch: -0.0012185529340058565


Entropy of this k_epoch: 0.063541941344738
Average policy_loss of this k_epoch: 0.0016777515411376953
KL Divergence Average Loss: 0.02891452983021736
Total Loss of this k_epoch: -0.001210200134664774






Entropy of this k_epoch: 0.06637723743915558
Average policy_loss of this k_epoch: 0.001780390739440918
KL Divergence Average Loss: 0.03147458657622337
Total Loss of this k_epoch: -0.0012237252667546272


Entropy of this k_epoch: 0.06677854061126709
Average policy_loss of this k_epoch: 0.001799941062927246
KL Divergence Average Loss: 0.031732503324747086
Total Loss of this k_epoch: -0.001221660990267992


Entropy of this k_epoch: 0.0652928352355957
Average policy_loss of this k_epoch: 0.0017407611012458801
KL Divergence Average Loss: 0.03039976954460144
Total Loss of this k_epoch: -0.001219883095473051




Epoch 15/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.40it/s]


Entropy of this k_epoch: 0.0678379088640213
Average policy_loss of this k_epoch: 0.0018527880311012268
KL Divergence Average Loss: 0.03310509771108627
Total Loss of this k_epoch: -0.0012080564629286528


Entropy of this k_epoch: 0.0647372454404831
Average policy_loss of this k_epoch: 0.0017247311770915985
KL Divergence Average Loss: 0.03005831502377987
Total Loss of this k_epoch: -0.0012115479912608862

Last k_epoch stats:
Loss: -0.0012115 | Ratio: 0.9862021 | Entropy Term: 0.0647372


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  47%|████▋     | 15/32 [01:21<01:32,  5.45s/it]

Entire Validation Dataset Accuracy: 0.9271| 178.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.06862739473581314
Average policy_loss of this k_epoch: 3.600865602493286e-05
KL Divergence Average Loss: 0.0005830682348459959
Total Loss of this k_epoch: -0.003389530349522829






Entropy of this k_epoch: 0.07165160775184631
Average policy_loss of this k_epoch: 0.00014296174049377441
KL Divergence Average Loss: 0.0005083100404590368
Total Loss of this k_epoch: -0.0034345355816185474






Entropy of this k_epoch: 0.08393151313066483
Average policy_loss of this k_epoch: 0.0018402859568595886
KL Divergence Average Loss: 0.1859849989414215
Total Loss of this k_epoch: -0.0004964401014149189


Entropy of this k_epoch: 0.08491472154855728
Average policy_loss of this k_epoch: 0.000698968768119812
KL Divergence Average Loss: 0.03372896462678909
Total Loss of this k_epoch: -0.0032094777561724186


Entropy of this k_epoch: 0.0846007838845253
Average policy_loss of this k_epoch: 0.0006880760192871094
KL Divergence Average Loss: 0.001501098508015275
Total Loss of this k_epoch: -0.0035269521176815033






Entropy of this k_epoch: 0.09145127236843109
Average policy_loss of this k_epoch: 0.0009524896740913391
KL Divergence Average Loss: 0.0020786102395504713
Total Loss of this k_epoch: -0.0035992879420518875


Entropy of this k_epoch: 0.10477939993143082
Average policy_loss of this k_epoch: 0.0015541799366474152
KL Divergence Average Loss: 0.004477859940379858
Total Loss of this k_epoch: -0.0036400118842720985






Entropy of this k_epoch: 0.11022047698497772
Average policy_loss of this k_epoch: 0.0018020272254943848
KL Divergence Average Loss: 0.005846869666129351
Total Loss of this k_epoch: -0.003650528145954013






Entropy of this k_epoch: 0.10748644173145294
Average policy_loss of this k_epoch: 0.0016766935586929321
KL Divergence Average Loss: 0.005491476971656084
Total Loss of this k_epoch: -0.0036427138838917017


Entropy of this k_epoch: 0.12142802774906158
Average policy_loss of this k_epoch: 0.002358894795179367
KL Divergence Average Loss: 0.009204095229506493
Total Loss of this k_epoch: -0.0036204655189067125


Entropy of this k_epoch: 0.13529005646705627
Average policy_loss of this k_epoch: 0.003078453242778778
KL Divergence Average Loss: 0.014251879416406155
Total Loss of this k_epoch: -0.0035435305908322334






Entropy of this k_epoch: 0.13231033086776733
Average policy_loss of this k_epoch: 0.002957955002784729
KL Divergence Average Loss: 0.014019373804330826
Total Loss of this k_epoch: -0.0035173678770661354


Entropy of this k_epoch: 0.1364799439907074
Average policy_loss of this k_epoch: 0.003161914646625519
KL Divergence Average Loss: 0.015505753457546234
Total Loss of this k_epoch: -0.003507025307044387






Entropy of this k_epoch: 0.130839005112648
Average policy_loss of this k_epoch: 0.0029263347387313843
KL Divergence Average Loss: 0.015079647302627563
Total Loss of this k_epoch: -0.0034648189321160316






Entropy of this k_epoch: 0.13918939232826233
Average policy_loss of this k_epoch: 0.0033737048506736755
KL Divergence Average Loss: 0.017907707020640373
Total Loss of this k_epoch: -0.0034066876396536827


Entropy of this k_epoch: 0.12424053996801376
Average policy_loss of this k_epoch: 0.002491828054189682
KL Divergence Average Loss: 0.009960772469639778
Total Loss of this k_epoch: -0.0036205914802849293


Entropy of this k_epoch: 0.12185852229595184
Average policy_loss of this k_epoch: 0.0024020522832870483
KL Divergence Average Loss: 0.009939957410097122
Total Loss of this k_epoch: -0.0035914741456508636






Entropy of this k_epoch: 0.11741481721401215
Average policy_loss of this k_epoch: 0.002204764634370804
KL Divergence Average Loss: 0.00828813947737217
Total Loss of this k_epoch: -0.0035830948036164045


Entropy of this k_epoch: 0.10886023193597794
Average policy_loss of this k_epoch: 0.0017609819769859314
KL Divergence Average Loss: 0.005792627111077309
Total Loss of this k_epoch: -0.0036241034977138042






Entropy of this k_epoch: 0.12112179398536682
Average policy_loss of this k_epoch: 0.0024049170315265656
KL Divergence Average Loss: 0.011329932138323784
Total Loss of this k_epoch: -0.0035378735046833754






Entropy of this k_epoch: 0.10932748019695282
Average policy_loss of this k_epoch: 0.0017867162823677063
KL Divergence Average Loss: 0.006130925379693508
Total Loss of this k_epoch: -0.0036183486226946115


Entropy of this k_epoch: 0.09730701893568039
Average policy_loss of this k_epoch: 0.0012282505631446838
KL Divergence Average Loss: 0.003444963600486517
Total Loss of this k_epoch: -0.003602650947868824


Entropy of this k_epoch: 0.10035653412342072
Average policy_loss of this k_epoch: 0.0013812407851219177
KL Divergence Average Loss: 0.00427637854591012
Total Loss of this k_epoch: -0.003593822242692113






Entropy of this k_epoch: 0.09931693226099014
Average policy_loss of this k_epoch: 0.0012953057885169983
KL Divergence Average Loss: 0.003363970434293151
Total Loss of this k_epoch: -0.003636901266872883


Entropy of this k_epoch: 0.09942018985748291
Average policy_loss of this k_epoch: 0.0013290010392665863
KL Divergence Average Loss: 0.0038350997492671013
Total Loss of this k_epoch: -0.0036036577075719833






Entropy of this k_epoch: 0.09375317394733429
Average policy_loss of this k_epoch: 0.001056857407093048
KL Divergence Average Loss: 0.0024705082178115845
Total Loss of this k_epoch: -0.003606096375733614






Entropy of this k_epoch: 0.09315919876098633
Average policy_loss of this k_epoch: 0.0010432600975036621
KL Divergence Average Loss: 0.0025314856320619583
Total Loss of this k_epoch: -0.003589385189116001


Entropy of this k_epoch: 0.0932237058877945
Average policy_loss of this k_epoch: 0.0010521672666072845
KL Divergence Average Loss: 0.0028010974638164043
Total Loss of this k_epoch: -0.0035810070112347603


Entropy of this k_epoch: 0.09483245760202408
Average policy_loss of this k_epoch: 0.0011297911405563354
KL Divergence Average Loss: 0.0028319871053099632
Total Loss of this k_epoch: -0.0035835120361298323






Entropy of this k_epoch: 0.09970063716173172
Average policy_loss of this k_epoch: 0.0013240724802017212
KL Divergence Average Loss: 0.003389766439795494
Total Loss of this k_epoch: -0.0036270618438720703


Entropy of this k_epoch: 0.096628338098526
Average policy_loss of this k_epoch: 0.0011841580271720886
KL Divergence Average Loss: 0.0031615509651601315
Total Loss of this k_epoch: -0.0036156433634459972






Entropy of this k_epoch: 0.09825798869132996
Average policy_loss of this k_epoch: 0.0012689009308815002
KL Divergence Average Loss: 0.0037301387637853622
Total Loss of this k_epoch: -0.003606697078794241






Entropy of this k_epoch: 0.10193318873643875
Average policy_loss of this k_epoch: 0.0014390908181667328
KL Divergence Average Loss: 0.004061529878526926
Total Loss of this k_epoch: -0.003616953268647194


Entropy of this k_epoch: 0.09919512271881104
Average policy_loss of this k_epoch: 0.0012868233025074005
KL Divergence Average Loss: 0.003198671154677868
Total Loss of this k_epoch: -0.003640946000814438


Entropy of this k_epoch: 0.10384920239448547
Average policy_loss of this k_epoch: 0.0015198513865470886
KL Divergence Average Loss: 0.0047309510409832
Total Loss of this k_epoch: -0.003625299083068967






Entropy of this k_epoch: 0.10096405446529388
Average policy_loss of this k_epoch: 0.001384630799293518
KL Divergence Average Loss: 0.003929006401449442
Total Loss of this k_epoch: -0.003624281845986843


Entropy of this k_epoch: 0.10347598046064377
Average policy_loss of this k_epoch: 0.00151139497756958
KL Divergence Average Loss: 0.004748674109578133
Total Loss of this k_epoch: -0.0036149173974990845






Entropy of this k_epoch: 0.10968515276908875
Average policy_loss of this k_epoch: 0.0017838329076766968
KL Divergence Average Loss: 0.005918607115745544
Total Loss of this k_epoch: -0.0036412389017641544






Entropy of this k_epoch: 0.1090170294046402
Average policy_loss of this k_epoch: 0.0017461217939853668
KL Divergence Average Loss: 0.005738821346312761
Total Loss of this k_epoch: -0.0036473418585956097


Entropy of this k_epoch: 0.11293673515319824
Average policy_loss of this k_epoch: 0.0019288286566734314
KL Divergence Average Loss: 0.006380826234817505
Total Loss of this k_epoch: -0.003654200118035078


Entropy of this k_epoch: 0.11481915414333344
Average policy_loss of this k_epoch: 0.002025030553340912
KL Divergence Average Loss: 0.006953363306820393
Total Loss of this k_epoch: -0.0036463935393840075






Entropy of this k_epoch: 0.10845458507537842
Average policy_loss of this k_epoch: 0.001735694706439972
KL Divergence Average Loss: 0.005416577216237783
Total Loss of this k_epoch: -0.003632869105786085


Entropy of this k_epoch: 0.11859230697154999
Average policy_loss of this k_epoch: 0.002210855484008789
KL Divergence Average Loss: 0.008300164714455605
Total Loss of this k_epoch: -0.003635758301243186






Entropy of this k_epoch: 0.1180422306060791
Average policy_loss of this k_epoch: 0.0021801404654979706
KL Divergence Average Loss: 0.007731161545962095
Total Loss of this k_epoch: -0.0036446594167500734






Entropy of this k_epoch: 0.11364782601594925
Average policy_loss of this k_epoch: 0.0019898489117622375
KL Divergence Average Loss: 0.0075391256250441074
Total Loss of this k_epoch: -0.0036171514075249434


Entropy of this k_epoch: 0.11324120312929153
Average policy_loss of this k_epoch: 0.00193718820810318
KL Divergence Average Loss: 0.006485841237008572
Total Loss of this k_epoch: -0.0036600136663764715


Entropy of this k_epoch: 0.12329703569412231
Average policy_loss of this k_epoch: 0.0024330168962478638
KL Divergence Average Loss: 0.009607858024537563
Total Loss of this k_epoch: -0.0036357566714286804






Entropy of this k_epoch: 0.118197962641716
Average policy_loss of this k_epoch: 0.0021802037954330444
KL Divergence Average Loss: 0.00783260352909565
Total Loss of this k_epoch: -0.0036513684317469597


Entropy of this k_epoch: 0.11199579387903214
Average policy_loss of this k_epoch: 0.0018974058330059052
KL Divergence Average Loss: 0.0064002531580626965
Total Loss of this k_epoch: -0.0036383813712745905






Entropy of this k_epoch: 0.1150384247303009
Average policy_loss of this k_epoch: 0.0020459219813346863
KL Divergence Average Loss: 0.007265896536409855
Total Loss of this k_epoch: -0.0036333403550088406






Entropy of this k_epoch: 0.11780965328216553
Average policy_loss of this k_epoch: 0.0021723806858062744
KL Divergence Average Loss: 0.007809466682374477
Total Loss of this k_epoch: -0.0036400072276592255


Entropy of this k_epoch: 0.11176713556051254
Average policy_loss of this k_epoch: 0.001882016658782959
KL Divergence Average Loss: 0.006290310528129339
Total Loss of this k_epoch: -0.0036434370558708906


Entropy of this k_epoch: 0.11099645495414734
Average policy_loss of this k_epoch: 0.0018372274935245514
KL Divergence Average Loss: 0.006065947003662586
Total Loss of this k_epoch: -0.003651936072856188






Entropy of this k_epoch: 0.1122572049498558
Average policy_loss of this k_epoch: 0.0019185878336429596
KL Divergence Average Loss: 0.006794488523155451
Total Loss of this k_epoch: -0.0036263277288526297


Entropy of this k_epoch: 0.11444833129644394
Average policy_loss of this k_epoch: 0.0020090267062187195
KL Divergence Average Loss: 0.007117577828466892
Total Loss of this k_epoch: -0.003642213996499777






Entropy of this k_epoch: 0.1086573600769043
Average policy_loss of this k_epoch: 0.0017302930355072021
KL Divergence Average Loss: 0.005536138545721769
Total Loss of this k_epoch: -0.0036472133360803127






Entropy of this k_epoch: 0.11128422617912292
Average policy_loss of this k_epoch: 0.001835886389017105
KL Divergence Average Loss: 0.00576196750625968
Total Loss of this k_epoch: -0.003670705482363701


Entropy of this k_epoch: 0.10622254759073257
Average policy_loss of this k_epoch: 0.0016096048057079315
KL Divergence Average Loss: 0.004989021457731724
Total Loss of this k_epoch: -0.0036516322288662195


Entropy of this k_epoch: 0.10975321382284164
Average policy_loss of this k_epoch: 0.0017818808555603027
KL Divergence Average Loss: 0.006058905273675919
Total Loss of this k_epoch: -0.0036451909691095352






Entropy of this k_epoch: 0.1051718071103096
Average policy_loss of this k_epoch: 0.0015788599848747253
KL Divergence Average Loss: 0.004661767743527889
Total Loss of this k_epoch: -0.003633112646639347


Entropy of this k_epoch: 0.10495418310165405
Average policy_loss of this k_epoch: 0.0015495531260967255
KL Divergence Average Loss: 0.004584807902574539
Total Loss of this k_epoch: -0.0036523081362247467






Entropy of this k_epoch: 0.10754545032978058
Average policy_loss of this k_epoch: 0.001686379313468933
KL Divergence Average Loss: 0.005214727483689785
Total Loss of this k_epoch: -0.0036387459840625525




Epoch 16/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.43it/s]


Entropy of this k_epoch: 0.10869678854942322
Average policy_loss of this k_epoch: 0.0017348304390907288
KL Divergence Average Loss: 0.005766704678535461
Total Loss of this k_epoch: -0.0036423420533537865


Entropy of this k_epoch: 0.10806187242269516
Average policy_loss of this k_epoch: 0.001687426120042801
KL Divergence Average Loss: 0.005169089883565903
Total Loss of this k_epoch: -0.003663976676762104

Last k_epoch stats:
Loss: -0.0036640 | Ratio: 0.9865006 | Entropy Term: 0.1080619


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  50%|█████     | 16/32 [01:26<01:26,  5.44s/it]

Entire Validation Dataset Accuracy: 0.9271| 178.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200,
        0.2200], device='cuda:0')





Entropy of this k_epoch: 0.10285473614931107
Average policy_loss of this k_epoch: 0.00016942620277404785
KL Divergence Average Loss: 0.0004005863447673619
Total Loss of this k_epoch: -0.0049693044275045395






Entropy of this k_epoch: 0.10116443037986755
Average policy_loss of this k_epoch: 0.002295609563589096
KL Divergence Average Loss: 0.000771176302805543
Total Loss of this k_epoch: -0.002754900138825178






Entropy of this k_epoch: 0.08162647485733032
Average policy_loss of this k_epoch: -0.0294004175812006
KL Divergence Average Loss: 0.0015409011393785477
Total Loss of this k_epoch: -0.03346633166074753


Entropy of this k_epoch: 0.06172318756580353
Average policy_loss of this k_epoch: -0.030734864994883537
KL Divergence Average Loss: 0.00429373187944293
Total Loss of this k_epoch: -0.03377808630466461


Entropy of this k_epoch: 0.05761987715959549
Average policy_loss of this k_epoch: -0.030968841165304184
KL Divergence Average Loss: 0.005130224861204624
Total Loss of this k_epoch: -0.03379853069782257






Entropy of this k_epoch: 0.04736170917749405
Average policy_loss of this k_epoch: -0.03157763183116913
KL Divergence Average Loss: 0.007438581436872482
Total Loss of this k_epoch: -0.033871330320835114


Entropy of this k_epoch: 0.0406782403588295
Average policy_loss of this k_epoch: -0.03195711970329285
KL Divergence Average Loss: 0.010317396372556686
Total Loss of this k_epoch: -0.03388785570859909






Entropy of this k_epoch: 0.03208876773715019
Average policy_loss of this k_epoch: -0.03239249810576439
KL Divergence Average Loss: 0.012151053175330162
Total Loss of this k_epoch: -0.03387542441487312






Entropy of this k_epoch: 0.028705870732665062
Average policy_loss of this k_epoch: -0.032555609941482544
KL Divergence Average Loss: 0.013331606052815914
Total Loss of this k_epoch: -0.03385758772492409


Entropy of this k_epoch: 0.02367663010954857
Average policy_loss of this k_epoch: -0.032791636884212494
KL Divergence Average Loss: 0.01525348424911499
Total Loss of this k_epoch: -0.033822931349277496


Entropy of this k_epoch: 0.022248640656471252
Average policy_loss of this k_epoch: -0.03285758197307587
KL Divergence Average Loss: 0.015852974727749825
Total Loss of this k_epoch: -0.03381148353219032






Entropy of this k_epoch: 0.0214519239962101
Average policy_loss of this k_epoch: -0.032913316041231155
KL Divergence Average Loss: 0.016141410917043686
Total Loss of this k_epoch: -0.03382449969649315


Entropy of this k_epoch: 0.01822744496166706
Average policy_loss of this k_epoch: -0.033050425350666046
KL Divergence Average Loss: 0.017699752002954483
Total Loss of this k_epoch: -0.03378480300307274






Entropy of this k_epoch: 0.017362603917717934
Average policy_loss of this k_epoch: -0.03308616578578949
KL Divergence Average Loss: 0.017850279808044434
Total Loss of this k_epoch: -0.03377579525113106






Entropy of this k_epoch: 0.016026737168431282
Average policy_loss of this k_epoch: -0.033129431307315826
KL Divergence Average Loss: 0.018508784472942352
Total Loss of this k_epoch: -0.03374568000435829


Entropy of this k_epoch: 0.01680225506424904
Average policy_loss of this k_epoch: -0.0331089086830616
KL Divergence Average Loss: 0.01819469779729843
Total Loss of this k_epoch: -0.03376707434654236


Entropy of this k_epoch: 0.015078331343829632
Average policy_loss of this k_epoch: -0.03318444639444351
KL Divergence Average Loss: 0.018916983157396317
Total Loss of this k_epoch: -0.03374919295310974






Entropy of this k_epoch: 0.013554893434047699
Average policy_loss of this k_epoch: -0.033243902027606964
KL Divergence Average Loss: 0.01959657482802868
Total Loss of this k_epoch: -0.03372568264603615


Entropy of this k_epoch: 0.014018077403306961
Average policy_loss of this k_epoch: -0.033232685178518295
KL Divergence Average Loss: 0.01942341774702072
Total Loss of this k_epoch: -0.0337393544614315






Entropy of this k_epoch: 0.013686473481357098
Average policy_loss of this k_epoch: -0.033234864473342896
KL Divergence Average Loss: 0.019628219306468964
Total Loss of this k_epoch: -0.033722907304763794






Entropy of this k_epoch: 0.0143954548984766
Average policy_loss of this k_epoch: -0.03320270776748657
KL Divergence Average Loss: 0.01948734186589718
Total Loss of this k_epoch: -0.03372760862112045


Entropy of this k_epoch: 0.014952486380934715
Average policy_loss of this k_epoch: -0.03319324553012848
KL Divergence Average Loss: 0.020143497735261917
Total Loss of this k_epoch: -0.03373943641781807






Entropy of this k_epoch: 0.0139092355966568
Average policy_loss of this k_epoch: -0.033222049474716187
KL Divergence Average Loss: 0.02033042535185814
Total Loss of this k_epoch: -0.033714208751916885


Entropy of this k_epoch: 0.014670345932245255
Average policy_loss of this k_epoch: -0.03319598361849785
KL Divergence Average Loss: 0.019078481942415237
Total Loss of this k_epoch: -0.033738717436790466


Entropy of this k_epoch: 0.01456380169838667
Average policy_loss of this k_epoch: -0.033215198665857315
KL Divergence Average Loss: 0.019913405179977417
Total Loss of this k_epoch: -0.03374425321817398






Entropy of this k_epoch: 0.014373604208230972
Average policy_loss of this k_epoch: -0.03321065381169319
KL Divergence Average Loss: 0.019371217116713524
Total Loss of this k_epoch: -0.033735621720552444


Entropy of this k_epoch: 0.014672385528683662
Average policy_loss of this k_epoch: -0.03319130837917328
KL Divergence Average Loss: 0.019522948190569878
Total Loss of this k_epoch: -0.03372969478368759


Entropy of this k_epoch: 0.01449653971940279
Average policy_loss of this k_epoch: -0.033201247453689575
KL Divergence Average Loss: 0.019177980720996857
Total Loss of this k_epoch: -0.03373429551720619






Entropy of this k_epoch: 0.015473483130335808
Average policy_loss of this k_epoch: -0.03315933793783188
KL Divergence Average Loss: 0.019081391394138336
Total Loss of this k_epoch: -0.03374220058321953


Entropy of this k_epoch: 0.014440517872571945
Average policy_loss of this k_epoch: -0.03320863097906113
KL Divergence Average Loss: 0.01929418556392193
Total Loss of this k_epoch: -0.03373771533370018


Entropy of this k_epoch: 0.014833346009254456
Average policy_loss of this k_epoch: -0.033188119530677795
KL Divergence Average Loss: 0.01928798109292984
Total Loss of this k_epoch: -0.033736906945705414






Entropy of this k_epoch: 0.016463229432702065
Average policy_loss of this k_epoch: -0.033120885491371155
KL Divergence Average Loss: 0.01853111758828163
Total Loss of this k_epoch: -0.0337587371468544


Entropy of this k_epoch: 0.01581234112381935
Average policy_loss of this k_epoch: -0.0331711620092392
KL Divergence Average Loss: 0.018930237740278244
Total Loss of this k_epoch: -0.03377247974276543


Entropy of this k_epoch: 0.01841738075017929
Average policy_loss of this k_epoch: -0.03304271027445793
KL Divergence Average Loss: 0.017515132203698158
Total Loss of this k_epoch: -0.03378842771053314






Entropy of this k_epoch: 0.016738908365368843
Average policy_loss of this k_epoch: -0.0331251323223114
KL Divergence Average Loss: 0.018291598185896873
Total Loss of this k_epoch: -0.03377916291356087


Entropy of this k_epoch: 0.015889564529061317
Average policy_loss of this k_epoch: -0.033157072961330414
KL Divergence Average Loss: 0.01861395686864853
Total Loss of this k_epoch: -0.033765412867069244


Entropy of this k_epoch: 0.017906732857227325
Average policy_loss of this k_epoch: -0.033075496554374695
KL Divergence Average Loss: 0.01768455281853676
Total Loss of this k_epoch: -0.03379398584365845






Entropy of this k_epoch: 0.018697991967201233
Average policy_loss of this k_epoch: -0.03301975131034851
KL Divergence Average Loss: 0.01725567877292633
Total Loss of this k_epoch: -0.03378209471702576


Entropy of this k_epoch: 0.018632858991622925
Average policy_loss of this k_epoch: -0.03301709145307541
KL Divergence Average Loss: 0.01737888902425766
Total Loss of this k_epoch: -0.03377494588494301


Entropy of this k_epoch: 0.01919850893318653
Average policy_loss of this k_epoch: -0.03300867974758148
KL Divergence Average Loss: 0.018446285277605057
Total Loss of this k_epoch: -0.03378414362668991






Entropy of this k_epoch: 0.020449429750442505
Average policy_loss of this k_epoch: -0.0329371839761734
KL Divergence Average Loss: 0.016724105924367905
Total Loss of this k_epoch: -0.033792417496442795


Entropy of this k_epoch: 0.024245887994766235
Average policy_loss of this k_epoch: -0.03268512338399887
KL Divergence Average Loss: 0.0359131395816803
Total Loss of this k_epoch: -0.03353828564286232


Entropy of this k_epoch: 0.022835083305835724
Average policy_loss of this k_epoch: -0.03283759951591492
KL Divergence Average Loss: 0.01558571495115757
Total Loss of this k_epoch: -0.033823493868112564






Entropy of this k_epoch: 0.02132513001561165
Average policy_loss of this k_epoch: -0.032890766859054565
KL Divergence Average Loss: 0.016173729673027992
Total Loss of this k_epoch: -0.03379528596997261


Entropy of this k_epoch: 0.021090928465127945
Average policy_loss of this k_epoch: -0.03291958570480347
KL Divergence Average Loss: 0.016325771808624268
Total Loss of this k_epoch: -0.033810876309871674


Entropy of this k_epoch: 0.0246933251619339
Average policy_loss of this k_epoch: -0.03273692727088928
KL Divergence Average Loss: 0.014849514700472355
Total Loss of this k_epoch: -0.03382309898734093






Entropy of this k_epoch: 0.0263071246445179
Average policy_loss of this k_epoch: -0.03265710920095444
KL Divergence Average Loss: 0.014190323650836945
Total Loss of this k_epoch: -0.033830560743808746


Entropy of this k_epoch: 0.02511654794216156
Average policy_loss of this k_epoch: -0.03273029625415802
KL Divergence Average Loss: 0.014635952189564705
Total Loss of this k_epoch: -0.033839765936136246


Entropy of this k_epoch: 0.026553094387054443
Average policy_loss of this k_epoch: -0.03266307711601257
KL Divergence Average Loss: 0.014114540070295334
Total Loss of this k_epoch: -0.03384958952665329






Entropy of this k_epoch: 0.028453275561332703
Average policy_loss of this k_epoch: -0.03258703649044037
KL Divergence Average Loss: 0.013413922861218452
Total Loss of this k_epoch: -0.03387555852532387


Entropy of this k_epoch: 0.028120553120970726
Average policy_loss of this k_epoch: -0.032580308616161346
KL Divergence Average Loss: 0.013639641925692558
Total Loss of this k_epoch: -0.033849939703941345


Entropy of this k_epoch: 0.0294962041079998
Average policy_loss of this k_epoch: -0.03251880407333374
KL Divergence Average Loss: 0.01303455512970686
Total Loss of this k_epoch: -0.03386326879262924






Entropy of this k_epoch: 0.028007030487060547
Average policy_loss of this k_epoch: -0.03256944566965103
KL Divergence Average Loss: 0.013583209365606308
Total Loss of this k_epoch: -0.03383396565914154


Entropy of this k_epoch: 0.030909359455108643
Average policy_loss of this k_epoch: -0.03243863955140114
KL Divergence Average Loss: 0.012594051659107208
Total Loss of this k_epoch: -0.03385816514492035


Entropy of this k_epoch: 0.033608388155698776
Average policy_loss of this k_epoch: -0.03235303983092308
KL Divergence Average Loss: 0.01163504272699356
Total Loss of this k_epoch: -0.033917106688022614






Entropy of this k_epoch: 0.03180994093418121
Average policy_loss of this k_epoch: -0.03240414708852768
KL Divergence Average Loss: 0.01222282089293003
Total Loss of this k_epoch: -0.033872418105602264


Entropy of this k_epoch: 0.03284095972776413
Average policy_loss of this k_epoch: -0.03234908729791641
KL Divergence Average Loss: 0.01181713491678238
Total Loss of this k_epoch: -0.03387296572327614


Entropy of this k_epoch: 0.034691110253334045
Average policy_loss of this k_epoch: -0.03226497024297714
KL Divergence Average Loss: 0.011179590597748756
Total Loss of this k_epoch: -0.03388772904872894






Entropy of this k_epoch: 0.0355241522192955
Average policy_loss of this k_epoch: -0.03221694007515907
KL Divergence Average Loss: 0.010906869545578957
Total Loss of this k_epoch: -0.03388407826423645


Entropy of this k_epoch: 0.03481690213084221
Average policy_loss of this k_epoch: -0.03226248547434807
KL Divergence Average Loss: 0.011221460998058319
Total Loss of this k_epoch: -0.03389111906290054


Entropy of this k_epoch: 0.040295735001564026
Average policy_loss of this k_epoch: -0.03197219595313072
KL Divergence Average Loss: 0.009476089850068092
Total Loss of this k_epoch: -0.03389222174882889




Epoch 17/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.49it/s]

Entropy of this k_epoch: 0.03807155787944794
Average policy_loss of this k_epoch: -0.03205921873450279
KL Divergence Average Loss: 0.010230105370283127
Total Loss of this k_epoch: -0.033860497176647186


Entropy of this k_epoch: 0.03777726739645004
Average policy_loss of this k_epoch: -0.032062746584415436
KL Divergence Average Loss: 0.01025553047657013
Total Loss of this k_epoch: -0.03384905681014061


Entropy of this k_epoch: 0.039471715688705444
Average policy_loss of this k_epoch: -0.03203132376074791
KL Divergence Average Loss: 0.009753981605172157
Total Loss of this k_epoch: -0.033907368779182434

Last k_epoch stats:
Loss: -0.0339074 | Ratio: 0.9978300 | Entropy Term: 0.0394717



>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  53%|█████▎    | 17/32 [01:32<01:21,  5.42s/it]

Entire Validation Dataset Accuracy: 0.9219| 177.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.038947537541389465
Average policy_loss of this k_epoch: 8.786842226982117e-05
KL Divergence Average Loss: 0.00024562867474742234
Total Loss of this k_epoch: -0.0018570522079244256






Entropy of this k_epoch: 0.03969286382198334
Average policy_loss of this k_epoch: 0.00011293217539787292
KL Divergence Average Loss: 0.00029884930700063705
Total Loss of this k_epoch: -0.0018687226111069322






Entropy of this k_epoch: 0.04218991473317146
Average policy_loss of this k_epoch: 0.00018990039825439453
KL Divergence Average Loss: 0.00029929482843726873
Total Loss of this k_epoch: -0.0019166023703292012


Entropy of this k_epoch: 0.04294367879629135
Average policy_loss of this k_epoch: 0.00021734461188316345
KL Divergence Average Loss: 0.00043076329166069627
Total Loss of this k_epoch: -0.0019255317747592926


Entropy of this k_epoch: 0.045605167746543884
Average policy_loss of this k_epoch: 0.0003018788993358612
KL Divergence Average Loss: 0.0006227456033229828
Total Loss of this k_epoch: -0.0019721519201993942






Entropy of this k_epoch: 0.050349026918411255
Average policy_loss of this k_epoch: 0.0004608631134033203
KL Divergence Average Loss: 0.0013278692495077848
Total Loss of this k_epoch: -0.00204330962151289


Entropy of this k_epoch: 0.049537964165210724
Average policy_loss of this k_epoch: 0.00042868778109550476
KL Divergence Average Loss: 0.000980186858214438
Total Loss of this k_epoch: -0.002038408536463976






Entropy of this k_epoch: 0.055800240486860275
Average policy_loss of this k_epoch: 0.000642530620098114
KL Divergence Average Loss: 0.002000665059313178
Total Loss of this k_epoch: -0.0021274748723953962






Entropy of this k_epoch: 0.05391936004161835
Average policy_loss of this k_epoch: 0.0005812719464302063
KL Divergence Average Loss: 0.0016185434069484472
Total Loss of this k_epoch: -0.002098510740324855


Entropy of this k_epoch: 0.06564962863922119
Average policy_loss of this k_epoch: 0.0009970217943191528
KL Divergence Average Loss: 0.003909137099981308
Total Loss of this k_epoch: -0.002246368443593383


Entropy of this k_epoch: 0.06173938885331154
Average policy_loss of this k_epoch: 0.000856451690196991
KL Divergence Average Loss: 0.003028855426236987
Total Loss of this k_epoch: -0.00220022932626307






Entropy of this k_epoch: 0.07062144577503204
Average policy_loss of this k_epoch: 0.0012048780918121338
KL Divergence Average Loss: 0.005531902424991131
Total Loss of this k_epoch: -0.002270875032991171


Entropy of this k_epoch: 0.07385015487670898
Average policy_loss of this k_epoch: 0.0012996681034564972
KL Divergence Average Loss: 0.0057597169652581215
Total Loss of this k_epoch: -0.002335242461413145






Entropy of this k_epoch: 0.08541571348905563
Average policy_loss of this k_epoch: 0.0017652958631515503
KL Divergence Average Loss: 0.009718338958919048
Total Loss of this k_epoch: -0.0024083065800368786






Entropy of this k_epoch: 0.0911245346069336
Average policy_loss of this k_epoch: 0.00200532004237175
KL Divergence Average Loss: 0.012105134315788746
Total Loss of this k_epoch: -0.0024298555217683315


Entropy of this k_epoch: 0.09604975581169128
Average policy_loss of this k_epoch: 0.0022149235010147095
KL Divergence Average Loss: 0.013946563005447388
Total Loss of this k_epoch: -0.002448098734021187


Entropy of this k_epoch: 0.10233613103628159
Average policy_loss of this k_epoch: 0.00248757004737854
KL Divergence Average Loss: 0.016868360340595245
Total Loss of this k_epoch: -0.002460553077980876






Entropy of this k_epoch: 0.10502557456493378
Average policy_loss of this k_epoch: 0.002613171935081482
KL Divergence Average Loss: 0.0184618029743433
Total Loss of this k_epoch: -0.002453488763421774


Entropy of this k_epoch: 0.1092335432767868
Average policy_loss of this k_epoch: 0.0028045475482940674
KL Divergence Average Loss: 0.02047787234187126
Total Loss of this k_epoch: -0.002452351152896881






Entropy of this k_epoch: 0.11534152925014496
Average policy_loss of this k_epoch: 0.003131397068500519
KL Divergence Average Loss: 0.025287771597504616
Total Loss of this k_epoch: -0.0023828018456697464






Entropy of this k_epoch: 0.12361395359039307
Average policy_loss of this k_epoch: 0.0035019516944885254
KL Divergence Average Loss: 0.029304152354598045
Total Loss of this k_epoch: -0.0023857043124735355


Entropy of this k_epoch: 0.12206901609897614
Average policy_loss of this k_epoch: 0.0034366771578788757
KL Divergence Average Loss: 0.029122797772288322
Total Loss of this k_epoch: -0.0023755456786602736


Entropy of this k_epoch: 0.12873180210590363
Average policy_loss of this k_epoch: 0.0037994980812072754
KL Divergence Average Loss: 0.034067172557115555
Total Loss of this k_epoch: -0.002296420279890299






Entropy of this k_epoch: 0.1230737715959549
Average policy_loss of this k_epoch: 0.0034698806703090668
KL Divergence Average Loss: 0.02886907383799553
Total Loss of this k_epoch: -0.002395117422565818


Entropy of this k_epoch: 0.12324289232492447
Average policy_loss of this k_epoch: 0.0034727007150650024
KL Divergence Average Loss: 0.029033301398158073
Total Loss of this k_epoch: -0.002399110933765769






Entropy of this k_epoch: 0.11452380567789078
Average policy_loss of this k_epoch: 0.0030738413333892822
KL Divergence Average Loss: 0.024310968816280365
Total Loss of this k_epoch: -0.002409239299595356






Entropy of this k_epoch: 0.1165279895067215
Average policy_loss of this k_epoch: 0.0031444095075130463
KL Divergence Average Loss: 0.024656733497977257
Total Loss of this k_epoch: -0.002435422735288739


Entropy of this k_epoch: 0.12018587440252304
Average policy_loss of this k_epoch: 0.003318239003419876
KL Divergence Average Loss: 0.026716865599155426
Total Loss of this k_epoch: -0.00242388597689569


Entropy of this k_epoch: 0.1130504459142685
Average policy_loss of this k_epoch: 0.002981431782245636
KL Divergence Average Loss: 0.022581636905670166
Total Loss of this k_epoch: -0.002445274032652378






Entropy of this k_epoch: 0.10952195525169373
Average policy_loss of this k_epoch: 0.0028182119131088257
KL Divergence Average Loss: 0.020748354494571686
Total Loss of this k_epoch: -0.002450402593240142


Entropy of this k_epoch: 0.1078227236866951
Average policy_loss of this k_epoch: 0.0027490034699440002
KL Divergence Average Loss: 0.02015935443341732
Total Loss of this k_epoch: -0.002440539188683033






Entropy of this k_epoch: 0.10652447491884232
Average policy_loss of this k_epoch: 0.002677448093891144
KL Divergence Average Loss: 0.018997954204678535
Total Loss of this k_epoch: -0.0024587963707745075






Entropy of this k_epoch: 0.10057505965232849
Average policy_loss of this k_epoch: 0.0024211108684539795
KL Divergence Average Loss: 0.016342421993613243
Total Loss of this k_epoch: -0.002444217912852764


Entropy of this k_epoch: 0.0998430922627449
Average policy_loss of this k_epoch: 0.002389177680015564
KL Divergence Average Loss: 0.016049280762672424
Total Loss of this k_epoch: -0.002442484488710761


Entropy of this k_epoch: 0.09513060748577118
Average policy_loss of this k_epoch: 0.0021791495382785797
KL Divergence Average Loss: 0.013655677437782288
Total Loss of this k_epoch: -0.0024408239405602217






Entropy of this k_epoch: 0.09859482944011688
Average policy_loss of this k_epoch: 0.0023298263549804688
KL Divergence Average Loss: 0.01510116457939148
Total Loss of this k_epoch: -0.0024489033967256546


Entropy of this k_epoch: 0.09430329501628876
Average policy_loss of this k_epoch: 0.0021393969655036926
KL Divergence Average Loss: 0.013240436092019081
Total Loss of this k_epoch: -0.0024433634243905544






Entropy of this k_epoch: 0.09964555501937866
Average policy_loss of this k_epoch: 0.0023697875440120697
KL Divergence Average Loss: 0.015777191147208214
Total Loss of this k_epoch: -0.00245471834205091






Entropy of this k_epoch: 0.09534056484699249
Average policy_loss of this k_epoch: 0.0021779723465442657
KL Divergence Average Loss: 0.01362509373575449
Total Loss of this k_epoch: -0.0024528049398213625


Entropy of this k_epoch: 0.0960797667503357
Average policy_loss of this k_epoch: 0.0022278502583503723
KL Divergence Average Loss: 0.014490737579762936
Total Loss of this k_epoch: -0.0024312310852110386


Entropy of this k_epoch: 0.09858319163322449
Average policy_loss of this k_epoch: 0.0023338571190834045
KL Divergence Average Loss: 0.015462432987987995
Total Loss of this k_epoch: -0.002440678421407938






Entropy of this k_epoch: 0.09890174865722656
Average policy_loss of this k_epoch: 0.0023308023810386658
KL Divergence Average Loss: 0.015175700187683105
Total Loss of this k_epoch: -0.0024625284131616354


Entropy of this k_epoch: 0.09729562699794769
Average policy_loss of this k_epoch: 0.0022658631205558777
KL Divergence Average Loss: 0.014666812494397163
Total Loss of this k_epoch: -0.002452250337228179






Entropy of this k_epoch: 0.0967998057603836
Average policy_loss of this k_epoch: 0.0022566281259059906
KL Divergence Average Loss: 0.01493409276008606
Total Loss of this k_epoch: -0.0024340213276445866






Entropy of this k_epoch: 0.09954586625099182
Average policy_loss of this k_epoch: 0.002369321882724762
KL Divergence Average Loss: 0.015821587294340134
Total Loss of this k_epoch: -0.0024497555568814278


Entropy of this k_epoch: 0.09685856103897095
Average policy_loss of this k_epoch: 0.0022462382912635803
KL Divergence Average Loss: 0.01445554569363594
Total Loss of this k_epoch: -0.0024521343875676394


Entropy of this k_epoch: 0.09780177474021912
Average policy_loss of this k_epoch: 0.0023009702563285828
KL Divergence Average Loss: 0.015021643601357937
Total Loss of this k_epoch: -0.0024389021564275026






Entropy of this k_epoch: 0.09988240152597427
Average policy_loss of this k_epoch: 0.0023777931928634644
KL Divergence Average Loss: 0.015713118016719818
Total Loss of this k_epoch: -0.002459195675328374


Entropy of this k_epoch: 0.09964657574892044
Average policy_loss of this k_epoch: 0.002370428293943405
KL Divergence Average Loss: 0.015842437744140625
Total Loss of this k_epoch: -0.0024534761905670166






Entropy of this k_epoch: 0.09948542714118958
Average policy_loss of this k_epoch: 0.002357080578804016
KL Divergence Average Loss: 0.015423707664012909
Total Loss of this k_epoch: -0.0024629540275782347






Entropy of this k_epoch: 0.10110186040401459
Average policy_loss of this k_epoch: 0.0024457648396492004
KL Divergence Average Loss: 0.01649092137813568
Total Loss of this k_epoch: -0.002444419078528881


Entropy of this k_epoch: 0.10383083671331406
Average policy_loss of this k_epoch: 0.0025569982826709747
KL Divergence Average Loss: 0.017645427957177162
Total Loss of this k_epoch: -0.0024580892641097307


Entropy of this k_epoch: 0.10199560225009918
Average policy_loss of this k_epoch: 0.0024859122931957245
KL Divergence Average Loss: 0.017162587493658066
Total Loss of this k_epoch: -0.0024422421120107174






Entropy of this k_epoch: 0.1043570265173912
Average policy_loss of this k_epoch: 0.002582937479019165
KL Divergence Average Loss: 0.018032897263765335
Total Loss of this k_epoch: -0.00245458516292274


Entropy of this k_epoch: 0.10462737083435059
Average policy_loss of this k_epoch: 0.002581968903541565
KL Divergence Average Loss: 0.018075132742524147
Total Loss of this k_epoch: -0.002468648599460721






Entropy of this k_epoch: 0.10725092142820358
Average policy_loss of this k_epoch: 0.002698764204978943
KL Divergence Average Loss: 0.01927764341235161
Total Loss of this k_epoch: -0.002471005544066429






Entropy of this k_epoch: 0.1069440245628357
Average policy_loss of this k_epoch: 0.0027034729719161987
KL Divergence Average Loss: 0.019572697579860687
Total Loss of this k_epoch: -0.002448001177981496


Entropy of this k_epoch: 0.10704723000526428
Average policy_loss of this k_epoch: 0.002688586711883545
KL Divergence Average Loss: 0.018966838717460632
Total Loss of this k_epoch: -0.0024741063825786114


Entropy of this k_epoch: 0.10674667358398438
Average policy_loss of this k_epoch: 0.002680528908967972
KL Divergence Average Loss: 0.019137030467391014
Total Loss of this k_epoch: -0.002465434605255723






Entropy of this k_epoch: 0.10970713198184967
Average policy_loss of this k_epoch: 0.0028251558542251587
KL Divergence Average Loss: 0.021056335419416428
Total Loss of this k_epoch: -0.0024496375117450953


Entropy of this k_epoch: 0.10461024940013885
Average policy_loss of this k_epoch: 0.0025958046317100525
KL Divergence Average Loss: 0.018312327563762665
Total Loss of this k_epoch: -0.0024515846744179726






Entropy of this k_epoch: 0.10390116274356842
Average policy_loss of this k_epoch: 0.002564944326877594
KL Divergence Average Loss: 0.017837829887866974
Total Loss of this k_epoch: -0.002451735781505704




Epoch 18/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.43it/s]


Entropy of this k_epoch: 0.1083218902349472
Average policy_loss of this k_epoch: 0.0027629248797893524
KL Divergence Average Loss: 0.020014148205518723
Total Loss of this k_epoch: -0.00245302845723927


Entropy of this k_epoch: 0.10587193071842194
Average policy_loss of this k_epoch: 0.002644851803779602
KL Divergence Average Loss: 0.018642466515302658
Total Loss of this k_epoch: -0.002462320029735565

Last k_epoch stats:
Loss: -0.0024623 | Ratio: 0.9788412 | Entropy Term: 0.1058719


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  56%|█████▋    | 18/32 [01:37<01:15,  5.42s/it]

Entire Validation Dataset Accuracy: 0.9375| 180.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.1032225638628006
Average policy_loss of this k_epoch: 9.345263242721558e-05
KL Divergence Average Loss: 0.0005611926899291575
Total Loss of this k_epoch: -0.005062063690274954






Entropy of this k_epoch: 0.10845530033111572
Average policy_loss of this k_epoch: 0.00034842267632484436
KL Divergence Average Loss: 0.0006241941591724753
Total Loss of this k_epoch: -0.005068100523203611






Entropy of this k_epoch: 0.10496409237384796
Average policy_loss of this k_epoch: 0.0001713782548904419
KL Divergence Average Loss: 0.000485327560454607
Total Loss of this k_epoch: -0.00507197342813015


Entropy of this k_epoch: 0.10864215344190598
Average policy_loss of this k_epoch: 0.0003501996397972107
KL Divergence Average Loss: 0.0006795238004997373
Total Loss of this k_epoch: -0.005075112916529179


Entropy of this k_epoch: 0.1029026135802269
Average policy_loss of this k_epoch: 8.60169529914856e-05
KL Divergence Average Loss: 0.0008419597870670259
Total Loss of this k_epoch: -0.005050694104284048






Entropy of this k_epoch: 0.10569620132446289
Average policy_loss of this k_epoch: 0.00020466744899749756
KL Divergence Average Loss: 0.0005759038613177836
Total Loss of this k_epoch: -0.005074383690953255


Entropy of this k_epoch: 0.10397075116634369
Average policy_loss of this k_epoch: 0.0001343153417110443
KL Divergence Average Loss: 0.0007715846295468509
Total Loss of this k_epoch: -0.005056506488472223






Entropy of this k_epoch: 0.11058995127677917
Average policy_loss of this k_epoch: 0.0004430040717124939
KL Divergence Average Loss: 0.0006775468937121332
Total Loss of this k_epoch: -0.005079718306660652






Entropy of this k_epoch: 0.11442404985427856
Average policy_loss of this k_epoch: 0.0006233341991901398
KL Divergence Average Loss: 0.0010352223180234432
Total Loss of this k_epoch: -0.0050875162705779076


Entropy of this k_epoch: 0.11477949470281601
Average policy_loss of this k_epoch: 0.0006363466382026672
KL Divergence Average Loss: 0.0008303411304950714
Total Loss of this k_epoch: -0.005094325169920921


Entropy of this k_epoch: 0.11448217183351517
Average policy_loss of this k_epoch: 0.0006243959069252014
KL Divergence Average Loss: 0.0007550588343292475
Total Loss of this k_epoch: -0.005092162173241377






Entropy of this k_epoch: 0.10872349143028259
Average policy_loss of this k_epoch: 0.0003473833203315735
KL Divergence Average Loss: 0.0006612609140574932
Total Loss of this k_epoch: -0.005082178860902786


Entropy of this k_epoch: 0.11539959907531738
Average policy_loss of this k_epoch: 0.0006635263562202454
KL Divergence Average Loss: 0.0008689598762430251
Total Loss of this k_epoch: -0.005097764078527689






Entropy of this k_epoch: 0.11225053668022156
Average policy_loss of this k_epoch: 0.0005303248763084412
KL Divergence Average Loss: 0.0010229209437966347
Total Loss of this k_epoch: -0.0050719729624688625






Entropy of this k_epoch: 0.12802788615226746
Average policy_loss of this k_epoch: 0.001350007951259613
KL Divergence Average Loss: 0.06607504189014435
Total Loss of this k_epoch: -0.004390635993331671


Entropy of this k_epoch: 0.11673525720834732
Average policy_loss of this k_epoch: 0.0007699131965637207
KL Divergence Average Loss: 0.0013553000753745437
Total Loss of this k_epoch: -0.005053296685218811


Entropy of this k_epoch: 0.1196022629737854
Average policy_loss of this k_epoch: 0.0008956938982009888
KL Divergence Average Loss: 0.001610822742804885
Total Loss of this k_epoch: -0.0050683110021054745






Entropy of this k_epoch: 0.12564238905906677
Average policy_loss of this k_epoch: 0.0011783391237258911
KL Divergence Average Loss: 0.0016626366414129734
Total Loss of this k_epoch: -0.005087153986096382


Entropy of this k_epoch: 0.12349295616149902
Average policy_loss of this k_epoch: 0.0011098384857177734
KL Divergence Average Loss: 0.002139395335689187
Total Loss of this k_epoch: -0.005043415352702141






Entropy of this k_epoch: 0.11791641265153885
Average policy_loss of this k_epoch: 0.0008117556571960449
KL Divergence Average Loss: 0.0015212579164654016
Total Loss of this k_epoch: -0.005068852566182613






Entropy of this k_epoch: 0.12423057109117508
Average policy_loss of this k_epoch: 0.0011272430419921875
KL Divergence Average Loss: 0.0017288275994360447
Total Loss of this k_epoch: -0.005066997371613979


Entropy of this k_epoch: 0.12373853474855423
Average policy_loss of this k_epoch: 0.0011117905378341675
KL Divergence Average Loss: 0.00194828724488616
Total Loss of this k_epoch: -0.0050556533969938755


Entropy of this k_epoch: 0.1363900601863861
Average policy_loss of this k_epoch: 0.0026129335165023804
KL Divergence Average Loss: 0.021970562636852264
Total Loss of this k_epoch: -0.003986863885074854






Entropy of this k_epoch: 0.12110893428325653
Average policy_loss of this k_epoch: 0.001013137400150299
KL Divergence Average Loss: 0.0025832857936620712
Total Loss of this k_epoch: -0.005016476381570101


Entropy of this k_epoch: 0.11930695921182632
Average policy_loss of this k_epoch: 0.0008895844221115112
KL Divergence Average Loss: 0.001742226304486394
Total Loss of this k_epoch: -0.005058341193944216






Entropy of this k_epoch: 0.10682214796543121
Average policy_loss of this k_epoch: 0.00027632713317871094
KL Divergence Average Loss: 0.000768390716984868
Total Loss of this k_epoch: -0.005057096481323242






Entropy of this k_epoch: 0.1119423508644104
Average policy_loss of this k_epoch: 0.0005078837275505066
KL Divergence Average Loss: 0.0006292684120126069
Total Loss of this k_epoch: -0.005082941614091396


Entropy of this k_epoch: 0.1023239940404892
Average policy_loss of this k_epoch: 6.581097841262817e-05
KL Divergence Average Loss: 0.0008104771841317415
Total Loss of this k_epoch: -0.005042283795773983


Entropy of this k_epoch: 0.11130596697330475
Average policy_loss of this k_epoch: 0.0004661157727241516
KL Divergence Average Loss: 0.0005749252159148455
Total Loss of this k_epoch: -0.005093433428555727






Entropy of this k_epoch: 0.10404174029827118
Average policy_loss of this k_epoch: 0.00012936443090438843
KL Divergence Average Loss: 0.00045575728290714324
Total Loss of this k_epoch: -0.005068165250122547


Entropy of this k_epoch: 0.10639656335115433
Average policy_loss of this k_epoch: 0.00023448839783668518
KL Divergence Average Loss: 0.0005725090741179883
Total Loss of this k_epoch: -0.0050796144641935825






Entropy of this k_epoch: 0.10049137473106384
Average policy_loss of this k_epoch: -3.11434268951416e-05
KL Divergence Average Loss: 0.0006133392453193665
Total Loss of this k_epoch: -0.005049578845500946






Entropy of this k_epoch: 0.10447008907794952
Average policy_loss of this k_epoch: 0.00015037134289741516
KL Divergence Average Loss: 0.0007158065563999116
Total Loss of this k_epoch: -0.005065975245088339


Entropy of this k_epoch: 0.10050570964813232
Average policy_loss of this k_epoch: -3.910064697265625e-05
KL Divergence Average Loss: 0.00044653983786702156
Total Loss of this k_epoch: -0.005059921182692051


Entropy of this k_epoch: 0.10817437618970871
Average policy_loss of this k_epoch: 0.00031250715255737305
KL Divergence Average Loss: 0.00035132281482219696
Total Loss of this k_epoch: -0.005092698149383068






Entropy of this k_epoch: 0.10699335485696793
Average policy_loss of this k_epoch: 0.00026794150471687317
KL Divergence Average Loss: 0.0005532733630388975
Total Loss of this k_epoch: -0.00507619371637702


Entropy of this k_epoch: 0.10972470045089722
Average policy_loss of this k_epoch: 0.000382155179977417
KL Divergence Average Loss: 0.0017366328975185752
Total Loss of this k_epoch: -0.005086713470518589






Entropy of this k_epoch: 0.10361449420452118
Average policy_loss of this k_epoch: 0.00010488182306289673
KL Divergence Average Loss: 0.0007164645940065384
Total Loss of this k_epoch: -0.00506867840886116






Entropy of this k_epoch: 0.11311331391334534
Average policy_loss of this k_epoch: 0.0005375109612941742
KL Divergence Average Loss: 0.0018399020191282034
Total Loss of this k_epoch: -0.005099755711853504


Entropy of this k_epoch: 0.10726995766162872
Average policy_loss of this k_epoch: 0.00027288123965263367
KL Divergence Average Loss: 0.0007166821742430329
Total Loss of this k_epoch: -0.005083449650555849


Entropy of this k_epoch: 0.11631692945957184
Average policy_loss of this k_epoch: 0.0007167086005210876
KL Divergence Average Loss: 0.0010655707446858287
Total Loss of this k_epoch: -0.005088482052087784






Entropy of this k_epoch: 0.11624260991811752
Average policy_loss of this k_epoch: 0.0007045641541481018
KL Divergence Average Loss: 0.0008239186718128622
Total Loss of this k_epoch: -0.005099327303469181


Entropy of this k_epoch: 0.12015199661254883
Average policy_loss of this k_epoch: 0.0008806660771369934
KL Divergence Average Loss: 0.002503159921616316
Total Loss of this k_epoch: -0.005101902410387993






Entropy of this k_epoch: 0.11762095987796783
Average policy_loss of this k_epoch: 0.0007957816123962402
KL Divergence Average Loss: 0.001275793882086873
Total Loss of this k_epoch: -0.005072508938610554






Entropy of this k_epoch: 0.12021735310554504
Average policy_loss of this k_epoch: 0.0009045451879501343
KL Divergence Average Loss: 0.0018937567947432399
Total Loss of this k_epoch: -0.005087384954094887


Entropy of this k_epoch: 0.12065494060516357
Average policy_loss of this k_epoch: 0.0009240619838237762
KL Divergence Average Loss: 0.001490754191763699
Total Loss of this k_epoch: -0.005093777552247047


Entropy of this k_epoch: 0.11430449038743973
Average policy_loss of this k_epoch: 0.0006129145622253418
KL Divergence Average Loss: 0.0013972572050988674
Total Loss of this k_epoch: -0.005088337231427431






Entropy of this k_epoch: 0.11555510759353638
Average policy_loss of this k_epoch: 0.0006461665034294128
KL Divergence Average Loss: 0.0031142483931034803
Total Loss of this k_epoch: -0.0051004462875425816


Entropy of this k_epoch: 0.11452120542526245
Average policy_loss of this k_epoch: 0.0006159208714962006
KL Divergence Average Loss: 0.002197619993239641
Total Loss of this k_epoch: -0.005088163539767265






Entropy of this k_epoch: 0.11430530250072479
Average policy_loss of this k_epoch: 0.0006073713302612305
KL Divergence Average Loss: 0.0012001339346170425
Total Loss of this k_epoch: -0.005095892585813999






Entropy of this k_epoch: 0.1300898790359497
Average policy_loss of this k_epoch: 0.0026863962411880493
KL Divergence Average Loss: 0.23662768304347992
Total Loss of this k_epoch: -0.001451821532100439


Entropy of this k_epoch: 0.11874951422214508
Average policy_loss of this k_epoch: 0.0008304305374622345
KL Divergence Average Loss: 0.001547803170979023
Total Loss of this k_epoch: -0.005091567058116198


Entropy of this k_epoch: 0.13154524564743042
Average policy_loss of this k_epoch: 0.002797648310661316
KL Divergence Average Loss: 0.2891543209552765
Total Loss of this k_epoch: -0.0008880705572664738






Entropy of this k_epoch: 0.11956125497817993
Average policy_loss of this k_epoch: 0.0008768662810325623
KL Divergence Average Loss: 0.0013674928341060877
Total Loss of this k_epoch: -0.005087521392852068


Entropy of this k_epoch: 0.12050765752792358
Average policy_loss of this k_epoch: 0.0009236559271812439
KL Divergence Average Loss: 0.001355174696072936
Total Loss of this k_epoch: -0.005088175181299448






Entropy of this k_epoch: 0.12383861839771271
Average policy_loss of this k_epoch: 0.0010996013879776
KL Divergence Average Loss: 0.0018811598420143127
Total Loss of this k_epoch: -0.005073518492281437






Entropy of this k_epoch: 0.12720784544944763
Average policy_loss of this k_epoch: 0.001254051923751831
KL Divergence Average Loss: 0.0020255122799426317
Total Loss of this k_epoch: -0.005086085759103298


Entropy of this k_epoch: 0.125496968626976
Average policy_loss of this k_epoch: 0.0011846348643302917
KL Divergence Average Loss: 0.002005909802392125
Total Loss of this k_epoch: -0.005070154555141926


Entropy of this k_epoch: 0.12670935690402985
Average policy_loss of this k_epoch: 0.0012389495968818665
KL Divergence Average Loss: 0.00209192861802876
Total Loss of this k_epoch: -0.005075599066913128






Entropy of this k_epoch: 0.12414122372865677
Average policy_loss of this k_epoch: 0.0011247768998146057
KL Divergence Average Loss: 0.0019671607296913862
Total Loss of this k_epoch: -0.00506261270493269


Entropy of this k_epoch: 0.12529721856117249
Average policy_loss of this k_epoch: 0.0011825263500213623
KL Divergence Average Loss: 0.0023496276699006557
Total Loss of this k_epoch: -0.0050588385201990604






Entropy of this k_epoch: 0.11713817715644836
Average policy_loss of this k_epoch: 0.0007728114724159241
KL Divergence Average Loss: 0.0016902141505852342
Total Loss of this k_epoch: -0.005067195277661085




Epoch 19/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.43it/s]


Entropy of this k_epoch: 0.11822878569364548
Average policy_loss of this k_epoch: 0.0008157007396221161
KL Divergence Average Loss: 0.001326533267274499
Total Loss of this k_epoch: -0.005082473158836365


Entropy of this k_epoch: 0.12051917612552643
Average policy_loss of this k_epoch: 0.0009436383843421936
KL Divergence Average Loss: 0.0016762027516961098
Total Loss of this k_epoch: -0.005065558478236198

Last k_epoch stats:
Loss: -0.0050656 | Ratio: 0.9924508 | Entropy Term: 0.1205192


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  59%|█████▉    | 19/32 [01:43<01:10,  5.41s/it]

Entire Validation Dataset Accuracy: 0.9010| 173.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782,
        0.1782], device='cuda:0')





Entropy of this k_epoch: 0.11280672252178192
Average policy_loss of this k_epoch: 0.025331534445285797
KL Divergence Average Loss: 0.0005789368879050016
Total Loss of this k_epoch: 0.01969698816537857






Entropy of this k_epoch: 0.10348457098007202
Average policy_loss of this k_epoch: -0.005119435489177704
KL Divergence Average Loss: 0.0006165850209072232
Total Loss of this k_epoch: -0.010287498123943806






Entropy of this k_epoch: 0.0858546644449234
Average policy_loss of this k_epoch: -0.017688777297735214
KL Divergence Average Loss: 0.001905371667817235
Total Loss of this k_epoch: -0.02196245826780796


Entropy of this k_epoch: 0.06642195582389832
Average policy_loss of this k_epoch: -0.019939465448260307
KL Divergence Average Loss: 0.005160653963685036
Total Loss of this k_epoch: -0.02320895716547966


Entropy of this k_epoch: 0.051065053790807724
Average policy_loss of this k_epoch: -0.02078136056661606
KL Divergence Average Loss: 0.008872780948877335
Total Loss of this k_epoch: -0.023245885968208313






Entropy of this k_epoch: 0.04308610036969185
Average policy_loss of this k_epoch: -0.021152524277567863
KL Divergence Average Loss: 0.011117519810795784
Total Loss of this k_epoch: -0.023195654153823853


Entropy of this k_epoch: 0.036315079778432846
Average policy_loss of this k_epoch: -0.021433737128973007
KL Divergence Average Loss: 0.013686196878552437
Total Loss of this k_epoch: -0.023112628608942032






Entropy of this k_epoch: 0.029980050399899483
Average policy_loss of this k_epoch: -0.021714776754379272
KL Divergence Average Loss: 0.015688441693782806
Total Loss of this k_epoch: -0.023056894540786743






Entropy of this k_epoch: 0.025742381811141968
Average policy_loss of this k_epoch: -0.021893369033932686
KL Divergence Average Loss: 0.017374573275446892
Total Loss of this k_epoch: -0.0230067428201437


Entropy of this k_epoch: 0.020949000492691994
Average policy_loss of this k_epoch: -0.022077258676290512
KL Divergence Average Loss: 0.019504766911268234
Total Loss of this k_epoch: -0.022929660975933075


Entropy of this k_epoch: 0.019547609612345695
Average policy_loss of this k_epoch: -0.02213297039270401
KL Divergence Average Loss: 0.020073719322681427
Total Loss of this k_epoch: -0.022909613326191902






Entropy of this k_epoch: 0.029043106362223625
Average policy_loss of this k_epoch: -0.020794732496142387
KL Divergence Average Loss: 0.030447689816355705
Total Loss of this k_epoch: -0.021942410618066788


Entropy of this k_epoch: 0.01592447981238365
Average policy_loss of this k_epoch: -0.02225702628493309
KL Divergence Average Loss: 0.021762952208518982
Total Loss of this k_epoch: -0.022835619747638702






Entropy of this k_epoch: 0.014776483178138733
Average policy_loss of this k_epoch: -0.022297028452157974
KL Divergence Average Loss: 0.02232903242111206
Total Loss of this k_epoch: -0.022812562063336372






Entropy of this k_epoch: 0.012235922738909721
Average policy_loss of this k_epoch: -0.022390859201550484
KL Divergence Average Loss: 0.023586999624967575
Total Loss of this k_epoch: -0.022766785696148872


Entropy of this k_epoch: 0.011480298824608326
Average policy_loss of this k_epoch: -0.0224152822047472
KL Divergence Average Loss: 0.023988239467144012
Total Loss of this k_epoch: -0.022749414667487144


Entropy of this k_epoch: 0.011806782335042953
Average policy_loss of this k_epoch: -0.022404154762625694
KL Divergence Average Loss: 0.02381107583642006
Total Loss of this k_epoch: -0.022756382822990417






Entropy of this k_epoch: 0.011262287385761738
Average policy_loss of this k_epoch: -0.02241641655564308
KL Divergence Average Loss: 0.02408260479569435
Total Loss of this k_epoch: -0.022738706320524216


Entropy of this k_epoch: 0.011360900476574898
Average policy_loss of this k_epoch: -0.02242167480289936
KL Divergence Average Loss: 0.024021781980991364
Total Loss of this k_epoch: -0.02274950221180916






Entropy of this k_epoch: 0.01085071824491024
Average policy_loss of this k_epoch: -0.02244197390973568
KL Divergence Average Loss: 0.02443091571331024
Total Loss of this k_epoch: -0.022740202024579048






Entropy of this k_epoch: 0.010007571429014206
Average policy_loss of this k_epoch: -0.02246628701686859
KL Divergence Average Loss: 0.024753421545028687
Total Loss of this k_epoch: -0.022719131782650948


Entropy of this k_epoch: 0.01010526530444622
Average policy_loss of this k_epoch: -0.02245953306555748
KL Divergence Average Loss: 0.024698615074157715
Total Loss of this k_epoch: -0.022717809304594994


Entropy of this k_epoch: 0.011081865057349205
Average policy_loss of this k_epoch: -0.022434011101722717
KL Divergence Average Loss: 0.024281103163957596
Total Loss of this k_epoch: -0.022745294496417046






Entropy of this k_epoch: 0.010899102315306664
Average policy_loss of this k_epoch: -0.022439636290073395
KL Divergence Average Loss: 0.02428433671593666
Total Loss of this k_epoch: -0.02274174802005291


Entropy of this k_epoch: 0.012269938364624977
Average policy_loss of this k_epoch: -0.022395556792616844
KL Divergence Average Loss: 0.02375754527747631
Total Loss of this k_epoch: -0.022771479561924934






Entropy of this k_epoch: 0.012586966156959534
Average policy_loss of this k_epoch: -0.022357555106282234
KL Divergence Average Loss: 0.034523606300354004
Total Loss of this k_epoch: -0.02264166809618473






Entropy of this k_epoch: 0.013172033242881298
Average policy_loss of this k_epoch: -0.022351469844579697
KL Divergence Average Loss: 0.029014555737376213
Total Loss of this k_epoch: -0.02271992526948452


Entropy of this k_epoch: 0.012179220095276833
Average policy_loss of this k_epoch: -0.02239276096224785
KL Divergence Average Loss: 0.023919323459267616
Total Loss of this k_epoch: -0.02276252768933773


Entropy of this k_epoch: 0.01215983647853136
Average policy_loss of this k_epoch: -0.02239062264561653
KL Divergence Average Loss: 0.023660840466618538
Total Loss of this k_epoch: -0.022762006148695946






Entropy of this k_epoch: 0.013048806227743626
Average policy_loss of this k_epoch: -0.022363848984241486
KL Divergence Average Loss: 0.02467581443488598
Total Loss of this k_epoch: -0.02276953123509884


Entropy of this k_epoch: 0.01440921239554882
Average policy_loss of this k_epoch: -0.022316982969641685
KL Divergence Average Loss: 0.02331746555864811
Total Loss of this k_epoch: -0.022804267704486847






Entropy of this k_epoch: 0.013457059860229492
Average policy_loss of this k_epoch: -0.022352084517478943
KL Divergence Average Loss: 0.02369825541973114
Total Loss of this k_epoch: -0.022787954658269882






Entropy of this k_epoch: 0.014397932216525078
Average policy_loss of this k_epoch: -0.022320164367556572
KL Divergence Average Loss: 0.023985538631677628
Total Loss of this k_epoch: -0.022800207138061523


Entropy of this k_epoch: 0.014636389911174774
Average policy_loss of this k_epoch: -0.022309081628918648
KL Divergence Average Loss: 0.0225166454911232
Total Loss of this k_epoch: -0.022815736010670662


Entropy of this k_epoch: 0.014589480124413967
Average policy_loss of this k_epoch: -0.022311635315418243
KL Divergence Average Loss: 0.022550612688064575
Total Loss of this k_epoch: -0.022815601900219917






Entropy of this k_epoch: 0.016992561519145966
Average policy_loss of this k_epoch: -0.02222866751253605
KL Divergence Average Loss: 0.02125837653875351
Total Loss of this k_epoch: -0.022865712642669678


Entropy of this k_epoch: 0.017472300678491592
Average policy_loss of this k_epoch: -0.02222343534231186
KL Divergence Average Loss: 0.021646207198500633
Total Loss of this k_epoch: -0.022880587726831436






Entropy of this k_epoch: 0.017801720649003983
Average policy_loss of this k_epoch: -0.022193314507603645
KL Divergence Average Loss: 0.020958293229341507
Total Loss of this k_epoch: -0.02287381701171398






Entropy of this k_epoch: 0.017380114644765854
Average policy_loss of this k_epoch: -0.022217418998479843
KL Divergence Average Loss: 0.0214634258300066
Total Loss of this k_epoch: -0.02287179045379162


Entropy of this k_epoch: 0.0179060660302639
Average policy_loss of this k_epoch: -0.022189002484083176
KL Divergence Average Loss: 0.02086452953517437
Total Loss of this k_epoch: -0.02287565916776657


Entropy of this k_epoch: 0.019448023289442062
Average policy_loss of this k_epoch: -0.02213507518172264
KL Divergence Average Loss: 0.020178183913230896
Total Loss of this k_epoch: -0.02290569432079792






Entropy of this k_epoch: 0.020641803741455078
Average policy_loss of this k_epoch: -0.022096429020166397
KL Divergence Average Loss: 0.02040914073586464
Total Loss of this k_epoch: -0.022924426943063736


Entropy of this k_epoch: 0.020305240526795387
Average policy_loss of this k_epoch: -0.022102173417806625
KL Divergence Average Loss: 0.019779201596975327
Total Loss of this k_epoch: -0.02291964367032051






Entropy of this k_epoch: 0.02343067340552807
Average policy_loss of this k_epoch: -0.021989520639181137
KL Divergence Average Loss: 0.018487397581338882
Total Loss of this k_epoch: -0.022976180538535118






Entropy of this k_epoch: 0.03138945251703262
Average policy_loss of this k_epoch: -0.02152875065803528
KL Divergence Average Loss: 0.0646866112947464
Total Loss of this k_epoch: -0.022451356053352356


Entropy of this k_epoch: 0.024940453469753265
Average policy_loss of this k_epoch: -0.021924367174506187
KL Divergence Average Loss: 0.017748437821865082
Total Loss of this k_epoch: -0.0229939054697752


Entropy of this k_epoch: 0.03000500798225403
Average policy_loss of this k_epoch: -0.0217141043394804
KL Divergence Average Loss: 0.021990763023495674
Total Loss of this k_epoch: -0.022994447499513626






Entropy of this k_epoch: 0.02990906313061714
Average policy_loss of this k_epoch: -0.021728038787841797
KL Divergence Average Loss: 0.01575883850455284
Total Loss of this k_epoch: -0.023065902292728424


Entropy of this k_epoch: 0.030990982428193092
Average policy_loss of this k_epoch: -0.021676775068044662
KL Divergence Average Loss: 0.015381388366222382
Total Loss of this k_epoch: -0.023072510957717896






Entropy of this k_epoch: 0.032945841550827026
Average policy_loss of this k_epoch: -0.02159443497657776
KL Divergence Average Loss: 0.014636039733886719
Total Loss of this k_epoch: -0.02309536561369896






Entropy of this k_epoch: 0.03384237736463547
Average policy_loss of this k_epoch: -0.021558282896876335
KL Divergence Average Loss: 0.014278056100010872
Total Loss of this k_epoch: -0.02310761995613575


Entropy of this k_epoch: 0.03674021735787392
Average policy_loss of this k_epoch: -0.02143487147986889
KL Divergence Average Loss: 0.013231786899268627
Total Loss of this k_epoch: -0.02313956432044506


Entropy of this k_epoch: 0.03880428150296211
Average policy_loss of this k_epoch: -0.021319005638360977
KL Divergence Average Loss: 0.012578004039824009
Total Loss of this k_epoch: -0.02313343808054924






Entropy of this k_epoch: 0.04076841101050377
Average policy_loss of this k_epoch: -0.021265123039484024
KL Divergence Average Loss: 0.011821512132883072
Total Loss of this k_epoch: -0.023185329511761665


Entropy of this k_epoch: 0.043961890041828156
Average policy_loss of this k_epoch: -0.02109922468662262
KL Divergence Average Loss: 0.010842757299542427
Total Loss of this k_epoch: -0.023188890889286995






Entropy of this k_epoch: 0.04368705302476883
Average policy_loss of this k_epoch: -0.02110668271780014
KL Divergence Average Loss: 0.011109336279332638
Total Loss of this k_epoch: -0.02317994087934494






Entropy of this k_epoch: 0.05519513413310051
Average policy_loss of this k_epoch: -0.020483223721385002
KL Divergence Average Loss: 0.008258292451500893
Total Loss of this k_epoch: -0.02316039800643921


Entropy of this k_epoch: 0.05421258136630058
Average policy_loss of this k_epoch: -0.0206269770860672
KL Divergence Average Loss: 0.007855268195271492
Total Loss of this k_epoch: -0.023259053006768227


Entropy of this k_epoch: 0.05801183730363846
Average policy_loss of this k_epoch: -0.020378142595291138
KL Divergence Average Loss: 0.007156719453632832
Total Loss of this k_epoch: -0.02320716716349125






Entropy of this k_epoch: 0.05581185966730118
Average policy_loss of this k_epoch: -0.02049321122467518
KL Divergence Average Loss: 0.00748888123780489
Total Loss of this k_epoch: -0.023208914324641228


Entropy of this k_epoch: 0.05879594013094902
Average policy_loss of this k_epoch: -0.02041052281856537
KL Divergence Average Loss: 0.006801589857786894
Total Loss of this k_epoch: -0.023282304406166077






Entropy of this k_epoch: 0.06427694112062454
Average policy_loss of this k_epoch: -0.02010062150657177
KL Divergence Average Loss: 0.0054420423693954945
Total Loss of this k_epoch: -0.023260047659277916




Epoch 20/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.43it/s]

Entropy of this k_epoch: 0.06127789989113808
Average policy_loss of this k_epoch: -0.02025355026125908
KL Divergence Average Loss: 0.006307760253548622
Total Loss of this k_epoch: -0.02325436659157276


Entropy of this k_epoch: 0.06462844461202621
Average policy_loss of this k_epoch: -0.02003868669271469
KL Divergence Average Loss: 0.005454692989587784
Total Loss of this k_epoch: -0.023215562105178833

Last k_epoch stats:
Loss: -0.0232156 | Ratio: 1.0045264 | Entropy Term: 0.0646284



>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  62%|██████▎   | 20/32 [01:48<01:04,  5.41s/it]

Entire Validation Dataset Accuracy: 0.9167| 176.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.06701119244098663
Average policy_loss of this k_epoch: 0.00010836124420166016
KL Divergence Average Loss: 0.0004493453889153898
Total Loss of this k_epoch: -0.0032377049792557955






Entropy of this k_epoch: 0.06763476878404617
Average policy_loss of this k_epoch: 0.00013583898544311523
KL Divergence Average Loss: 0.0004459808114916086
Total Loss of this k_epoch: -0.0032414395827800035






Entropy of this k_epoch: 0.07350073009729385
Average policy_loss of this k_epoch: 0.00036183372139930725
KL Divergence Average Loss: 0.0006303588161244988
Total Loss of this k_epoch: -0.003306899219751358


Entropy of this k_epoch: 0.07467855513095856
Average policy_loss of this k_epoch: 0.0004057064652442932
KL Divergence Average Loss: 0.0007358010625466704
Total Loss of this k_epoch: -0.0033208634704351425


Entropy of this k_epoch: 0.07769517600536346
Average policy_loss of this k_epoch: 0.0005299970507621765
KL Divergence Average Loss: 0.0009358395473100245
Total Loss of this k_epoch: -0.003345403354614973






Entropy of this k_epoch: 0.08519204705953598
Average policy_loss of this k_epoch: 0.0008282698690891266
KL Divergence Average Loss: 0.0017407680861651897
Total Loss of this k_epoch: -0.0034139249473810196


Entropy of this k_epoch: 0.08409911394119263
Average policy_loss of this k_epoch: 0.0008093081414699554
KL Divergence Average Loss: 0.0019940845668315887
Total Loss of this k_epoch: -0.003375706961378455






Entropy of this k_epoch: 0.09783077239990234
Average policy_loss of this k_epoch: 0.0014506950974464417
KL Divergence Average Loss: 0.005950495600700378
Total Loss of this k_epoch: -0.0033813384361565113






Entropy of this k_epoch: 0.09872929751873016
Average policy_loss of this k_epoch: 0.0014468953013420105
KL Divergence Average Loss: 0.004401118494570255
Total Loss of this k_epoch: -0.0034455587156116962


Entropy of this k_epoch: 0.10153239965438843
Average policy_loss of this k_epoch: 0.0015429556369781494
KL Divergence Average Loss: 0.004821444861590862
Total Loss of this k_epoch: -0.003485450055450201


Entropy of this k_epoch: 0.10655580461025238
Average policy_loss of this k_epoch: 0.0017728954553604126
KL Divergence Average Loss: 0.005766665562987328
Total Loss of this k_epoch: -0.003497228492051363






Entropy of this k_epoch: 0.11434772610664368
Average policy_loss of this k_epoch: 0.0021392442286014557
KL Divergence Average Loss: 0.008051717653870583
Total Loss of this k_epoch: -0.0034976250026375055


Entropy of this k_epoch: 0.11029870808124542
Average policy_loss of this k_epoch: 0.001945890486240387
KL Divergence Average Loss: 0.0069807060062885284
Total Loss of this k_epoch: -0.0034992380533367395






Entropy of this k_epoch: 0.11705997586250305
Average policy_loss of this k_epoch: 0.002289004623889923
KL Divergence Average Loss: 0.009188024327158928
Total Loss of this k_epoch: -0.003472113749012351






Entropy of this k_epoch: 0.12880852818489075
Average policy_loss of this k_epoch: 0.0029603540897369385
KL Divergence Average Loss: 0.01615118235349655
Total Loss of this k_epoch: -0.0033185607753694057


Entropy of this k_epoch: 0.12370418012142181
Average policy_loss of this k_epoch: 0.002613082528114319
KL Divergence Average Loss: 0.011615637689828873
Total Loss of this k_epoch: -0.0034559699706733227


Entropy of this k_epoch: 0.12381045520305634
Average policy_loss of this k_epoch: 0.0026130378246307373
KL Divergence Average Loss: 0.01114768348634243
Total Loss of this k_epoch: -0.0034660084638744593






Entropy of this k_epoch: 0.12252068519592285
Average policy_loss of this k_epoch: 0.0025374069809913635
KL Divergence Average Loss: 0.010684886015951633
Total Loss of this k_epoch: -0.003481778781861067


Entropy of this k_epoch: 0.11962300539016724
Average policy_loss of this k_epoch: 0.0024252645671367645
KL Divergence Average Loss: 0.010433807969093323
Total Loss of this k_epoch: -0.0034515478182584047






Entropy of this k_epoch: 0.12279567122459412
Average policy_loss of this k_epoch: 0.002556823194026947
KL Divergence Average Loss: 0.010919821448624134
Total Loss of this k_epoch: -0.0034737621899694204






Entropy of this k_epoch: 0.12188926339149475
Average policy_loss of this k_epoch: 0.00255633145570755
KL Divergence Average Loss: 0.01146998256444931
Total Loss of this k_epoch: -0.0034234318882226944


Entropy of this k_epoch: 0.11536476016044617
Average policy_loss of this k_epoch: 0.0022030621767044067
KL Divergence Average Loss: 0.008441191166639328
Total Loss of this k_epoch: -0.0034807641059160233


Entropy of this k_epoch: 0.11218243837356567
Average policy_loss of this k_epoch: 0.0020537301898002625
KL Divergence Average Loss: 0.007602882571518421
Total Loss of this k_epoch: -0.003479363163933158






Entropy of this k_epoch: 0.1138358861207962
Average policy_loss of this k_epoch: 0.0021126605570316315
KL Divergence Average Loss: 0.007677759043872356
Total Loss of this k_epoch: -0.003502356121316552


Entropy of this k_epoch: 0.10869716107845306
Average policy_loss of this k_epoch: 0.001865983009338379
KL Divergence Average Loss: 0.006323385052382946
Total Loss of this k_epoch: -0.0035056411288678646






Entropy of this k_epoch: 0.11382851004600525
Average policy_loss of this k_epoch: 0.0021195560693740845
KL Divergence Average Loss: 0.008099570870399475
Total Loss of this k_epoch: -0.0034908736124634743






Entropy of this k_epoch: 0.11340934038162231
Average policy_loss of this k_epoch: 0.0020881444215774536
KL Divergence Average Loss: 0.007808886002749205
Total Loss of this k_epoch: -0.003504233667626977


Entropy of this k_epoch: 0.10149554908275604
Average policy_loss of this k_epoch: 0.0015415027737617493
KL Divergence Average Loss: 0.0046325018629431725
Total Loss of this k_epoch: -0.0034869499504566193


Entropy of this k_epoch: 0.10644123703241348
Average policy_loss of this k_epoch: 0.0017608851194381714
KL Divergence Average Loss: 0.0058352104388177395
Total Loss of this k_epoch: -0.0035028248094022274






Entropy of this k_epoch: 0.10674761235713959
Average policy_loss of this k_epoch: 0.0017777197062969208
KL Divergence Average Loss: 0.00594758428633213
Total Loss of this k_epoch: -0.0035001852083951235


Entropy of this k_epoch: 0.10332116484642029
Average policy_loss of this k_epoch: 0.001632004976272583
KL Divergence Average Loss: 0.005106387194246054
Total Loss of this k_epoch: -0.003482989501208067






Entropy of this k_epoch: 0.10318269580602646
Average policy_loss of this k_epoch: 0.0016129612922668457
KL Divergence Average Loss: 0.005127680022269487
Total Loss of this k_epoch: -0.003494896925985813






Entropy of this k_epoch: 0.10184937715530396
Average policy_loss of this k_epoch: 0.0015812888741493225
KL Divergence Average Loss: 0.005378145724534988
Total Loss of this k_epoch: -0.003457398619502783


Entropy of this k_epoch: 0.10250933468341827
Average policy_loss of this k_epoch: 0.0015773773193359375
KL Divergence Average Loss: 0.004695394076406956
Total Loss of this k_epoch: -0.003501135390251875


Entropy of this k_epoch: 0.10631508380174637
Average policy_loss of this k_epoch: 0.0017845295369625092
KL Divergence Average Loss: 0.006197875365614891
Total Loss of this k_epoch: -0.0034692459739744663






Entropy of this k_epoch: 0.10495074838399887
Average policy_loss of this k_epoch: 0.0016915351152420044
KL Divergence Average Loss: 0.00525709381327033
Total Loss of this k_epoch: -0.0035034315660595894


Entropy of this k_epoch: 0.10581160336732864
Average policy_loss of this k_epoch: 0.0017307326197624207
KL Divergence Average Loss: 0.005535120610147715
Total Loss of this k_epoch: -0.003504496533423662






Entropy of this k_epoch: 0.10330893844366074
Average policy_loss of this k_epoch: 0.0016065314412117004
KL Divergence Average Loss: 0.004829934798181057
Total Loss of this k_epoch: -0.0035106162540614605






Entropy of this k_epoch: 0.10926266759634018
Average policy_loss of this k_epoch: 0.0018988922238349915
KL Divergence Average Loss: 0.006371685303747654
Total Loss of this k_epoch: -0.003500524442642927


Entropy of this k_epoch: 0.10649985820055008
Average policy_loss of this k_epoch: 0.001763470470905304
KL Divergence Average Loss: 0.005804535932838917
Total Loss of this k_epoch: -0.003503476968035102


Entropy of this k_epoch: 0.1097361147403717
Average policy_loss of this k_epoch: 0.0019171759486198425
KL Divergence Average Loss: 0.006609332747757435
Total Loss of this k_epoch: -0.0035035365726798773






Entropy of this k_epoch: 0.11279890686273575
Average policy_loss of this k_epoch: 0.0020544081926345825
KL Divergence Average Loss: 0.007317481562495232
Total Loss of this k_epoch: -0.003512362716719508


Entropy of this k_epoch: 0.11229203641414642
Average policy_loss of this k_epoch: 0.002048056572675705
KL Divergence Average Loss: 0.007567540742456913
Total Loss of this k_epoch: -0.0034908701200038195






Entropy of this k_epoch: 0.1131814569234848
Average policy_loss of this k_epoch: 0.002075895667076111
KL Divergence Average Loss: 0.007645240984857082
Total Loss of this k_epoch: -0.0035067249555140734






Entropy of this k_epoch: 0.11174776405096054
Average policy_loss of this k_epoch: 0.0020171478390693665
KL Divergence Average Loss: 0.007349499501287937
Total Loss of this k_epoch: -0.003496745368465781


Entropy of this k_epoch: 0.10577983409166336
Average policy_loss of this k_epoch: 0.0017341524362564087
KL Divergence Average Loss: 0.005737388972193003
Total Loss of this k_epoch: -0.003497465280815959


Entropy of this k_epoch: 0.11048160493373871
Average policy_loss of this k_epoch: 0.0019541382789611816
KL Divergence Average Loss: 0.00698724202811718
Total Loss of this k_epoch: -0.0035000694915652275






Entropy of this k_epoch: 0.11303193867206573
Average policy_loss of this k_epoch: 0.00206959992647171
KL Divergence Average Loss: 0.007497619837522507
Total Loss of this k_epoch: -0.0035070208832621574


Entropy of this k_epoch: 0.11167974770069122
Average policy_loss of this k_epoch: 0.0020155571401119232
KL Divergence Average Loss: 0.007385752163827419
Total Loss of this k_epoch: -0.003494572825729847






Entropy of this k_epoch: 0.11081619560718536
Average policy_loss of this k_epoch: 0.0019673816859722137
KL Divergence Average Loss: 0.0069525158032774925
Total Loss of this k_epoch: -0.003503903280943632






Entropy of this k_epoch: 0.11300857365131378
Average policy_loss of this k_epoch: 0.0020572617650032043
KL Divergence Average Loss: 0.0073891375213861465
Total Loss of this k_epoch: -0.0035192754585295916


Entropy of this k_epoch: 0.11105529218912125
Average policy_loss of this k_epoch: 0.001975398510694504
KL Divergence Average Loss: 0.007187687326222658
Total Loss of this k_epoch: -0.0035054890904575586


Entropy of this k_epoch: 0.1134069412946701
Average policy_loss of this k_epoch: 0.0020935572683811188
KL Divergence Average Loss: 0.007817055098712444
Total Loss of this k_epoch: -0.003498619422316551






Entropy of this k_epoch: 0.1102508008480072
Average policy_loss of this k_epoch: 0.001933656632900238
KL Divergence Average Loss: 0.006616867613047361
Total Loss of this k_epoch: -0.0035127149894833565


Entropy of this k_epoch: 0.11479178071022034
Average policy_loss of this k_epoch: 0.002149481326341629
KL Divergence Average Loss: 0.007885084487497807
Total Loss of this k_epoch: -0.003511257003992796






Entropy of this k_epoch: 0.11339540779590607
Average policy_loss of this k_epoch: 0.002070970833301544
KL Divergence Average Loss: 0.007703443989157677
Total Loss of this k_epoch: -0.0035217651166021824






Entropy of this k_epoch: 0.11315678060054779
Average policy_loss of this k_epoch: 0.002080172300338745
KL Divergence Average Loss: 0.00789767224341631
Total Loss of this k_epoch: -0.003498690202832222


Entropy of this k_epoch: 0.11031792312860489
Average policy_loss of this k_epoch: 0.0019595101475715637
KL Divergence Average Loss: 0.007052699103951454
Total Loss of this k_epoch: -0.003485859138891101


Entropy of this k_epoch: 0.11407520622015
Average policy_loss of this k_epoch: 0.002117931842803955
KL Divergence Average Loss: 0.007702434901148081
Total Loss of this k_epoch: -0.0035088041331619024






Entropy of this k_epoch: 0.11387388408184052
Average policy_loss of this k_epoch: 0.0021141767501831055
KL Divergence Average Loss: 0.007927961647510529
Total Loss of this k_epoch: -0.0035002378281205893


Entropy of this k_epoch: 0.11312901973724365
Average policy_loss of this k_epoch: 0.0020797401666641235
KL Divergence Average Loss: 0.007687440142035484
Total Loss of this k_epoch: -0.00349983642809093






Entropy of this k_epoch: 0.1081625446677208
Average policy_loss of this k_epoch: 0.0018463805317878723
KL Divergence Average Loss: 0.006271528545767069
Total Loss of this k_epoch: -0.0034990315325558186




Epoch 21/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.45it/s]


Entropy of this k_epoch: 0.10843874514102936
Average policy_loss of this k_epoch: 0.0018461495637893677
KL Divergence Average Loss: 0.006140487268567085
Total Loss of this k_epoch: -0.003514382988214493


Entropy of this k_epoch: 0.11000196635723114
Average policy_loss of this k_epoch: 0.0019209347665309906
KL Divergence Average Loss: 0.0069237202405929565
Total Loss of this k_epoch: -0.0035099261440336704

Last k_epoch stats:
Loss: -0.0035099 | Ratio: 0.9846325 | Entropy Term: 0.1100020


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  66%|██████▌   | 21/32 [01:53<00:59,  5.40s/it]

Entire Validation Dataset Accuracy: 0.9375| 180.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200, 0.2200,
        0.2200], device='cuda:0')





Entropy of this k_epoch: 0.1160273626446724
Average policy_loss of this k_epoch: 0.04154485464096069
KL Divergence Average Loss: 0.0014491431647911668
Total Loss of this k_epoch: 0.03575797751545906






Entropy of this k_epoch: 0.10094386339187622
Average policy_loss of this k_epoch: -0.01822017692029476
KL Divergence Average Loss: 0.0005017251241952181
Total Loss of this k_epoch: -0.023262351751327515






Entropy of this k_epoch: 0.0917244553565979
Average policy_loss of this k_epoch: 0.00176277756690979
KL Divergence Average Loss: 0.0019746311008930206
Total Loss of this k_epoch: -0.0028036991134285927


Entropy of this k_epoch: 0.07390561699867249
Average policy_loss of this k_epoch: -0.03023558109998703
KL Divergence Average Loss: 0.0028425564523786306
Total Loss of this k_epoch: -0.03390243649482727


Entropy of this k_epoch: 0.05549164488911629
Average policy_loss of this k_epoch: -0.03143492713570595
KL Divergence Average Loss: 0.0063945092260837555
Total Loss of this k_epoch: -0.03414556384086609






Entropy of this k_epoch: 0.05248931050300598
Average policy_loss of this k_epoch: -0.0316307507455349
KL Divergence Average Loss: 0.007217474281787872
Total Loss of this k_epoch: -0.034183043986558914


Entropy of this k_epoch: 0.0396377295255661
Average policy_loss of this k_epoch: -0.032342053949832916
KL Divergence Average Loss: 0.01075077150017023
Total Loss of this k_epoch: -0.03421643376350403






Entropy of this k_epoch: 0.03391958773136139
Average policy_loss of this k_epoch: -0.0326620489358902
KL Divergence Average Loss: 0.012679646722972393
Total Loss of this k_epoch: -0.03423123061656952





Epoch 22/32 (Inner K-Epochs):  16%|█▌        | 10/64 [00:00<00:04, 12.50it/s]

Entropy of this k_epoch: 0.030857913196086884
Average policy_loss of this k_epoch: -0.03280859813094139
KL Divergence Average Loss: 0.013740536756813526
Total Loss of this k_epoch: -0.034214090555906296


Entropy of this k_epoch: 0.02534261718392372
Average policy_loss of this k_epoch: -0.03305883705615997
KL Divergence Average Loss: 0.015853123739361763
Total Loss of this k_epoch: -0.03416743874549866



[A


Entropy of this k_epoch: 0.025314493104815483
Average policy_loss of this k_epoch: -0.03306737542152405
KL Divergence Average Loss: 0.01583472639322281
Total Loss of this k_epoch: -0.03417475149035454






Entropy of this k_epoch: 0.02133433148264885
Average policy_loss of this k_epoch: -0.03325187414884567
KL Divergence Average Loss: 0.017503682523965836
Total Loss of this k_epoch: -0.03414355590939522






Entropy of this k_epoch: 0.019914913922548294
Average policy_loss of this k_epoch: -0.03330768644809723
KL Divergence Average Loss: 0.01807098276913166
Total Loss of this k_epoch: -0.03412272036075592


Entropy of this k_epoch: 0.017131812870502472
Average policy_loss of this k_epoch: -0.03341365605592728
KL Divergence Average Loss: 0.019367747008800507
Total Loss of this k_epoch: -0.034076567739248276


Entropy of this k_epoch: 0.0173218734562397
Average policy_loss of this k_epoch: -0.03340291976928711
KL Divergence Average Loss: 0.019235599786043167
Total Loss of this k_epoch: -0.03407665714621544






Entropy of this k_epoch: 0.01649296283721924
Average policy_loss of this k_epoch: -0.03345783054828644
KL Divergence Average Loss: 0.019625429064035416
Total Loss of this k_epoch: -0.03408622369170189


Entropy of this k_epoch: 0.016137635335326195
Average policy_loss of this k_epoch: -0.033447109162807465
KL Divergence Average Loss: 0.019961778074502945
Total Loss of this k_epoch: -0.03405437618494034






Entropy of this k_epoch: 0.01665031537413597
Average policy_loss of this k_epoch: -0.03344947472214699
KL Divergence Average Loss: 0.01952936500310898
Total Loss of this k_epoch: -0.034086696803569794






Entropy of this k_epoch: 0.013549214228987694
Average policy_loss of this k_epoch: -0.03359343856573105
KL Divergence Average Loss: 0.02106960490345955
Total Loss of this k_epoch: -0.03406020253896713


Entropy of this k_epoch: 0.014022212475538254
Average policy_loss of this k_epoch: -0.03355922922492027
KL Divergence Average Loss: 0.020783182233572006
Total Loss of this k_epoch: -0.03405250981450081


Entropy of this k_epoch: 0.013996612280607224
Average policy_loss of this k_epoch: -0.033554915338754654
KL Divergence Average Loss: 0.020773915573954582
Total Loss of this k_epoch: -0.03404700383543968






Entropy of this k_epoch: 0.014187069609761238
Average policy_loss of this k_epoch: -0.03355007246136665
KL Divergence Average Loss: 0.020681697875261307
Total Loss of this k_epoch: -0.03405261039733887


Entropy of this k_epoch: 0.014148589223623276
Average policy_loss of this k_epoch: -0.03353523463010788
KL Divergence Average Loss: 0.020797114819288254
Total Loss of this k_epoch: -0.034034691751003265






Entropy of this k_epoch: 0.012741036713123322
Average policy_loss of this k_epoch: -0.033618297427892685
KL Divergence Average Loss: 0.021453600376844406
Total Loss of this k_epoch: -0.03404081240296364






Entropy of this k_epoch: 0.012179724872112274
Average policy_loss of this k_epoch: -0.03362896665930748
KL Divergence Average Loss: 0.021722570061683655
Total Loss of this k_epoch: -0.03402072936296463


Entropy of this k_epoch: 0.01403476670384407
Average policy_loss of this k_epoch: -0.0335691012442112
KL Divergence Average Loss: 0.020761454477906227
Total Loss of this k_epoch: -0.034063223749399185


Entropy of this k_epoch: 0.014016876928508282
Average policy_loss of this k_epoch: -0.03355623036623001
KL Divergence Average Loss: 0.020805388689041138
Total Loss of this k_epoch: -0.03404901921749115






Entropy of this k_epoch: 0.01361069269478321
Average policy_loss of this k_epoch: -0.03357579559087753
KL Divergence Average Loss: 0.0209768395870924
Total Loss of this k_epoch: -0.034046564251184464


Entropy of this k_epoch: 0.0125247398391366
Average policy_loss of this k_epoch: -0.033614084124565125
KL Divergence Average Loss: 0.021492285653948784
Total Loss of this k_epoch: -0.0340253971517086






Entropy of this k_epoch: 0.01411319337785244
Average policy_loss of this k_epoch: -0.03354083001613617
KL Divergence Average Loss: 0.020719269290566444
Total Loss of this k_epoch: -0.034039296209812164






Entropy of this k_epoch: 0.01429703738540411
Average policy_loss of this k_epoch: -0.03354177996516228
KL Divergence Average Loss: 0.02068796381354332
Total Loss of this k_epoch: -0.03404975309967995


Entropy of this k_epoch: 0.014298389665782452
Average policy_loss of this k_epoch: -0.033549703657627106
KL Divergence Average Loss: 0.020756546407938004
Total Loss of this k_epoch: -0.03405705839395523


Entropy of this k_epoch: 0.015274769626557827
Average policy_loss of this k_epoch: -0.03349820896983147
KL Divergence Average Loss: 0.020172325894236565
Total Loss of this k_epoch: -0.034060221165418625






Entropy of this k_epoch: 0.014029481448233128
Average policy_loss of this k_epoch: -0.03355814516544342
KL Divergence Average Loss: 0.020811986178159714
Total Loss of this k_epoch: -0.034051500260829926


Entropy of this k_epoch: 0.014861796982586384
Average policy_loss of this k_epoch: -0.03352160006761551
KL Divergence Average Loss: 0.020493775606155396
Total Loss of this k_epoch: -0.03405975177884102






Entropy of this k_epoch: 0.015267949551343918
Average policy_loss of this k_epoch: -0.033512331545352936
KL Divergence Average Loss: 0.020213685929775238
Total Loss of this k_epoch: -0.034073591232299805






Entropy of this k_epoch: 0.0182158462703228
Average policy_loss of this k_epoch: -0.03338656574487686
KL Divergence Average Loss: 0.02296861819922924
Total Loss of this k_epoch: -0.03406767174601555


Entropy of this k_epoch: 0.016828816384077072
Average policy_loss of this k_epoch: -0.03343316167593002
KL Divergence Average Loss: 0.01948804408311844
Total Loss of this k_epoch: -0.03407972306013107


Entropy of this k_epoch: 0.017321797087788582
Average policy_loss of this k_epoch: -0.03342723846435547
KL Divergence Average Loss: 0.019249631091952324
Total Loss of this k_epoch: -0.03410083055496216






Entropy of this k_epoch: 0.0171742495149374
Average policy_loss of this k_epoch: -0.03342196345329285
KL Divergence Average Loss: 0.019279668107628822
Total Loss of this k_epoch: -0.034087881445884705


Entropy of this k_epoch: 0.018837807700037956
Average policy_loss of this k_epoch: -0.033334918320178986
KL Divergence Average Loss: 0.018603220582008362
Total Loss of this k_epoch: -0.03409077599644661






Entropy of this k_epoch: 0.01895860768854618
Average policy_loss of this k_epoch: -0.033345311880111694
KL Divergence Average Loss: 0.018490543588995934
Total Loss of this k_epoch: -0.03410833701491356






Entropy of this k_epoch: 0.01920069195330143
Average policy_loss of this k_epoch: -0.0333130769431591
KL Divergence Average Loss: 0.018460437655448914
Total Loss of this k_epoch: -0.034088507294654846


Entropy of this k_epoch: 0.019888468086719513
Average policy_loss of this k_epoch: -0.03330790251493454
KL Divergence Average Loss: 0.01809326931834221
Total Loss of this k_epoch: -0.03412139415740967


Entropy of this k_epoch: 0.02112375944852829
Average policy_loss of this k_epoch: -0.0332389734685421
KL Divergence Average Loss: 0.01761213317513466
Total Loss of this k_epoch: -0.03411903977394104






Entropy of this k_epoch: 0.020876290276646614
Average policy_loss of this k_epoch: -0.03326112776994705
KL Divergence Average Loss: 0.017697792500257492
Total Loss of this k_epoch: -0.034127965569496155


Entropy of this k_epoch: 0.024030689150094986
Average policy_loss of this k_epoch: -0.033109672367572784
KL Divergence Average Loss: 0.01647091656923294
Total Loss of this k_epoch: -0.034146495163440704






Entropy of this k_epoch: 0.022040411829948425
Average policy_loss of this k_epoch: -0.033172767609357834
KL Divergence Average Loss: 0.01722945272922516
Total Loss of this k_epoch: -0.03410249203443527






Entropy of this k_epoch: 0.024661976844072342
Average policy_loss of this k_epoch: -0.03308416157960892
KL Divergence Average Loss: 0.016135651618242264
Total Loss of this k_epoch: -0.03415590152144432


Entropy of this k_epoch: 0.023289937525987625
Average policy_loss of this k_epoch: -0.033128272742033005
KL Divergence Average Loss: 0.016695424914360046
Total Loss of this k_epoch: -0.03412581607699394


Entropy of this k_epoch: 0.02441767230629921
Average policy_loss of this k_epoch: -0.03309454768896103
KL Divergence Average Loss: 0.016217971220612526
Total Loss of this k_epoch: -0.034153249114751816






Entropy of this k_epoch: 0.025664137676358223
Average policy_loss of this k_epoch: -0.033051300793886185
KL Divergence Average Loss: 0.01597476378083229
Total Loss of this k_epoch: -0.034174758940935135


Entropy of this k_epoch: 0.02618267573416233
Average policy_loss of this k_epoch: -0.033031146973371506
KL Divergence Average Loss: 0.01549096405506134
Total Loss of this k_epoch: -0.03418537229299545






Entropy of this k_epoch: 0.028777798637747765
Average policy_loss of this k_epoch: -0.0329018160700798
KL Divergence Average Loss: 0.014543279074132442
Total Loss of this k_epoch: -0.034195274114608765






Entropy of this k_epoch: 0.0298813134431839
Average policy_loss of this k_epoch: -0.03283783793449402
KL Divergence Average Loss: 0.014184786006808281
Total Loss of this k_epoch: -0.03419005498290062


Entropy of this k_epoch: 0.02968793921172619
Average policy_loss of this k_epoch: -0.03286858648061752
KL Divergence Average Loss: 0.014163225889205933
Total Loss of this k_epoch: -0.034211352467536926


Entropy of this k_epoch: 0.027889614924788475
Average policy_loss of this k_epoch: -0.032954830676317215
KL Divergence Average Loss: 0.014901410788297653
Total Loss of this k_epoch: -0.03420029580593109






Entropy of this k_epoch: 0.031077228486537933
Average policy_loss of this k_epoch: -0.03277888894081116
KL Divergence Average Loss: 0.013724273070693016
Total Loss of this k_epoch: -0.03419550508260727


Entropy of this k_epoch: 0.031127694994211197
Average policy_loss of this k_epoch: -0.03275894373655319
KL Divergence Average Loss: 0.013677815906703472
Total Loss of this k_epoch: -0.03417855128645897






Entropy of this k_epoch: 0.032485298812389374
Average policy_loss of this k_epoch: -0.032734811305999756
KL Divergence Average Loss: 0.013216497376561165
Total Loss of this k_epoch: -0.0342269092798233






Entropy of this k_epoch: 0.03713151067495346
Average policy_loss of this k_epoch: -0.03244924172759056
KL Divergence Average Loss: 0.01707972213625908
Total Loss of this k_epoch: -0.03413502126932144


Entropy of this k_epoch: 0.03367157280445099
Average policy_loss of this k_epoch: -0.0326601080596447
KL Divergence Average Loss: 0.012794632464647293
Total Loss of this k_epoch: -0.034215740859508514


Entropy of this k_epoch: 0.033232010900974274
Average policy_loss of this k_epoch: -0.032671887427568436
KL Divergence Average Loss: 0.012866022065281868
Total Loss of this k_epoch: -0.03420482575893402




Epoch 22/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.42it/s]


Entropy of this k_epoch: 0.03541558235883713
Average policy_loss of this k_epoch: -0.032582029700279236
KL Divergence Average Loss: 0.012164033949375153
Total Loss of this k_epoch: -0.034231167286634445

Last k_epoch stats:
Loss: -0.0342312 | Ratio: 0.9992323 | Entropy Term: 0.0354156


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  69%|██████▉   | 22/32 [01:59<00:54,  5.40s/it]

Entire Validation Dataset Accuracy: 0.9219| 177.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.035209573805332184
Average policy_loss of this k_epoch: 9.697675704956055e-05
KL Divergence Average Loss: 0.00032421245123259723
Total Loss of this k_epoch: -0.0016602597897872329






Entropy of this k_epoch: 0.037734344601631165
Average policy_loss of this k_epoch: 0.00017175078392028809
KL Divergence Average Loss: 0.0004020250926259905
Total Loss of this k_epoch: -0.0017109462060034275






Entropy of this k_epoch: 0.03827313706278801
Average policy_loss of this k_epoch: 0.00018506869673728943
KL Divergence Average Loss: 0.00030800054082646966
Total Loss of this k_epoch: -0.0017255081329494715


Entropy of this k_epoch: 0.038607168942689896
Average policy_loss of this k_epoch: 0.00019811838865280151
KL Divergence Average Loss: 0.0004078407073393464
Total Loss of this k_epoch: -0.0017281617037951946


Entropy of this k_epoch: 0.04268081486225128
Average policy_loss of this k_epoch: 0.00031919777393341064
KL Divergence Average Loss: 0.0005885750870220363
Total Loss of this k_epoch: -0.0018089571967720985






Entropy of this k_epoch: 0.04626273363828659
Average policy_loss of this k_epoch: 0.0004388950765132904
KL Divergence Average Loss: 0.0011587627232074738
Total Loss of this k_epoch: -0.0018626541132107377


Entropy of this k_epoch: 0.04847114533185959
Average policy_loss of this k_epoch: 0.0005100555717945099
KL Divergence Average Loss: 0.0013393248664215207
Total Loss of this k_epoch: -0.0019001084147021174






Entropy of this k_epoch: 0.05048752203583717
Average policy_loss of this k_epoch: 0.0005806982517242432
KL Divergence Average Loss: 0.001793351024389267
Total Loss of this k_epoch: -0.0019257443491369486






Entropy of this k_epoch: 0.050966933369636536
Average policy_loss of this k_epoch: 0.0005954429507255554
KL Divergence Average Loss: 0.0018205358646810055
Total Loss of this k_epoch: -0.0019346984336152673


Entropy of this k_epoch: 0.05740704387426376
Average policy_loss of this k_epoch: 0.0008158311247825623
KL Divergence Average Loss: 0.003022767137736082
Total Loss of this k_epoch: -0.0020242934115231037


Entropy of this k_epoch: 0.061612628400325775
Average policy_loss of this k_epoch: 0.0009626224637031555
KL Divergence Average Loss: 0.0039015724323689938
Total Loss of this k_epoch: -0.002078993245959282






Entropy of this k_epoch: 0.066109299659729
Average policy_loss of this k_epoch: 0.0011249370872974396
KL Divergence Average Loss: 0.005038907751441002
Total Loss of this k_epoch: -0.00213013868778944


Entropy of this k_epoch: 0.06512594223022461
Average policy_loss of this k_epoch: 0.0010996013879776
KL Divergence Average Loss: 0.005099371075630188
Total Loss of this k_epoch: -0.002105702180415392






Entropy of this k_epoch: 0.07011434435844421
Average policy_loss of this k_epoch: 0.0012732930481433868
KL Divergence Average Loss: 0.006137360818684101
Total Loss of this k_epoch: -0.0021710507571697235






Entropy of this k_epoch: 0.07682320475578308
Average policy_loss of this k_epoch: 0.0015321969985961914
KL Divergence Average Loss: 0.008446933701634407
Total Loss of this k_epoch: -0.002224494004622102


Entropy of this k_epoch: 0.07897515594959259
Average policy_loss of this k_epoch: 0.0016239173710346222
KL Divergence Average Loss: 0.009502625092864037
Total Loss of this k_epoch: -0.0022298144176602364


Entropy of this k_epoch: 0.08783654868602753
Average policy_loss of this k_epoch: 0.001975640654563904
KL Divergence Average Loss: 0.012686182744801044
Total Loss of this k_epoch: -0.0022893252316862345






Entropy of this k_epoch: 0.09612296521663666
Average policy_loss of this k_epoch: 0.0023217424750328064
KL Divergence Average Loss: 0.016531039029359818
Total Loss of this k_epoch: -0.0023190954234451056


Entropy of this k_epoch: 0.103993721306324
Average policy_loss of this k_epoch: 0.002667948603630066
KL Divergence Average Loss: 0.020551182329654694
Total Loss of this k_epoch: -0.0023262256290763617






Entropy of this k_epoch: 0.1037517786026001
Average policy_loss of this k_epoch: 0.002656828612089157
KL Divergence Average Loss: 0.02040959522128105
Total Loss of this k_epoch: -0.002326664747670293






Entropy of this k_epoch: 0.1112748235464096
Average policy_loss of this k_epoch: 0.0030002035200595856
KL Divergence Average Loss: 0.024846971035003662
Total Loss of this k_epoch: -0.002315067918971181


Entropy of this k_epoch: 0.1160886362195015
Average policy_loss of this k_epoch: 0.003224492073059082
KL Divergence Average Loss: 0.02777229994535446
Total Loss of this k_epoch: -0.0023022170644253492


Entropy of this k_epoch: 0.11676401644945145
Average policy_loss of this k_epoch: 0.0032798945903778076
KL Divergence Average Loss: 0.02907397970557213
Total Loss of this k_epoch: -0.0022675665095448494






Entropy of this k_epoch: 0.11705050617456436
Average policy_loss of this k_epoch: 0.0032883137464523315
KL Divergence Average Loss: 0.02898821420967579
Total Loss of this k_epoch: -0.002274329774081707


Entropy of this k_epoch: 0.11658443510532379
Average policy_loss of this k_epoch: 0.003265898674726486
KL Divergence Average Loss: 0.028612863272428513
Total Loss of this k_epoch: -0.002277194755151868






Entropy of this k_epoch: 0.11774404346942902
Average policy_loss of this k_epoch: 0.0033086612820625305
KL Divergence Average Loss: 0.029065841808915138
Total Loss of this k_epoch: -0.0022878828458487988






Entropy of this k_epoch: 0.12145677208900452
Average policy_loss of this k_epoch: 0.003491383045911789
KL Divergence Average Loss: 0.03165499120950699
Total Loss of this k_epoch: -0.002264905720949173


Entropy of this k_epoch: 0.1174943745136261
Average policy_loss of this k_epoch: 0.003313623368740082
KL Divergence Average Loss: 0.029348041862249374
Total Loss of this k_epoch: -0.002267615171149373


Entropy of this k_epoch: 0.11954009532928467
Average policy_loss of this k_epoch: 0.0033922456204891205
KL Divergence Average Loss: 0.03016388602554798
Total Loss of this k_epoch: -0.002283120295032859






Entropy of this k_epoch: 0.12000719457864761
Average policy_loss of this k_epoch: 0.0034281015396118164
KL Divergence Average Loss: 0.03075573593378067
Total Loss of this k_epoch: -0.0022647010628134012


Entropy of this k_epoch: 0.11172117292881012
Average policy_loss of this k_epoch: 0.0030295327305793762
KL Divergence Average Loss: 0.025371404364705086
Total Loss of this k_epoch: -0.002302811946719885






Entropy of this k_epoch: 0.11042236536741257
Average policy_loss of this k_epoch: 0.002979777753353119
KL Divergence Average Loss: 0.024887964129447937
Total Loss of this k_epoch: -0.002292460761964321






Entropy of this k_epoch: 0.11066485941410065
Average policy_loss of this k_epoch: 0.0029846206307411194
KL Divergence Average Loss: 0.02482043392956257
Total Loss of this k_epoch: -0.002300417982041836


Entropy of this k_epoch: 0.10814222693443298
Average policy_loss of this k_epoch: 0.0028826408088207245
KL Divergence Average Loss: 0.023775441572070122
Total Loss of this k_epoch: -0.002286716131493449


Entropy of this k_epoch: 0.11035635322332382
Average policy_loss of this k_epoch: 0.0029637105762958527
KL Divergence Average Loss: 0.024389781057834625
Total Loss of this k_epoch: -0.002310209209099412






Entropy of this k_epoch: 0.10151468217372894
Average policy_loss of this k_epoch: 0.002579301595687866
KL Divergence Average Loss: 0.01993010751903057
Total Loss of this k_epoch: -0.0022971313446760178


Entropy of this k_epoch: 0.10359100997447968
Average policy_loss of this k_epoch: 0.0026561617851257324
KL Divergence Average Loss: 0.02054588496685028
Total Loss of this k_epoch: -0.0023179298732429743






Entropy of this k_epoch: 0.10421770811080933
Average policy_loss of this k_epoch: 0.002675991505384445
KL Divergence Average Loss: 0.02060610055923462
Total Loss of this k_epoch: -0.0023288328666239977






Entropy of this k_epoch: 0.10074000060558319
Average policy_loss of this k_epoch: 0.0025308318436145782
KL Divergence Average Loss: 0.019008051604032516
Total Loss of this k_epoch: -0.002316087484359741


Entropy of this k_epoch: 0.09826406836509705
Average policy_loss of this k_epoch: 0.0024290308356285095
KL Divergence Average Loss: 0.01801144704222679
Total Loss of this k_epoch: -0.0023040580563247204


Entropy of this k_epoch: 0.09570316225290298
Average policy_loss of this k_epoch: 0.0023069605231285095
KL Divergence Average Loss: 0.016417216509580612
Total Loss of this k_epoch: -0.0023140255361795425






Entropy of this k_epoch: 0.09805896878242493
Average policy_loss of this k_epoch: 0.0024066120386123657
KL Divergence Average Loss: 0.017381835728883743
Total Loss of this k_epoch: -0.002322518266737461


Entropy of this k_epoch: 0.09646016359329224
Average policy_loss of this k_epoch: 0.0023445896804332733
KL Divergence Average Loss: 0.01687879115343094
Total Loss of this k_epoch: -0.0023096303921192884






Entropy of this k_epoch: 0.09808924794197083
Average policy_loss of this k_epoch: 0.002412751317024231
KL Divergence Average Loss: 0.017604410648345947
Total Loss of this k_epoch: -0.0023156669922173023







Entropy of this k_epoch: 0.09687629342079163
Average policy_loss of this k_epoch: 0.0023642927408218384
KL Divergence Average Loss: 0.017215801402926445
Total Loss of this k_epoch: -0.0023073640186339617


Entropy of this k_epoch: 0.0980791449546814
Average policy_loss of this k_epoch: 0.002415604889392853
KL Divergence Average Loss: 0.01773233339190483
Total Loss of this k_epoch: -0.0023110290057957172



Epoch 23/32 (Inner K-Epochs):  72%|███████▏  | 46/64 [00:03<00:01, 12.38it/s][A


Entropy of this k_epoch: 0.09409850090742111
Average policy_loss of this k_epoch: 0.0022419430315494537
KL Divergence Average Loss: 0.015683693811297417
Total Loss of this k_epoch: -0.0023061451502144337






Entropy of this k_epoch: 0.0970180332660675
Average policy_loss of this k_epoch: 0.002368830144405365
KL Divergence Average Loss: 0.01715061068534851
Total Loss of this k_epoch: -0.0023105654399842024






Entropy of this k_epoch: 0.09892398118972778
Average policy_loss of this k_epoch: 0.0024574100971221924
KL Divergence Average Loss: 0.01828049123287201
Total Loss of this k_epoch: -0.002305984264239669


Entropy of this k_epoch: 0.09583927690982819
Average policy_loss of this k_epoch: 0.0023224428296089172
KL Divergence Average Loss: 0.016749823465943336
Total Loss of this k_epoch: -0.002302022883668542


Entropy of this k_epoch: 0.0982397049665451
Average policy_loss of this k_epoch: 0.00241774320602417
KL Divergence Average Loss: 0.01756896823644638
Total Loss of this k_epoch: -0.0023185524623841047






Entropy of this k_epoch: 0.0959867313504219
Average policy_loss of this k_epoch: 0.0023203380405902863
KL Divergence Average Loss: 0.016455382108688354
Total Loss of this k_epoch: -0.002314444864168763


Entropy of this k_epoch: 0.10301802307367325
Average policy_loss of this k_epoch: 0.0026233047246932983
KL Divergence Average Loss: 0.020028773695230484
Total Loss of this k_epoch: -0.0023273087572306395






Entropy of this k_epoch: 0.10155707597732544
Average policy_loss of this k_epoch: 0.0025715231895446777
KL Divergence Average Loss: 0.019653186202049255
Total Loss of this k_epoch: -0.00230979872867465






Entropy of this k_epoch: 0.1009979248046875
Average policy_loss of this k_epoch: 0.002549879252910614
KL Divergence Average Loss: 0.01942821592092514
Total Loss of this k_epoch: -0.0023057349026203156


Entropy of this k_epoch: 0.10309640318155289
Average policy_loss of this k_epoch: 0.0025983452796936035
KL Divergence Average Loss: 0.024121474474668503
Total Loss of this k_epoch: -0.002315260237082839


Entropy of this k_epoch: 0.10082709044218063
Average policy_loss of this k_epoch: 0.00253300741314888
KL Divergence Average Loss: 0.01907343417406082
Total Loss of this k_epoch: -0.0023176129907369614






Entropy of this k_epoch: 0.10061918199062347
Average policy_loss of this k_epoch: 0.002519659698009491
KL Divergence Average Loss: 0.018808644264936447
Total Loss of this k_epoch: -0.002323213266208768


Entropy of this k_epoch: 0.10594406723976135
Average policy_loss of this k_epoch: 0.0027625076472759247
KL Divergence Average Loss: 0.021923186257481575
Total Loss of this k_epoch: -0.002315463963896036






Entropy of this k_epoch: 0.10351917892694473
Average policy_loss of this k_epoch: 0.0026442036032676697
KL Divergence Average Loss: 0.020195167511701584
Total Loss of this k_epoch: -0.0023298035375773907






Entropy of this k_epoch: 0.1010286808013916
Average policy_loss of this k_epoch: 0.0025396645069122314
KL Divergence Average Loss: 0.019220054149627686
Total Loss of this k_epoch: -0.0023195690009742975


Entropy of this k_epoch: 0.10475221276283264
Average policy_loss of this k_epoch: 0.002712264657020569
KL Divergence Average Loss: 0.02136373333632946
Total Loss of this k_epoch: -0.002311708638444543


Entropy of this k_epoch: 0.10162099450826645
Average policy_loss of this k_epoch: 0.002569131553173065
KL Divergence Average Loss: 0.019449753686785698
Total Loss of this k_epoch: -0.0023174204397946596




Epoch 23/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.40it/s]


Entropy of this k_epoch: 0.10387670248746872
Average policy_loss of this k_epoch: 0.0026655904948711395
KL Divergence Average Loss: 0.020725250244140625
Total Loss of this k_epoch: -0.00232099206186831

Last k_epoch stats:
Loss: -0.0023210 | Ratio: 0.9786753 | Entropy Term: 0.1038767


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  72%|███████▏  | 23/32 [02:04<00:48,  5.41s/it]

Entire Validation Dataset Accuracy: 0.9375| 180.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782,
        0.1782], device='cuda:0')





Entropy of this k_epoch: 0.10946722328662872
Average policy_loss of this k_epoch: 0.016624197363853455
KL Divergence Average Loss: 0.0007026696694083512
Total Loss of this k_epoch: 0.011157861910760403






Entropy of this k_epoch: 0.0951697826385498
Average policy_loss of this k_epoch: -0.013665840029716492
KL Divergence Average Loss: 0.000511817866936326
Total Loss of this k_epoch: -0.018419209867715836






Entropy of this k_epoch: 0.08236435055732727
Average policy_loss of this k_epoch: -0.017665252089500427
KL Divergence Average Loss: 0.0011453949846327305
Total Loss of this k_epoch: -0.02177201583981514


Entropy of this k_epoch: 0.06638678908348083
Average policy_loss of this k_epoch: -0.019200697541236877
KL Divergence Average Loss: 0.003041725605726242
Total Loss of this k_epoch: -0.022489620372653008


Entropy of this k_epoch: 0.05199934169650078
Average policy_loss of this k_epoch: -0.019937023520469666
KL Divergence Average Loss: 0.005819711368530989
Total Loss of this k_epoch: -0.022478794679045677






Entropy of this k_epoch: 0.04752339795231819
Average policy_loss of this k_epoch: -0.020139656960964203
KL Divergence Average Loss: 0.006949513219296932
Total Loss of this k_epoch: -0.02244633063673973


Entropy of this k_epoch: 0.038078442215919495
Average policy_loss of this k_epoch: -0.020574860274791718
KL Divergence Average Loss: 0.009512683376669884
Total Loss of this k_epoch: -0.02238365449011326






Entropy of this k_epoch: 0.0322987399995327
Average policy_loss of this k_epoch: -0.020822271704673767
KL Divergence Average Loss: 0.011426317505538464
Total Loss of this k_epoch: -0.022322945296764374






Entropy of this k_epoch: 0.026046542450785637
Average policy_loss of this k_epoch: -0.021066270768642426
KL Divergence Average Loss: 0.013687826693058014
Total Loss of this k_epoch: -0.02223172038793564


Entropy of this k_epoch: 0.02539888396859169
Average policy_loss of this k_epoch: -0.021110713481903076
KL Divergence Average Loss: 0.013853628188371658
Total Loss of this k_epoch: -0.022242121398448944


Entropy of this k_epoch: 0.021322406828403473
Average policy_loss of this k_epoch: -0.02125827968120575
KL Divergence Average Loss: 0.015545034781098366
Total Loss of this k_epoch: -0.022168949246406555






Entropy of this k_epoch: 0.019900795072317123
Average policy_loss of this k_epoch: -0.021305464208126068
KL Divergence Average Loss: 0.0161119494587183
Total Loss of this k_epoch: -0.02213938534259796


Entropy of this k_epoch: 0.019095560535788536
Average policy_loss of this k_epoch: -0.02135501801967621
KL Divergence Average Loss: 0.016370069235563278
Total Loss of this k_epoch: -0.02214609645307064






Entropy of this k_epoch: 0.01612863317131996
Average policy_loss of this k_epoch: -0.02145548164844513
KL Divergence Average Loss: 0.017697731032967567
Total Loss of this k_epoch: -0.022084936499595642






Entropy of this k_epoch: 0.017053453251719475
Average policy_loss of this k_epoch: -0.02142065018415451
KL Divergence Average Loss: 0.017253676429390907
Total Loss of this k_epoch: -0.02210078574717045


Entropy of this k_epoch: 0.01573171094059944
Average policy_loss of this k_epoch: -0.021465964615345
KL Divergence Average Loss: 0.017888914793729782
Total Loss of this k_epoch: -0.022073661908507347


Entropy of this k_epoch: 0.015189334750175476
Average policy_loss of this k_epoch: -0.021483436226844788
KL Divergence Average Loss: 0.01812463253736496
Total Loss of this k_epoch: -0.022061657160520554






Entropy of this k_epoch: 0.015097837895154953
Average policy_loss of this k_epoch: -0.021489568054676056
KL Divergence Average Loss: 0.018150651827454567
Total Loss of this k_epoch: -0.02206295356154442


Entropy of this k_epoch: 0.015555950812995434
Average policy_loss of this k_epoch: -0.021470166742801666
KL Divergence Average Loss: 0.017948344349861145
Total Loss of this k_epoch: -0.022068481892347336






Entropy of this k_epoch: 0.014327522367238998
Average policy_loss of this k_epoch: -0.0215146541595459
KL Divergence Average Loss: 0.018525544553995132
Total Loss of this k_epoch: -0.02204577624797821






Entropy of this k_epoch: 0.015802443027496338
Average policy_loss of this k_epoch: -0.02145201712846756
KL Divergence Average Loss: 0.017834730446338654
Total Loss of this k_epoch: -0.02206379361450672


Entropy of this k_epoch: 0.015376899391412735
Average policy_loss of this k_epoch: -0.021466106176376343
KL Divergence Average Loss: 0.01805775985121727
Total Loss of this k_epoch: -0.02205437421798706


Entropy of this k_epoch: 0.014088316820561886
Average policy_loss of this k_epoch: -0.02152217924594879
KL Divergence Average Loss: 0.01864585280418396
Total Loss of this k_epoch: -0.02204013615846634






Entropy of this k_epoch: 0.01440802775323391
Average policy_loss of this k_epoch: -0.021503716707229614
KL Divergence Average Loss: 0.018952490761876106
Total Loss of this k_epoch: -0.022034594789147377


Entropy of this k_epoch: 0.014853193424642086
Average policy_loss of this k_epoch: -0.02149099111557007
KL Divergence Average Loss: 0.01830383390188217
Total Loss of this k_epoch: -0.022050611674785614






Entropy of this k_epoch: 0.015450149774551392
Average policy_loss of this k_epoch: -0.021472230553627014
KL Divergence Average Loss: 0.01799444854259491
Total Loss of this k_epoch: -0.02206479385495186






Entropy of this k_epoch: 0.016596367582678795
Average policy_loss of this k_epoch: -0.02142864465713501
KL Divergence Average Loss: 0.017492122948169708
Total Loss of this k_epoch: -0.022083541378378868


Entropy of this k_epoch: 0.01761060208082199
Average policy_loss of this k_epoch: -0.02139320969581604
KL Divergence Average Loss: 0.017054127529263496
Total Loss of this k_epoch: -0.022103197872638702


Entropy of this k_epoch: 0.018193591386079788
Average policy_loss of this k_epoch: -0.021376147866249084
KL Divergence Average Loss: 0.016775965690612793
Total Loss of this k_epoch: -0.022118069231510162






Entropy of this k_epoch: 0.018682867288589478
Average policy_loss of this k_epoch: -0.021361596882343292
KL Divergence Average Loss: 0.016578033566474915
Total Loss of this k_epoch: -0.022129958495497704


Entropy of this k_epoch: 0.017532985657453537
Average policy_loss of this k_epoch: -0.021402865648269653
KL Divergence Average Loss: 0.017081357538700104
Total Loss of this k_epoch: -0.02210870012640953






Entropy of this k_epoch: 0.018775418400764465
Average policy_loss of this k_epoch: -0.02134515345096588
KL Divergence Average Loss: 0.016637735068798065
Total Loss of this k_epoch: -0.022117547690868378






Entropy of this k_epoch: 0.01857956126332283
Average policy_loss of this k_epoch: -0.021357335150241852
KL Divergence Average Loss: 0.016680337488651276
Total Loss of this k_epoch: -0.022119509056210518


Entropy of this k_epoch: 0.02069506235420704
Average policy_loss of this k_epoch: -0.021285489201545715
KL Divergence Average Loss: 0.015724066644906998
Total Loss of this k_epoch: -0.02216300182044506


Entropy of this k_epoch: 0.021419133991003036
Average policy_loss of this k_epoch: -0.02126370370388031
KL Divergence Average Loss: 0.015410227701067924
Total Loss of this k_epoch: -0.022180557250976562






Entropy of this k_epoch: 0.02572079747915268
Average policy_loss of this k_epoch: -0.021089158952236176
KL Divergence Average Loss: 0.013856747187674046
Total Loss of this k_epoch: -0.02223663032054901


Entropy of this k_epoch: 0.024337023496627808
Average policy_loss of this k_epoch: -0.02114161103963852
KL Divergence Average Loss: 0.014344352297484875
Total Loss of this k_epoch: -0.022215018048882484






Entropy of this k_epoch: 0.023936938494443893
Average policy_loss of this k_epoch: -0.021164879202842712
KL Divergence Average Loss: 0.014451137743890285
Total Loss of this k_epoch: -0.022217214107513428






Entropy of this k_epoch: 0.02537311613559723
Average policy_loss of this k_epoch: -0.021111048758029938
KL Divergence Average Loss: 0.013933103531599045
Total Loss of this k_epoch: -0.022240372374653816


Entropy of this k_epoch: 0.02614482492208481
Average policy_loss of this k_epoch: -0.021089687943458557
KL Divergence Average Loss: 0.013678109273314476
Total Loss of this k_epoch: -0.0222601480782032


Entropy of this k_epoch: 0.028643546625971794
Average policy_loss of this k_epoch: -0.0209786519408226
KL Divergence Average Loss: 0.012738020159304142
Total Loss of this k_epoch: -0.02228344790637493






Entropy of this k_epoch: 0.03128574788570404
Average policy_loss of this k_epoch: -0.020866595208644867
KL Divergence Average Loss: 0.011712998151779175
Total Loss of this k_epoch: -0.02231375128030777


Entropy of this k_epoch: 0.031925052404403687
Average policy_loss of this k_epoch: -0.02082664519548416
KL Divergence Average Loss: 0.011585740372538567
Total Loss of this k_epoch: -0.02230704203248024


Entropy of this k_epoch: 0.034399136900901794




Average policy_loss of this k_epoch: -0.020723558962345123
KL Divergence Average Loss: 0.010773144662380219
Total Loss of this k_epoch: -0.022335784509778023


Entropy of this k_epoch: 0.03334925323724747
Average policy_loss of this k_epoch: -0.020779035985469818
KL Divergence Average Loss: 0.011207746341824532
Total Loss of this k_epoch: -0.022334421053528786


Entropy of this k_epoch: 0.038801223039627075
Average policy_loss of this k_epoch: -0.02055121213197708
KL Divergence Average Loss: 0.00932399183511734
Total Loss of this k_epoch: -0.022398032248020172






Entropy of this k_epoch: 0.036666762083768845
Average policy_loss of this k_epoch: -0.020633190870285034
KL Divergence Average Loss: 0.01009042002260685
Total Loss of this k_epoch: -0.022365624085068703


Entropy of this k_epoch: 0.04139947146177292
Average policy_loss of this k_epoch: -0.020435549318790436
KL Divergence Average Loss: 0.00865420512855053
Total Loss of this k_epoch: -0.022418981418013573


Entropy of this k_epoch: 0.04182378202676773
Average policy_loss of this k_epoch: -0.02040233463048935
KL Divergence Average Loss: 0.008609095588326454
Total Loss of this k_epoch: -0.02240743301808834






Entropy of this k_epoch: 0.04612535238265991
Average policy_loss of this k_epoch: -0.020202361047267914
KL Divergence Average Loss: 0.0074129486456513405
Total Loss of this k_epoch: -0.022434499114751816


Entropy of this k_epoch: 0.046343907713890076
Average policy_loss of this k_epoch: -0.020187050104141235
KL Divergence Average Loss: 0.007400894537568092
Total Loss of this k_epoch: -0.022430237382650375


Entropy of this k_epoch: 0.04698554426431656
Average policy_loss of this k_epoch: -0.02014501392841339
KL Divergence Average Loss: 0.007139044813811779
Total Loss of this k_epoch: -0.022422902286052704






Entropy of this k_epoch: 0.05102658271789551
Average policy_loss of this k_epoch: -0.01996559649705887
KL Divergence Average Loss: 0.006009540520608425
Total Loss of this k_epoch: -0.022456830367445946


Entropy of this k_epoch: 0.05367296189069748
Average policy_loss of this k_epoch: -0.019809484481811523
KL Divergence Average Loss: 0.005567560903728008
Total Loss of this k_epoch: -0.022437456995248795


Entropy of this k_epoch: 0.05386809632182121
Average policy_loss of this k_epoch: -0.019826509058475494
KL Divergence Average Loss: 0.00543505884706974
Total Loss of this k_epoch: -0.02246556431055069







Entropy of this k_epoch: 0.05667338892817497
Average policy_loss of this k_epoch: -0.019677743315696716
KL Divergence Average Loss: 0.005001303739845753
Total Loss of this k_epoch: -0.02246139943599701


Entropy of this k_epoch: 0.06234708055853844
Average policy_loss of this k_epoch: -0.01939738541841507
KL Divergence Average Loss: 0.004054732155054808
Total Loss of this k_epoch: -0.022474192082881927


Entropy of this k_epoch: 0.06114128604531288
Average policy_loss of this k_epoch: -0.019483789801597595
KL Divergence Average Loss: 0.0041132643818855286
Total Loss of this k_epoch: -0.022499721497297287



Epoch 24/32 (Inner K-Epochs):  91%|█████████ | 58/64 [00:04<00:00, 12.23it/s][A


Entropy of this k_epoch: 0.06264156103134155
Average policy_loss of this k_epoch: -0.01937081664800644
KL Divergence Average Loss: 0.0038879599887877703
Total Loss of this k_epoch: -0.02246401645243168


Entropy of this k_epoch: 0.06367643177509308
Average policy_loss of this k_epoch: -0.01936313509941101
KL Divergence Average Loss: 0.0034970880951732397
Total Loss of this k_epoch: -0.022511985152959824






Entropy of this k_epoch: 0.06254656612873077
Average policy_loss of this k_epoch: -0.019399911165237427
KL Divergence Average Loss: 0.003569148713722825
Total Loss of this k_epoch: -0.02249154821038246


Entropy of this k_epoch: 0.06813818961381912
Average policy_loss of this k_epoch: -0.019083723425865173
KL Divergence Average Loss: 0.0029134154319763184
Total Loss of this k_epoch: -0.022461500018835068


Entropy of this k_epoch: 0.0659153014421463
Average policy_loss of this k_epoch: -0.019227325916290283
KL Divergence Average Loss: 0.0030380855314433575
Total Loss of this k_epoch: -0.022492708638310432




Epoch 24/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.42it/s]
>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  75%|███████▌  | 24/32 [02:10<00:43,  5.41s/it]

Entropy of this k_epoch: 0.0720275342464447
Average policy_loss of this k_epoch: -0.018751762807369232
KL Divergence Average Loss: 0.004434600006788969
Total Loss of this k_epoch: -0.022308792918920517

Last k_epoch stats:
Loss: -0.0223088 | Ratio: 1.0022618 | Entropy Term: 0.0720275
Entire Validation Dataset Accuracy: 0.9323| 179.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.06713332235813141
Average policy_loss of this k_epoch: 0.00016725808382034302
KL Divergence Average Loss: 0.0005941572599112988
Total Loss of this k_epoch: -0.0031834663823246956


Entropy of this k_epoch: 0.06868916004896164
Average policy_loss of this k_epoch: 0.0002158135175704956
KL Divergence Average Loss: 0.0005618780851364136
Total Loss of this k_epoch: -0.0032130256295204163






Entropy of this k_epoch: 0.07033689320087433
Average policy_loss of this k_epoch: 0.00028228387236595154
KL Divergence Average Loss: 0.0006307557923719287
Total Loss of this k_epoch: -0.0032282532192766666


Entropy of this k_epoch: 0.07208666205406189
Average policy_loss of this k_epoch: 0.00034610554575920105
KL Divergence Average Loss: 0.000629293848760426
Total Loss of this k_epoch: -0.0032519346568733454


Entropy of this k_epoch: 0.07206372916698456
Average policy_loss of this k_epoch: 0.0003454163670539856
KL Divergence Average Loss: 0.0006170106353238225
Total Loss of this k_epoch: -0.0032516000792384148






Entropy of this k_epoch: 0.07842224836349487
Average policy_loss of this k_epoch: 0.0005918703973293304
KL Divergence Average Loss: 0.0009780299151316285
Total Loss of this k_epoch: -0.0033194618299603462


Entropy of this k_epoch: 0.07606929540634155
Average policy_loss of this k_epoch: 0.000504128634929657
KL Divergence Average Loss: 0.0009980469476431608
Total Loss of this k_epoch: -0.0032893556635826826


Entropy of this k_epoch: 0.07918260991573334
Average policy_loss of this k_epoch: 0.0006322748959064484
KL Divergence Average Loss: 0.0013601952232420444
Total Loss of this k_epoch: -0.0033132536336779594






Entropy of this k_epoch: 0.08329922705888748
Average policy_loss of this k_epoch: 0.0007951781153678894
KL Divergence Average Loss: 0.0016677731182426214
Total Loss of this k_epoch: -0.0033531058579683304


Entropy of this k_epoch: 0.09392377734184265
Average policy_loss of this k_epoch: 0.0012467093765735626
KL Divergence Average Loss: 0.003275458235293627
Total Loss of this k_epoch: -0.003416724968701601


Entropy of this k_epoch: 0.09179957211017609
Average policy_loss of this k_epoch: 0.0011625215411186218
KL Divergence Average Loss: 0.0032494894694536924
Total Loss of this k_epoch: -0.003394962288439274






Entropy of this k_epoch: 0.09771491587162018
Average policy_loss of this k_epoch: 0.001414559781551361
KL Divergence Average Loss: 0.004172099754214287
Total Loss of this k_epoch: -0.0034294649958610535


Entropy of this k_epoch: 0.10121984779834747
Average policy_loss of this k_epoch: 0.001576833426952362
KL Divergence Average Loss: 0.005214447155594826
Total Loss of this k_epoch: -0.003432014724239707


Entropy of this k_epoch: 0.10940077155828476
Average policy_loss of this k_epoch: 0.001950196921825409
KL Divergence Average Loss: 0.007224635686725378
Total Loss of this k_epoch: -0.0034475955180823803






Entropy of this k_epoch: 0.11388396471738815
Average policy_loss of this k_epoch: 0.0021711960434913635
KL Divergence Average Loss: 0.008679768070578575
Total Loss of this k_epoch: -0.003436204744502902


Entropy of this k_epoch: 0.11483010649681091
Average policy_loss of this k_epoch: 0.002202272415161133
KL Divergence Average Loss: 0.008674902841448784
Total Loss of this k_epoch: -0.003452484030276537


Entropy of this k_epoch: 0.1173483207821846
Average policy_loss of this k_epoch: 0.0023552849888801575
KL Divergence Average Loss: 0.010228854604065418
Total Loss of this k_epoch: -0.0034098424948751926






Entropy of this k_epoch: 0.1288890838623047
Average policy_loss of this k_epoch: 0.0028996020555496216
KL Divergence Average Loss: 0.013620731420814991
Total Loss of this k_epoch: -0.003408644814044237


Entropy of this k_epoch: 0.12329693883657455
Average policy_loss of this k_epoch: 0.002626616507768631
KL Divergence Average Loss: 0.011613493785262108
Total Loss of this k_epoch: -0.003422095440328121


Entropy of this k_epoch: 0.13500253856182098
Average policy_loss of this k_epoch: 0.0032510533928871155
KL Divergence Average Loss: 0.016984809190034866
Total Loss of this k_epoch: -0.0033292253501713276






Entropy of this k_epoch: 0.1339074820280075
Average policy_loss of this k_epoch: 0.0033258795738220215
KL Divergence Average Loss: 0.0910828560590744
Total Loss of this k_epoch: -0.0024586664512753487


Entropy of this k_epoch: 0.1230868399143219
Average policy_loss of this k_epoch: 0.002611130475997925
KL Divergence Average Loss: 0.011552425101399422
Total Loss of this k_epoch: -0.003427687333896756


Entropy of this k_epoch: 0.12127485871315002
Average policy_loss of this k_epoch: 0.0042864009737968445
KL Divergence Average Loss: 0.060290154069662094
Total Loss of this k_epoch: -0.0011744406074285507






Entropy of this k_epoch: 0.13202503323554993
Average policy_loss of this k_epoch: 0.0030666515231132507
KL Divergence Average Loss: 0.01508539542555809
Total Loss of this k_epoch: -0.003383746137842536


Entropy of this k_epoch: 0.12251242995262146
Average policy_loss of this k_epoch: 0.0025757476687431335
KL Divergence Average Loss: 0.01131974533200264
Total Loss of this k_epoch: -0.0034366764593869448


Entropy of this k_epoch: 0.12810134887695312
Average policy_loss of this k_epoch: 0.0028627775609493256
KL Divergence Average Loss: 0.013512754812836647
Total Loss of this k_epoch: -0.0034071626141667366






Entropy of this k_epoch: 0.11809558421373367
Average policy_loss of this k_epoch: 0.0023554228246212006
KL Divergence Average Loss: 0.009563210420310497
Total Loss of this k_epoch: -0.003453724319115281


Entropy of this k_epoch: 0.12186890840530396
Average policy_loss of this k_epoch: 0.0025440678000450134
KL Divergence Average Loss: 0.010839032009243965
Total Loss of this k_epoch: -0.00344098755158484


Entropy of this k_epoch: 0.11634515225887299
Average policy_loss of this k_epoch: 0.002284727990627289
KL Divergence Average Loss: 0.009405525401234627
Total Loss of this k_epoch: -0.0034384746104478836






Entropy of this k_epoch: 0.12022832036018372
Average policy_loss of this k_epoch: 0.002663053572177887
KL Divergence Average Loss: 0.015758683905005455
Total Loss of this k_epoch: -0.003190775867551565


Entropy of this k_epoch: 0.11729297041893005
Average policy_loss of this k_epoch: 0.002352282404899597
KL Divergence Average Loss: 0.010425621643662453
Total Loss of this k_epoch: -0.003408110002055764


Entropy of this k_epoch: 0.11860615015029907
Average policy_loss of this k_epoch: 0.002716667950153351
KL Divergence Average Loss: 0.0190372746437788
Total Loss of this k_epoch: -0.003023267025128007






Entropy of this k_epoch: 0.09842300415039062
Average policy_loss of this k_epoch: 0.0014486759901046753
KL Divergence Average Loss: 0.004427892155945301
Total Loss of this k_epoch: -0.0034281956031918526


Entropy of this k_epoch: 0.09244107455015182
Average policy_loss of this k_epoch: 0.0011793673038482666
KL Divergence Average Loss: 0.00297813699580729
Total Loss of this k_epoch: -0.0034129051491618156


Entropy of this k_epoch: 0.09690877795219421
Average policy_loss of this k_epoch: 0.001388508826494217
KL Divergence Average Loss: 0.004300120286643505
Total Loss of this k_epoch: -0.0034139289055019617






Entropy of this k_epoch: 0.0942678153514862
Average policy_loss of this k_epoch: 0.0012661926448345184
KL Divergence Average Loss: 0.003558989381417632
Total Loss of this k_epoch: -0.0034116082824766636


Entropy of this k_epoch: 0.09001446515321732
Average policy_loss of this k_epoch: 0.00107574462890625
KL Divergence Average Loss: 0.002665403299033642
Total Loss of this k_epoch: -0.003398324828594923


Entropy of this k_epoch: 0.09231806546449661
Average policy_loss of this k_epoch: 0.0011737868189811707
KL Divergence Average Loss: 0.003140287008136511
Total Loss of this k_epoch: -0.0034107137471437454






Entropy of this k_epoch: 0.08972596377134323
Average policy_loss of this k_epoch: 0.0010584890842437744
KL Divergence Average Loss: 0.0024269933346658945
Total Loss of this k_epoch: -0.003403539303690195


Entropy of this k_epoch: 0.08841364085674286
Average policy_loss of this k_epoch: 0.0010028108954429626
KL Divergence Average Loss: 0.002294606063514948
Total Loss of this k_epoch: -0.0033949255011975765


Entropy of this k_epoch: 0.0907118171453476
Average policy_loss of this k_epoch: 0.0011057183146476746
KL Divergence Average Loss: 0.0027598822489380836
Total Loss of this k_epoch: -0.0034022736363112926






Entropy of this k_epoch: 0.09528085589408875
Average policy_loss of this k_epoch: 0.0013092570006847382
KL Divergence Average Loss: 0.0038152739871293306
Total Loss of this k_epoch: -0.0034166330005973577


Entropy of this k_epoch: 0.09388422966003418
Average policy_loss of this k_epoch: 0.0012486092746257782
KL Divergence Average Loss: 0.003418183419853449
Total Loss of this k_epoch: -0.0034114206209778786


Entropy of this k_epoch: 0.09579174220561981
Average policy_loss of this k_epoch: 0.0013250187039375305
KL Divergence Average Loss: 0.0037482178304344416
Total Loss of this k_epoch: -0.0034270863980054855






Entropy of this k_epoch: 0.10085711628198624
Average policy_loss of this k_epoch: 0.001543622463941574
KL Divergence Average Loss: 0.004587321542203426
Total Loss of this k_epoch: -0.00345336040481925


Entropy of this k_epoch: 0.09819847345352173
Average policy_loss of this k_epoch: 0.001433592289686203
KL Divergence Average Loss: 0.004440242424607277
Total Loss of this k_epoch: -0.0034319290425628424


Entropy of this k_epoch: 0.09848442673683167
Average policy_loss of this k_epoch: 0.001444011926651001
KL Divergence Average Loss: 0.004369980189949274
Total Loss of this k_epoch: -0.0034365097526460886






Entropy of this k_epoch: 0.1018739640712738
Average policy_loss of this k_epoch: 0.001606278121471405
KL Divergence Average Loss: 0.005207092501223087
Total Loss of this k_epoch: -0.0034353493247181177


Entropy of this k_epoch: 0.10488780587911606
Average policy_loss of this k_epoch: 0.0017301812767982483
KL Divergence Average Loss: 0.005775280762463808
Total Loss of this k_epoch: -0.0034564565867185593


Entropy of this k_epoch: 0.10964927077293396
Average policy_loss of this k_epoch: 0.001941390335559845
KL Divergence Average Loss: 0.006705594249069691
Total Loss of this k_epoch: -0.003474017372354865






Entropy of this k_epoch: 0.11131710559129715
Average policy_loss of this k_epoch: 0.0020345933735370636
KL Divergence Average Loss: 0.007662528194487095
Total Loss of this k_epoch: -0.003454636549577117


Entropy of this k_epoch: 0.11374212801456451
Average policy_loss of this k_epoch: 0.0021402835845947266
KL Divergence Average Loss: 0.008013435639441013
Total Loss of this k_epoch: -0.0034666885621845722


Entropy of this k_epoch: 0.11548970639705658
Average policy_loss of this k_epoch: 0.0022257640957832336
KL Divergence Average Loss: 0.009227924048900604
Total Loss of this k_epoch: -0.0034564421512186527






Entropy of this k_epoch: 0.11727754771709442
Average policy_loss of this k_epoch: 0.002318359911441803
KL Divergence Average Loss: 0.00943729467689991
Total Loss of this k_epoch: -0.0034511445555835962


Entropy of this k_epoch: 0.11344032734632492
Average policy_loss of this k_epoch: 0.0021309666335582733
KL Divergence Average Loss: 0.008068211376667023
Total Loss of this k_epoch: -0.0034603679087013006


Entropy of this k_epoch: 0.11731817573308945
Average policy_loss of this k_epoch: 0.002309061586856842
KL Divergence Average Loss: 0.009102568961679935
Total Loss of this k_epoch: -0.003465821500867605






Entropy of this k_epoch: 0.12018848955631256
Average policy_loss of this k_epoch: 0.002453964203596115
KL Divergence Average Loss: 0.010480988770723343
Total Loss of this k_epoch: -0.0034506504889577627


Entropy of this k_epoch: 0.11602245271205902
Average policy_loss of this k_epoch: 0.002247154712677002
KL Divergence Average Loss: 0.008649762719869614
Total Loss of this k_epoch: -0.003467470407485962


Entropy of this k_epoch: 0.12228626012802124
Average policy_loss of this k_epoch: 0.002520039677619934
KL Divergence Average Loss: 0.01639556512236595
Total Loss of this k_epoch: -0.003430317621678114






Entropy of this k_epoch: 0.11798103898763657
Average policy_loss of this k_epoch: 0.0023514851927757263
KL Divergence Average Loss: 0.00959715899080038
Total Loss of this k_epoch: -0.00345159531570971


Entropy of this k_epoch: 0.12077299505472183
Average policy_loss of this k_epoch: 0.002482481300830841
KL Divergence Average Loss: 0.01035794336348772
Total Loss of this k_epoch: -0.0034525892697274685


Entropy of this k_epoch: 0.12223247438669205
Average policy_loss of this k_epoch: 0.0025640875101089478
KL Divergence Average Loss: 0.011112933978438377
Total Loss of this k_epoch: -0.0034364068415015936




Epoch 25/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.47it/s]


Entropy of this k_epoch: 0.11263324320316315
Average policy_loss of this k_epoch: 0.002091366797685623
KL Divergence Average Loss: 0.007871514186263084
Total Loss of this k_epoch: -0.00346158049069345


Entropy of this k_epoch: 0.11526467651128769
Average policy_loss of this k_epoch: 0.0022181160748004913
KL Divergence Average Loss: 0.008580147288739681
Total Loss of this k_epoch: -0.0034593166783452034

Last k_epoch stats:
Loss: -0.0034593 | Ratio: 0.9822546 | Entropy Term: 0.1152647


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  78%|███████▊  | 25/32 [02:15<00:37,  5.40s/it]

Entire Validation Dataset Accuracy: 0.9271| 178.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782,
        0.1782], device='cuda:0')





Entropy of this k_epoch: 0.1114131435751915
Average policy_loss of this k_epoch: 0.006479211151599884
KL Divergence Average Loss: 0.0006733303889632225
Total Loss of this k_epoch: 0.0009152875281870365






Entropy of this k_epoch: 0.1029740422964096
Average policy_loss of this k_epoch: -0.016229405999183655
KL Divergence Average Loss: 0.0003950369718950242
Total Loss of this k_epoch: -0.021374158561229706






Entropy of this k_epoch: 0.08840714395046234
Average policy_loss of this k_epoch: -0.011773988604545593
KL Divergence Average Loss: 0.0012304461561143398
Total Loss of this k_epoch: -0.01618204079568386


Entropy of this k_epoch: 0.06679996848106384
Average policy_loss of this k_epoch: -0.019769027829170227
KL Divergence Average Loss: 0.004327986389398575
Total Loss of this k_epoch: -0.02306574583053589


Entropy of this k_epoch: 0.054849062114953995
Average policy_loss of this k_epoch: -0.020406603813171387
KL Divergence Average Loss: 0.007081937976181507
Total Loss of this k_epoch: -0.02307823672890663






Entropy of this k_epoch: 0.044533390551805496
Average policy_loss of this k_epoch: -0.020899198949337006
KL Divergence Average Loss: 0.010479666292667389
Total Loss of this k_epoch: -0.023021072149276733


Entropy of this k_epoch: 0.03622495010495186
Average policy_loss of this k_epoch: -0.02126103639602661
KL Divergence Average Loss: 0.012682750821113586
Total Loss of this k_epoch: -0.022945456206798553






Entropy of this k_epoch: 0.029116129502654076
Average policy_loss of this k_epoch: -0.021557599306106567
KL Divergence Average Loss: 0.01519952155649662
Total Loss of this k_epoch: -0.022861409932374954






Entropy of this k_epoch: 0.024734612554311752
Average policy_loss of this k_epoch: -0.021723605692386627
KL Divergence Average Loss: 0.017187323421239853
Total Loss of this k_epoch: -0.022788463160395622


Entropy of this k_epoch: 0.021475430577993393
Average policy_loss of this k_epoch: -0.02185748517513275
KL Divergence Average Loss: 0.018382441252470016
Total Loss of this k_epoch: -0.022747432813048363


Entropy of this k_epoch: 0.017901360988616943
Average policy_loss of this k_epoch: -0.02199537307024002
KL Divergence Average Loss: 0.019897859543561935
Total Loss of this k_epoch: -0.02269146218895912






Entropy of this k_epoch: 0.018574919551610947
Average policy_loss of this k_epoch: -0.021967634558677673
KL Divergence Average Loss: 0.021488510072231293
Total Loss of this k_epoch: -0.022681495174765587


Entropy of this k_epoch: 0.01604953035712242
Average policy_loss of this k_epoch: -0.022058971226215363
KL Divergence Average Loss: 0.021269116550683975
Total Loss of this k_epoch: -0.022648755460977554






Entropy of this k_epoch: 0.015688879415392876
Average policy_loss of this k_epoch: -0.022069208323955536
KL Divergence Average Loss: 0.022086545825004578
Total Loss of this k_epoch: -0.022632787004113197






Entropy of this k_epoch: 0.013994202017784119
Average policy_loss of this k_epoch: -0.022129304707050323
KL Divergence Average Loss: 0.023058272898197174
Total Loss of this k_epoch: -0.02259843237698078


Entropy of this k_epoch: 0.013008021749556065
Average policy_loss of this k_epoch: -0.0221603661775589
KL Divergence Average Loss: 0.022416651248931885
Total Loss of this k_epoch: -0.022586600854992867


Entropy of this k_epoch: 0.017655279487371445
Average policy_loss of this k_epoch: -0.021906934678554535
KL Divergence Average Loss: 0.03599870204925537
Total Loss of this k_epoch: -0.022429710254073143






Entropy of this k_epoch: 0.012878518551588058
Average policy_loss of this k_epoch: -0.022161051630973816
KL Divergence Average Loss: 0.022303862497210503
Total Loss of this k_epoch: -0.02258193865418434


Entropy of this k_epoch: 0.013563564047217369
Average policy_loss of this k_epoch: -0.022142551839351654
KL Divergence Average Loss: 0.021953042596578598
Total Loss of this k_epoch: -0.02260119840502739






Entropy of this k_epoch: 0.012201170437037945
Average policy_loss of this k_epoch: -0.022190093994140625
KL Divergence Average Loss: 0.022617943584918976
Total Loss of this k_epoch: -0.02257397398352623






Entropy of this k_epoch: 0.01095344964414835
Average policy_loss of this k_epoch: -0.022223912179470062
KL Divergence Average Loss: 0.02328488603234291
Total Loss of this k_epoch: -0.02253873459994793


Entropy of this k_epoch: 0.01296486146748066
Average policy_loss of this k_epoch: -0.022160857915878296
KL Divergence Average Loss: 0.022248918190598488
Total Loss of this k_epoch: -0.022586612030863762


Entropy of this k_epoch: 0.01252229418605566
Average policy_loss of this k_epoch: -0.022175557911396027
KL Divergence Average Loss: 0.02243300899863243
Total Loss of this k_epoch: -0.02257734350860119






Entropy of this k_epoch: 0.01181480847299099
Average policy_loss of this k_epoch: -0.022193245589733124
KL Divergence Average Loss: 0.022956594824790955
Total Loss of this k_epoch: -0.022554419934749603


Entropy of this k_epoch: 0.012853587046265602
Average policy_loss of this k_epoch: -0.022173777222633362
KL Divergence Average Loss: 0.023080792278051376
Total Loss of this k_epoch: -0.02258564904332161






Entropy of this k_epoch: 0.012785110622644424
Average policy_loss of this k_epoch: -0.022166669368743896
KL Divergence Average Loss: 0.022314200177788734
Total Loss of this k_epoch: -0.022582784295082092






Entropy of this k_epoch: 0.015221698209643364
Average policy_loss of this k_epoch: -0.02207876741886139
KL Divergence Average Loss: 0.02632606029510498
Total Loss of this k_epoch: -0.0225765909999609


Entropy of this k_epoch: 0.012321378104388714
Average policy_loss of this k_epoch: -0.022184550762176514
KL Divergence Average Loss: 0.02255796268582344
Total Loss of this k_epoch: -0.02257503941655159


Entropy of this k_epoch: 0.013606736436486244
Average policy_loss of this k_epoch: -0.022138580679893494
KL Divergence Average Loss: 0.021893097087740898
Total Loss of this k_epoch: -0.02259998582303524






Entropy of this k_epoch: 0.014506860636174679
Average policy_loss of this k_epoch: -0.022110924124717712
KL Divergence Average Loss: 0.021510345861315727
Total Loss of this k_epoch: -0.022621164098381996


Entropy of this k_epoch: 0.014385395683348179
Average policy_loss of this k_epoch: -0.02210797369480133
KL Divergence Average Loss: 0.021563410758972168
Total Loss of this k_epoch: -0.02261160872876644






Entropy of this k_epoch: 0.015003065578639507
Average policy_loss of this k_epoch: -0.022089309990406036
KL Divergence Average Loss: 0.021283939480781555
Total Loss of this k_epoch: -0.022626623511314392






Entropy of this k_epoch: 0.015139906667172909
Average policy_loss of this k_epoch: -0.022085905075073242
KL Divergence Average Loss: 0.021178346127271652
Total Loss of this k_epoch: -0.022631118074059486


Entropy of this k_epoch: 0.015279732644557953
Average policy_loss of this k_epoch: -0.022082023322582245
KL Divergence Average Loss: 0.021107742562890053
Total Loss of this k_epoch: -0.02263493277132511


Entropy of this k_epoch: 0.01708158478140831
Average policy_loss of this k_epoch: -0.022015288472175598
KL Divergence Average Loss: 0.020262014120817184
Total Loss of this k_epoch: -0.022666746750473976






Entropy of this k_epoch: 0.017941033467650414
Average policy_loss of this k_epoch: -0.021983183920383453
KL Divergence Average Loss: 0.019867239519953728
Total Loss of this k_epoch: -0.02268156409263611


Entropy of this k_epoch: 0.0187910795211792
Average policy_loss of this k_epoch: -0.02195686846971512
KL Divergence Average Loss: 0.019453417509794235
Total Loss of this k_epoch: -0.022701887413859367






Entropy of this k_epoch: 0.019883636385202408
Average policy_loss of this k_epoch: -0.021912530064582825
KL Divergence Average Loss: 0.018984897062182426
Total Loss of this k_epoch: -0.022716863080859184






Entropy of this k_epoch: 0.01911122351884842
Average policy_loss of this k_epoch: -0.021934501826763153
KL Divergence Average Loss: 0.019336478784680367
Total Loss of this k_epoch: -0.02269669808447361


Entropy of this k_epoch: 0.024117611348628998
Average policy_loss of this k_epoch: -0.02171560376882553
KL Divergence Average Loss: 0.032170504331588745
Total Loss of this k_epoch: -0.022599779069423676


Entropy of this k_epoch: 0.02143246680498123
Average policy_loss of this k_epoch: -0.02185121923685074
KL Divergence Average Loss: 0.018304254859685898
Total Loss of this k_epoch: -0.022739799693226814






Entropy of this k_epoch: 0.02355954796075821
Average policy_loss of this k_epoch: -0.021773435175418854
KL Divergence Average Loss: 0.017414551228284836
Total Loss of this k_epoch: -0.022777266800403595


Entropy of this k_epoch: 0.023653587326407433
Average policy_loss of this k_epoch: -0.02177276462316513
KL Divergence Average Loss: 0.01743815280497074
Total Loss of this k_epoch: -0.022781062871217728






Entropy of this k_epoch: 0.0255071222782135
Average policy_loss of this k_epoch: -0.02169795334339142
KL Divergence Average Loss: 0.016654424369335175
Total Loss of this k_epoch: -0.022806765511631966






Entropy of this k_epoch: 0.02695588394999504
Average policy_loss of this k_epoch: -0.021633468568325043
KL Divergence Average Loss: 0.01603296771645546
Total Loss of this k_epoch: -0.022820934653282166


Entropy of this k_epoch: 0.028427448123693466
Average policy_loss of this k_epoch: -0.02156713604927063
KL Divergence Average Loss: 0.015466412529349327
Total Loss of this k_epoch: -0.022833842784166336


Entropy of this k_epoch: 0.027533970773220062
Average policy_loss of this k_epoch: -0.021622009575366974
KL Divergence Average Loss: 0.015862129628658295
Total Loss of this k_epoch: -0.022840086370706558






Entropy of this k_epoch: 0.030259503051638603
Average policy_loss of this k_epoch: -0.021489374339580536
KL Divergence Average Loss: 0.014826069585978985
Total Loss of this k_epoch: -0.022854087874293327


Entropy of this k_epoch: 0.033649787306785583
Average policy_loss of this k_epoch: -0.02136937528848648
KL Divergence Average Loss: 0.013484496623277664
Total Loss of this k_epoch: -0.022917021065950394






Entropy of this k_epoch: 0.034685008227825165
Average policy_loss of this k_epoch: -0.02131114900112152
KL Divergence Average Loss: 0.013197986409068108
Total Loss of this k_epoch: -0.02291342057287693






Entropy of this k_epoch: 0.04253397136926651
Average policy_loss of this k_epoch: -0.02090597152709961
KL Divergence Average Loss: 0.011358809657394886
Total Loss of this k_epoch: -0.022919083014130592


Entropy of this k_epoch: 0.03889433294534683
Average policy_loss of this k_epoch: -0.02112790197134018
KL Divergence Average Loss: 0.01168888807296753
Total Loss of this k_epoch: -0.02295573055744171


Entropy of this k_epoch: 0.044131845235824585
Average policy_loss of this k_epoch: -0.02089250087738037
KL Divergence Average Loss: 0.01005705539137125
Total Loss of this k_epoch: -0.022998522967100143






Entropy of this k_epoch: 0.047111548483371735
Average policy_loss of this k_epoch: -0.020744554698467255
KL Divergence Average Loss: 0.009181389585137367
Total Loss of this k_epoch: -0.02300831861793995


Entropy of this k_epoch: 0.043913017958402634
Average policy_loss of this k_epoch: -0.020906031131744385
KL Divergence Average Loss: 0.010115927085280418
Total Loss of this k_epoch: -0.023000523447990417






Entropy of this k_epoch: 0.04665342718362808
Average policy_loss of this k_epoch: -0.02076682448387146
KL Divergence Average Loss: 0.00934709794819355
Total Loss of this k_epoch: -0.023006023839116096






Entropy of this k_epoch: 0.05290249362587929
Average policy_loss of this k_epoch: -0.020499348640441895
KL Divergence Average Loss: 0.007531770039349794
Total Loss of this k_epoch: -0.02306915447115898


Entropy of this k_epoch: 0.05153356119990349
Average policy_loss of this k_epoch: -0.020535126328468323
KL Divergence Average Loss: 0.00790014024823904
Total Loss of this k_epoch: -0.02303280308842659


Entropy of this k_epoch: 0.053189851343631744
Average policy_loss of this k_epoch: -0.02047092467546463
KL Divergence Average Loss: 0.007453433237969875
Total Loss of this k_epoch: -0.02305588312447071






Entropy of this k_epoch: 0.05353798717260361
Average policy_loss of this k_epoch: -0.02045130729675293
KL Divergence Average Loss: 0.00746153062209487
Total Loss of this k_epoch: -0.023053590208292007


Entropy of this k_epoch: 0.06639549136161804
Average policy_loss of this k_epoch: -0.018630310893058777
KL Divergence Average Loss: 0.02127169445157051
Total Loss of this k_epoch: -0.021737366914749146






Entropy of this k_epoch: 0.05918106064200401
Average policy_loss of this k_epoch: -0.020168349146842957
KL Divergence Average Loss: 0.00617948267608881
Total Loss of this k_epoch: -0.023065607994794846




Epoch 26/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.42it/s]


Entropy of this k_epoch: 0.06076641008257866
Average policy_loss of this k_epoch: -0.020047448575496674
KL Divergence Average Loss: 0.005780199076980352
Total Loss of this k_epoch: -0.023027967661619186


Entropy of this k_epoch: 0.06148301810026169
Average policy_loss of this k_epoch: -0.02004125714302063
KL Divergence Average Loss: 0.005462469533085823
Total Loss of this k_epoch: -0.023060783743858337

Last k_epoch stats:
Loss: -0.0230608 | Ratio: 1.0063618 | Entropy Term: 0.0614830


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  81%|████████▏ | 26/32 [02:20<00:32,  5.40s/it]

Entire Validation Dataset Accuracy: 0.9219| 177.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782,
        0.1782], device='cuda:0')





Entropy of this k_epoch: 0.06535816192626953
Average policy_loss of this k_epoch: 0.0019013136625289917
KL Divergence Average Loss: 0.00047674382221885026
Total Loss of this k_epoch: -0.0013618271332234144






Entropy of this k_epoch: 0.05173950269818306
Average policy_loss of this k_epoch: 0.011392280459403992
KL Divergence Average Loss: 0.0014621508307754993
Total Loss of this k_epoch: 0.008819926530122757






Entropy of this k_epoch: 0.03586635738611221
Average policy_loss of this k_epoch: -0.018228581175208092
KL Divergence Average Loss: 0.0018197698518633842
Total Loss of this k_epoch: -0.020003700628876686


Entropy of this k_epoch: 0.023113200441002846
Average policy_loss of this k_epoch: -0.018756063655018806
KL Divergence Average Loss: 0.0040383716113865376
Total Loss of this k_epoch: -0.019871339201927185


Entropy of this k_epoch: 0.017513979226350784
Average policy_loss of this k_epoch: -0.018948130309581757
KL Divergence Average Loss: 0.005524893291294575
Total Loss of this k_epoch: -0.019768578931689262






Entropy of this k_epoch: 0.013653566129505634
Average policy_loss of this k_epoch: -0.01908854953944683
KL Divergence Average Loss: 0.008563915267586708
Total Loss of this k_epoch: -0.019685588777065277


Entropy of this k_epoch: 0.014118833467364311
Average policy_loss of this k_epoch: -0.01901838928461075
KL Divergence Average Loss: 0.021203018724918365
Total Loss of this k_epoch: -0.019512301310896873






Entropy of this k_epoch: 0.011633938178420067
Average policy_loss of this k_epoch: -0.019077874720096588
KL Divergence Average Loss: 0.02534571662545204
Total Loss of this k_epoch: -0.019406113773584366






Entropy of this k_epoch: 0.011610208079218864
Average policy_loss of this k_epoch: -0.019116193056106567
KL Divergence Average Loss: 0.023488126695156097
Total Loss of this k_epoch: -0.019461821764707565


Entropy of this k_epoch: 0.005450362339615822
Average policy_loss of this k_epoch: -0.01933414116501808
KL Divergence Average Loss: 0.010168710723519325
Total Loss of this k_epoch: -0.01950497180223465


Entropy of this k_epoch: 0.014473551884293556
Average policy_loss of this k_epoch: -0.018336255103349686
KL Divergence Average Loss: 0.10787767916917801
Total Loss of this k_epoch: -0.01798115484416485






Entropy of this k_epoch: 0.0038162795826792717
Average policy_loss of this k_epoch: -0.019377881661057472
KL Divergence Average Loss: 0.010780473239719868
Total Loss of this k_epoch: -0.01946089044213295


Entropy of this k_epoch: 0.00364676839672029
Average policy_loss of this k_epoch: -0.01938166469335556
KL Divergence Average Loss: 0.010572603903710842
Total Loss of this k_epoch: -0.01945827715098858


Entropy of this k_epoch: 0.003326883539557457




Average policy_loss of this k_epoch: -0.01938828080892563
KL Divergence Average Loss: 0.010734292678534985
Total Loss of this k_epoch: -0.01944728195667267


Entropy of this k_epoch: 0.0030283296946436167
Average policy_loss of this k_epoch: -0.019400443881750107
KL Divergence Average Loss: 0.010889039374887943
Total Loss of this k_epoch: -0.0194429699331522


Entropy of this k_epoch: 0.0043451013043522835
Average policy_loss of this k_epoch: -0.01934731751680374
KL Divergence Average Loss: 0.010589632205665112
Total Loss of this k_epoch: -0.019458675757050514






Entropy of this k_epoch: 0.024890746921300888
Average policy_loss of this k_epoch: -0.017460700124502182
KL Divergence Average Loss: 0.02865435555577278
Total Loss of this k_epoch: -0.0184186939150095


Entropy of this k_epoch: 0.013396513648331165
Average policy_loss of this k_epoch: -0.017846129834651947
KL Divergence Average Loss: 0.03200703486800194
Total Loss of this k_epoch: -0.01819588616490364


Entropy of this k_epoch: 0.003039408242329955
Average policy_loss of this k_epoch: -0.019389783963561058
KL Divergence Average Loss: 0.010999742895364761
Total Loss of this k_epoch: -0.01943175680935383






Entropy of this k_epoch: 0.001961208414286375
Average policy_loss of this k_epoch: -0.019422942772507668
KL Divergence Average Loss: 0.011430306360125542
Total Loss of this k_epoch: -0.019406700506806374


Entropy of this k_epoch: 0.0020167273469269276
Average policy_loss of this k_epoch: -0.019421925768256187
KL Divergence Average Loss: 0.011497167870402336
Total Loss of this k_epoch: -0.019407790154218674


Entropy of this k_epoch: 0.0021133599802851677
Average policy_loss of this k_epoch: -0.019417336210608482
KL Divergence Average Loss: 0.012166774831712246
Total Loss of this k_epoch: -0.01940133608877659






Entropy of this k_epoch: 0.01225158292800188
Average policy_loss of this k_epoch: -0.017696067690849304
KL Divergence Average Loss: 0.13709035515785217
Total Loss of this k_epoch: -0.0169377438724041


Entropy of this k_epoch: 0.011675801128149033
Average policy_loss of this k_epoch: -0.01846330612897873
KL Divergence Average Loss: 0.07756789028644562
Total Loss of this k_epoch: -0.018271418288350105


Entropy of this k_epoch: 0.001613888773135841
Average policy_loss of this k_epoch: -0.01943075656890869
KL Divergence Average Loss: 0.011607535183429718
Total Loss of this k_epoch: -0.01939537562429905






Entropy of this k_epoch: 0.0019430835964158177
Average policy_loss of this k_epoch: -0.01942973956465721
KL Divergence Average Loss: 0.01146610826253891
Total Loss of this k_epoch: -0.01941223256289959


Entropy of this k_epoch: 0.0018287263810634613
Average policy_loss of this k_epoch: -0.01942625641822815
KL Divergence Average Loss: 0.01148940995335579
Total Loss of this k_epoch: -0.019402798265218735


Entropy of this k_epoch: 0.0022927611134946346
Average policy_loss of this k_epoch: -0.019416028633713722
KL Divergence Average Loss: 0.011247977614402771
Total Loss of this k_epoch: -0.01941818743944168






Entropy of this k_epoch: 0.004381953272968531
Average policy_loss of this k_epoch: -0.019340338185429573
KL Divergence Average Loss: 0.010593559592962265
Total Loss of this k_epoch: -0.0194534994661808


Entropy of this k_epoch: 0.0027314748149365187
Average policy_loss of this k_epoch: -0.019403524696826935
KL Divergence Average Loss: 0.011044478043913841
Total Loss of this k_epoch: -0.019429652020335197


Entropy of this k_epoch: 0.00448136031627655
Average policy_loss of this k_epoch: -0.01936299167573452
KL Divergence Average Loss: 0.010427158325910568
Total Loss of this k_epoch: -0.019482789561152458






Entropy of this k_epoch: 0.005526010878384113
Average policy_loss of this k_epoch: -0.019295237958431244
KL Divergence Average Loss: 0.010528840124607086
Total Loss of this k_epoch: -0.019466251134872437


Entropy of this k_epoch: 0.006879621185362339
Average policy_loss of this k_epoch: -0.01795780658721924
KL Divergence Average Loss: 0.009961212053894997
Total Loss of this k_epoch: -0.018202174454927444


Entropy of this k_epoch: 0.00253502419218421
Average policy_loss of this k_epoch: -0.0194147527217865
KL Divergence Average Loss: 0.01114838756620884
Total Loss of this k_epoch: -0.019430020824074745






Entropy of this k_epoch: 0.0018882680451497436
Average policy_loss of this k_epoch: -0.019424188882112503
KL Divergence Average Loss: 0.011460669338703156
Total Loss of this k_epoch: -0.01940399594604969


Entropy of this k_epoch: 0.0015276148915290833
Average policy_loss of this k_epoch: -0.01943209022283554
KL Divergence Average Loss: 0.011645769700407982
Total Loss of this k_epoch: -0.019392013549804688


Entropy of this k_epoch: 0.001361972070299089
Average policy_loss of this k_epoch: -0.019436046481132507
KL Divergence Average Loss: 0.011734964326024055
Total Loss of this k_epoch: -0.019386794418096542






Entropy of this k_epoch: 0.0011458469089120626
Average policy_loss of this k_epoch: -0.019440049305558205
KL Divergence Average Loss: 0.011857688426971436
Total Loss of this k_epoch: -0.019378766417503357


Entropy of this k_epoch: 0.0011740991612896323
Average policy_loss of this k_epoch: -0.019440550357103348
KL Divergence Average Loss: 0.011977885849773884
Total Loss of this k_epoch: -0.019379476085305214


Entropy of this k_epoch: 0.0008977740071713924
Average policy_loss of this k_epoch: -0.01944597065448761
KL Divergence Average Loss: 0.011994250118732452
Total Loss of this k_epoch: -0.01937091536819935






Entropy of this k_epoch: 0.0008990457281470299
Average policy_loss of this k_epoch: -0.019445709884166718
KL Divergence Average Loss: 0.011992285959422588
Total Loss of this k_epoch: -0.01937074027955532


Entropy of this k_epoch: 0.0008945061708800495
Average policy_loss of this k_epoch: -0.01944596692919731
KL Divergence Average Loss: 0.011997601948678493
Total Loss of this k_epoch: -0.01937071606516838


Entropy of this k_epoch: 0.0007988892029970884
Average policy_loss of this k_epoch: -0.019447889178991318
KL Divergence Average Loss: 0.01215948723256588
Total Loss of this k_epoch: -0.01936623826622963






Entropy of this k_epoch: 0.008961326442658901
Average policy_loss of this k_epoch: -0.018844444304704666
KL Divergence Average Loss: 0.06603062897920609
Total Loss of this k_epoch: -0.018632205203175545


Entropy of this k_epoch: 0.000715822447091341
Average policy_loss of this k_epoch: -0.019449559971690178
KL Divergence Average Loss: 0.012101701460778713
Total Loss of this k_epoch: -0.019364332780241966


Entropy of this k_epoch: 0.0008826490957289934
Average policy_loss of this k_epoch: -0.01944633387029171
KL Divergence Average Loss: 0.012001127004623413
Total Loss of this k_epoch: -0.01937045343220234






Entropy of this k_epoch: 0.0007833333220332861
Average policy_loss of this k_epoch: -0.01944819837808609
KL Divergence Average Loss: 0.01206137239933014
Total Loss of this k_epoch: -0.019366750493645668


Entropy of this k_epoch: 0.0007391873514279723
Average policy_loss of this k_epoch: -0.019449178129434586
KL Divergence Average Loss: 0.012086360715329647
Total Loss of this k_epoch: -0.019365273416042328


Entropy of this k_epoch: 0.0007689023041166365
Average policy_loss of this k_epoch: -0.019448503851890564
KL Divergence Average Loss: 0.012068905867636204
Total Loss of this k_epoch: -0.01936626061797142






Entropy of this k_epoch: 0.0008023323607631028
Average policy_loss of this k_epoch: -0.019447872415184975
KL Divergence Average Loss: 0.012047640047967434
Total Loss of this k_epoch: -0.019367512315511703


Entropy of this k_epoch: 0.0008228018996305764
Average policy_loss of this k_epoch: -0.019447391852736473
KL Divergence Average Loss: 0.012035798281431198
Total Loss of this k_epoch: -0.01936817355453968


Entropy of this k_epoch: 0.0008819494396448135
Average policy_loss of this k_epoch: -0.019445620477199554
KL Divergence Average Loss: 0.012011343613266945
Total Loss of this k_epoch: -0.01936960592865944






Entropy of this k_epoch: 0.004991271533071995
Average policy_loss of this k_epoch: -0.0165864285081625
KL Divergence Average Loss: 0.06525509059429169
Total Loss of this k_epoch: -0.016183439642190933


Entropy of this k_epoch: 0.005929701030254364
Average policy_loss of this k_epoch: -0.019167620688676834
KL Divergence Average Loss: 0.012511029839515686
Total Loss of this k_epoch: -0.019338995218276978


Entropy of this k_epoch: 0.0008329381234943867
Average policy_loss of this k_epoch: -0.01944705657660961
KL Divergence Average Loss: 0.012032050639390945
Total Loss of this k_epoch: -0.019368382170796394






Entropy of this k_epoch: 0.003677810076624155
Average policy_loss of this k_epoch: -0.019319333136081696
KL Divergence Average Loss: 0.011759585700929165
Total Loss of this k_epoch: -0.019385626539587975


Entropy of this k_epoch: 0.011533204466104507
Average policy_loss of this k_epoch: -0.01734967902302742
KL Divergence Average Loss: 0.043516963720321655
Total Loss of this k_epoch: -0.017491169273853302


Entropy of this k_epoch: 0.0007784927729517221
Average policy_loss of this k_epoch: -0.01944824680685997
KL Divergence Average Loss: 0.012063596397638321
Total Loss of this k_epoch: -0.019366536289453506






Entropy of this k_epoch: 0.0007113778847269714
Average policy_loss of this k_epoch: -0.019449718296527863
KL Divergence Average Loss: 0.012102290987968445
Total Loss of this k_epoch: -0.019364263862371445


Entropy of this k_epoch: 0.0006957249715924263
Average policy_loss of this k_epoch: -0.0194499883800745
KL Divergence Average Loss: 0.012111390940845013
Total Loss of this k_epoch: -0.019363660365343094


Entropy of this k_epoch: 0.0007725011673755944
Average policy_loss of this k_epoch: -0.01944846846163273
KL Divergence Average Loss: 0.012065932154655457
Total Loss of this k_epoch: -0.019366435706615448




Epoch 27/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.46it/s]

Entropy of this k_epoch: 0.009137406013906002
Average policy_loss of this k_epoch: -0.018833791837096214
KL Divergence Average Loss: 0.06088196486234665
Total Loss of this k_epoch: -0.01868184097111225


Entropy of this k_epoch: 0.0008172365487553179
Average policy_loss of this k_epoch: -0.019447535276412964
KL Divergence Average Loss: 0.012080032378435135
Total Loss of this k_epoch: -0.019367597997188568


Entropy of this k_epoch: 0.0007804849883541465
Average policy_loss of this k_epoch: -0.01944844424724579
KL Divergence Average Loss: 0.01206306740641594
Total Loss of this k_epoch: -0.019366838037967682

Last k_epoch stats:
Loss: -0.0193668 | Ratio: 0.9967185 | Entropy Term: 0.0007805



>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  84%|████████▍ | 27/32 [02:26<00:27,  5.40s/it]

Entire Validation Dataset Accuracy: 0.9219| 177.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.0008484869031235576
Average policy_loss of this k_epoch: 3.900378942489624e-06
KL Divergence Average Loss: 1.673027509241365e-05
Total Loss of this k_epoch: -3.835666211671196e-05






Entropy of this k_epoch: 0.001501946127973497
Average policy_loss of this k_epoch: 1.8402934074401855e-05
KL Divergence Average Loss: 0.0014089131727814674
Total Loss of this k_epoch: -4.2605239286785945e-05






Entropy of this k_epoch: 0.0007732630474492908
Average policy_loss of this k_epoch: 2.8014183044433594e-06
KL Divergence Average Loss: 1.165424328064546e-05
Total Loss of this k_epoch: -3.574519359972328e-05


Entropy of this k_epoch: 0.0008988492772914469
Average policy_loss of this k_epoch: 4.649162292480469e-06
KL Divergence Average Loss: 1.9192666513845325e-05
Total Loss of this k_epoch: -4.010137854493223e-05


Entropy of this k_epoch: 0.0008499390678480268
Average policy_loss of this k_epoch: 3.9711594581604e-06
KL Divergence Average Loss: 1.6259809854091145e-05
Total Loss of this k_epoch: -3.8363195926649496e-05






Entropy of this k_epoch: 0.0007238234393298626
Average policy_loss of this k_epoch: 2.078711986541748e-06
KL Divergence Average Loss: 1.2480723853514064e-05
Total Loss of this k_epoch: -3.398765329620801e-05


Entropy of this k_epoch: 0.0008698690216988325
Average policy_loss of this k_epoch: 4.135072231292725e-06
KL Divergence Average Loss: 3.156362799927592e-05
Total Loss of this k_epoch: -3.904274490196258e-05






Entropy of this k_epoch: 0.0009005871252156794
Average policy_loss of this k_epoch: 4.641711711883545e-06
KL Divergence Average Loss: 1.847362182161305e-05
Total Loss of this k_epoch: -4.020290725748055e-05






Entropy of this k_epoch: 0.0008070575422607362
Average policy_loss of this k_epoch: 3.3527612686157227e-06
KL Divergence Average Loss: 1.6548034182051197e-05
Total Loss of this k_epoch: -3.683463728521019e-05


Entropy of this k_epoch: 0.0008525853045284748
Average policy_loss of this k_epoch: 3.9711594581604e-06
KL Divergence Average Loss: 1.4397003724297974e-05
Total Loss of this k_epoch: -3.851413566735573e-05


Entropy of this k_epoch: 0.0009076794376596808
Average policy_loss of this k_epoch: 4.76837158203125e-06
KL Divergence Average Loss: 1.6883055650396273e-05
Total Loss of this k_epoch: -4.044676825287752e-05






Entropy of this k_epoch: 0.0008659824379719794
Average policy_loss of this k_epoch: 4.127621650695801e-06
KL Divergence Average Loss: 1.5624496882082894e-05
Total Loss of this k_epoch: -3.901525633409619e-05


Entropy of this k_epoch: 0.000941190286539495
Average policy_loss of this k_epoch: 5.304813385009766e-06
KL Divergence Average Loss: 2.2692725906381384e-05
Total Loss of this k_epoch: -4.1527775465510786e-05






Entropy of this k_epoch: 0.0008536595851182938
Average policy_loss of this k_epoch: 3.9637088775634766e-06
KL Divergence Average Loss: 1.5933215763652697e-05
Total Loss of this k_epoch: -3.8559937820537016e-05






Entropy of this k_epoch: 0.0008217544527724385
Average policy_loss of this k_epoch: 3.520399332046509e-06
KL Divergence Average Loss: 1.457862526876852e-05
Total Loss of this k_epoch: -3.742153785424307e-05


Entropy of this k_epoch: 0.0008894776692613959
Average policy_loss of this k_epoch: 4.4852495193481445e-06
KL Divergence Average Loss: 1.5324716514442116e-05
Total Loss of this k_epoch: -3.983538772445172e-05


Entropy of this k_epoch: 0.0009132548584602773
Average policy_loss of this k_epoch: 4.850327968597412e-06
KL Divergence Average Loss: 1.8203667423222214e-05
Total Loss of this k_epoch: -4.063037704327144e-05






Entropy of this k_epoch: 0.0009676885674707592
Average policy_loss of this k_epoch: 5.580484867095947e-06
KL Divergence Average Loss: 2.4526732886442915e-05
Total Loss of this k_epoch: -4.255867679603398e-05


Entropy of this k_epoch: 0.0008863023249432445
Average policy_loss of this k_epoch: 4.41819429397583e-06
KL Divergence Average Loss: 1.673351289355196e-05
Total Loss of this k_epoch: -3.972958802478388e-05






Entropy of this k_epoch: 0.0010368117364123464
Average policy_loss of this k_epoch: 6.686896085739136e-06
KL Divergence Average Loss: 2.6862166123464704e-05
Total Loss of this k_epoch: -4.4885069655720145e-05






Entropy of this k_epoch: 0.0009114867425523698
Average policy_loss of this k_epoch: 4.827976226806641e-06
KL Divergence Average Loss: 1.8662194634089246e-05
Total Loss of this k_epoch: -4.0559742046752945e-05


Entropy of this k_epoch: 0.0010591753525659442
Average policy_loss of this k_epoch: 7.018446922302246e-06
KL Divergence Average Loss: 2.914358083216939e-05
Total Loss of this k_epoch: -4.564888513414189e-05


Entropy of this k_epoch: 0.0010617425432428718
Average policy_loss of this k_epoch: 7.033348083496094e-06
KL Divergence Average Loss: 2.6592191716190428e-05
Total Loss of this k_epoch: -4.57878595625516e-05






Entropy of this k_epoch: 0.0010579015361145139
Average policy_loss of this k_epoch: 7.0445239543914795e-06
KL Divergence Average Loss: 2.9281356546562165e-05
Total Loss of this k_epoch: -4.5557739213109016e-05


Entropy of this k_epoch: 0.0009101122268475592
Average policy_loss of this k_epoch: 4.798173904418945e-06
KL Divergence Average Loss: 1.908125341287814e-05
Total Loss of this k_epoch: -4.0516624721931294e-05






Entropy of this k_epoch: 0.0009577403543516994
Average policy_loss of this k_epoch: 5.513429641723633e-06
KL Divergence Average Loss: 2.3832431907067075e-05
Total Loss of this k_epoch: -4.2135263356612995e-05






Entropy of this k_epoch: 0.0010672372300177813
Average policy_loss of this k_epoch: 7.160007953643799e-06
KL Divergence Average Loss: 3.057570938835852e-05
Total Loss of this k_epoch: -4.5896096708020195e-05


Entropy of this k_epoch: 0.0010774150723591447
Average policy_loss of this k_epoch: 7.323920726776123e-06
KL Divergence Average Loss: 3.1499654141953215e-05
Total Loss of this k_epoch: -4.623183485819027e-05


Entropy of this k_epoch: 0.0010260543785989285
Average policy_loss of this k_epoch: 6.489455699920654e-06
KL Divergence Average Loss: 2.5849723897408694e-05
Total Loss of this k_epoch: -4.455476664588787e-05






Entropy of this k_epoch: 0.0011510864133015275
Average policy_loss of this k_epoch: 8.463859558105469e-06
KL Divergence Average Loss: 4.0305007132701576e-05
Total Loss of this k_epoch: -4.868741234531626e-05


Entropy of this k_epoch: 0.0010591947939246893
Average policy_loss of this k_epoch: 7.0855021476745605e-06
KL Divergence Average Loss: 3.338754322612658e-05
Total Loss of this k_epoch: -4.5540364226326346e-05






Entropy of this k_epoch: 0.0010577766224741936
Average policy_loss of this k_epoch: 7.063150405883789e-06
KL Divergence Average Loss: 3.128057505819015e-05
Total Loss of this k_epoch: -4.551287565845996e-05






Entropy of this k_epoch: 0.000999057781882584
Average policy_loss of this k_epoch: 6.098300218582153e-06
KL Divergence Average Loss: 2.4009719709283672e-05
Total Loss of this k_epoch: -4.361449464340694e-05


Entropy of this k_epoch: 0.0010958946077153087
Average policy_loss of this k_epoch: 7.487833499908447e-06
KL Divergence Average Loss: 3.0593575502280146e-05
Total Loss of this k_epoch: -4.700096178567037e-05


Entropy of this k_epoch: 0.0009499601437710226
Average policy_loss of this k_epoch: 5.37186861038208e-06
KL Divergence Average Loss: 2.060819315374829e-05
Total Loss of this k_epoch: -4.1920058720279485e-05






Entropy of this k_epoch: 0.0011695410357788205
Average policy_loss of this k_epoch: 8.702278137207031e-06
KL Divergence Average Loss: 4.091370647074655e-05
Total Loss of this k_epoch: -4.936563709634356e-05


Entropy of this k_epoch: 0.0011828383430838585
Average policy_loss of this k_epoch: 8.869916200637817e-06
KL Divergence Average Loss: 3.94446833524853e-05
Total Loss of this k_epoch: -4.987755528418347e-05






Entropy of this k_epoch: 0.0011310300324112177
Average policy_loss of this k_epoch: 8.11740756034851e-06
KL Divergence Average Loss: 3.717079380294308e-05
Total Loss of this k_epoch: -4.806238939636387e-05






Entropy of this k_epoch: 0.0011663140030577779
Average policy_loss of this k_epoch: 8.657574653625488e-06
KL Divergence Average Loss: 3.91997309634462e-05
Total Loss of this k_epoch: -4.926612746203318e-05


Entropy of this k_epoch: 0.0010454216971993446
Average policy_loss of this k_epoch: 6.9141387939453125e-06
KL Divergence Average Loss: 3.476183337625116e-05
Total Loss of this k_epoch: -4.500932845985517e-05


Entropy of this k_epoch: 0.0012329823803156614
Average policy_loss of this k_epoch: 9.745359420776367e-06
KL Divergence Average Loss: 4.737604103866033e-05
Total Loss of this k_epoch: -5.143000453244895e-05






Entropy of this k_epoch: 0.0012020326685160398
Average policy_loss of this k_epoch: 9.156763553619385e-06
KL Divergence Average Loss: 4.004530273959972e-05
Total Loss of this k_epoch: -5.054441498941742e-05


Entropy of this k_epoch: 0.0012523768236860633
Average policy_loss of this k_epoch: 1.0065734386444092e-05
KL Divergence Average Loss: 5.3944018873153254e-05
Total Loss of this k_epoch: -5.2013667300343513e-05






Entropy of this k_epoch: 0.0028267328161746264
Average policy_loss of this k_epoch: 4.919618368148804e-05
KL Divergence Average Loss: 0.00507960794493556
Total Loss of this k_epoch: -4.134437767788768e-05






Entropy of this k_epoch: 0.0020349062979221344
Average policy_loss of this k_epoch: 2.7738511562347412e-05
KL Divergence Average Loss: 0.002408308442682028
Total Loss of this k_epoch: -4.9923721235245466e-05


Entropy of this k_epoch: 0.0012156390585005283
Average policy_loss of this k_epoch: 9.47713851928711e-06
KL Divergence Average Loss: 4.786404315382242e-05
Total Loss of this k_epoch: -5.0826172810047865e-05


Entropy of this k_epoch: 0.0012756465002894402
Average policy_loss of this k_epoch: 1.0382384061813354e-05
KL Divergence Average Loss: 5.4316609748639166e-05
Total Loss of this k_epoch: -5.285677616484463e-05






Entropy of this k_epoch: 0.0011346685932949185
Average policy_loss of this k_epoch: 8.221715688705444e-06
KL Divergence Average Loss: 4.0394821553491056e-05
Total Loss of this k_epoch: -4.8107765906024724e-05


Entropy of this k_epoch: 0.0014810420107096434
Average policy_loss of this k_epoch: 1.3496726751327515e-05
KL Divergence Average Loss: 7.536941120633855e-05
Total Loss of this k_epoch: -5.980167770758271e-05






Entropy of this k_epoch: 0.001210429472848773
Average policy_loss of this k_epoch: 9.346753358840942e-06
KL Divergence Average Loss: 4.580402310239151e-05
Total Loss of this k_epoch: -5.0716680561890826e-05






Entropy of this k_epoch: 0.0012720476370304823
Average policy_loss of this k_epoch: 1.0341405868530273e-05
KL Divergence Average Loss: 5.5626154789933935e-05
Total Loss of this k_epoch: -5.270471592666581e-05


Entropy of this k_epoch: 0.0015331958420574665
Average policy_loss of this k_epoch: 1.4331191778182983e-05
KL Divergence Average Loss: 8.43865200295113e-05
Total Loss of this k_epoch: -6.14847376709804e-05


Entropy of this k_epoch: 0.0014398556668311357
Average policy_loss of this k_epoch: 1.2952834367752075e-05
KL Divergence Average Loss: 7.139625085983425e-05
Total Loss of this k_epoch: -5.832598981214687e-05






Entropy of this k_epoch: 0.0013140817172825336
Average policy_loss of this k_epoch: 1.0989606380462646e-05
KL Divergence Average Loss: 5.8177829487249255e-05
Total Loss of this k_epoch: -5.4132702643983066e-05


Entropy of this k_epoch: 0.0011990973725914955
Average policy_loss of this k_epoch: 9.324401617050171e-06
KL Divergence Average Loss: 5.085479642730206e-05
Total Loss of this k_epoch: -5.012191832065582e-05






Entropy of this k_epoch: 0.0015052109956741333
Average policy_loss of this k_epoch: 1.4021992683410645e-05
KL Divergence Average Loss: 8.409592555835843e-05
Total Loss of this k_epoch: -6.039759682607837e-05






Entropy of this k_epoch: 0.001412422046996653
Average policy_loss of this k_epoch: 1.2539327144622803e-05
KL Divergence Average Loss: 7.339599687838927e-05
Total Loss of this k_epoch: -5.734781734645367e-05


Entropy of this k_epoch: 0.0016206181608140469
Average policy_loss of this k_epoch: 1.5817582607269287e-05
KL Divergence Average Loss: 0.00010065569949802011
Total Loss of this k_epoch: -6.42067680018954e-05


Entropy of this k_epoch: 0.0015668505802750587
Average policy_loss of this k_epoch: 1.4901161193847656e-05
KL Divergence Average Loss: 9.135810978477821e-05
Total Loss of this k_epoch: -6.252778985071927e-05






Entropy of this k_epoch: 0.0016847526421770453
Average policy_loss of this k_epoch: 1.6849488019943237e-05
KL Divergence Average Loss: 0.00010846982331713662
Total Loss of this k_epoch: -6.630344432778656e-05


Entropy of this k_epoch: 0.0015442605363205075
Average policy_loss of this k_epoch: 1.461803913116455e-05
KL Divergence Average Loss: 9.429590863874182e-05
Total Loss of this k_epoch: -6.165203376440331e-05






Entropy of this k_epoch: 0.0015527608338743448
Average policy_loss of this k_epoch: 1.4711171388626099e-05
KL Divergence Average Loss: 9.069641964742914e-05
Total Loss of this k_epoch: -6.201990618137643e-05




Epoch 28/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.41it/s]


Entropy of this k_epoch: 0.0016277388203889132
Average policy_loss of this k_epoch: 1.5888363122940063e-05
KL Divergence Average Loss: 9.967206278815866e-05
Total Loss of this k_epoch: -6.450185901485384e-05


Entropy of this k_epoch: 0.0016797708813101053
Average policy_loss of this k_epoch: 1.6786158084869385e-05
KL Divergence Average Loss: 0.00011033970076823607
Total Loss of this k_epoch: -6.609898991882801e-05

Last k_epoch stats:
Loss: -0.0000661 | Ratio: 0.9998657 | Entropy Term: 0.0016798


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  88%|████████▊ | 28/32 [02:31<00:21,  5.40s/it]

Entire Validation Dataset Accuracy: 0.9323| 179.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.0019139265641570091
Average policy_loss of this k_epoch: 1.0091811418533325e-05
KL Divergence Average Loss: 3.9425187424058095e-05
Total Loss of this k_epoch: -8.521026757080108e-05






Entropy of this k_epoch: 0.001480421400628984
Average policy_loss of this k_epoch: 3.0659139156341553e-06
KL Divergence Average Loss: 1.5386973245767877e-05
Total Loss of this k_epoch: -7.080128852976486e-05






Entropy of this k_epoch: 0.0017140135169029236
Average policy_loss of this k_epoch: 6.861984729766846e-06
KL Divergence Average Loss: 2.7566406060941517e-05
Total Loss of this k_epoch: -7.856302545405924e-05


Entropy of this k_epoch: 0.001801988109946251
Average policy_loss of this k_epoch: 8.266419172286987e-06
KL Divergence Average Loss: 3.081202885368839e-05
Total Loss of this k_epoch: -8.152486407198012e-05


Entropy of this k_epoch: 0.0018535854760557413
Average policy_loss of this k_epoch: 9.004026651382446e-06
KL Divergence Average Loss: 3.144115180475637e-05
Total Loss of this k_epoch: -8.33608428365551e-05






Entropy of this k_epoch: 0.0018151022959500551
Average policy_loss of this k_epoch: 8.530914783477783e-06
KL Divergence Average Loss: 3.588932304410264e-05
Total Loss of this k_epoch: -8.186530612874776e-05


Entropy of this k_epoch: 0.0016296806279569864
Average policy_loss of this k_epoch: 5.5730342864990234e-06
KL Divergence Average Loss: 2.8744241717504337e-05
Total Loss of this k_epoch: -7.562355312984437e-05






Entropy of this k_epoch: 0.001801760750822723
Average policy_loss of this k_epoch: 8.314847946166992e-06
KL Divergence Average Loss: 3.2752457627793774e-05
Total Loss of this k_epoch: -8.144566527334973e-05






Entropy of this k_epoch: 0.0017914100317284465
Average policy_loss of this k_epoch: 8.139759302139282e-06
KL Divergence Average Loss: 3.2290936360368505e-05
Total Loss of this k_epoch: -8.110783528536558e-05


Entropy of this k_epoch: 0.001810002839192748
Average policy_loss of this k_epoch: 8.538365364074707e-06
KL Divergence Average Loss: 3.644373646238819e-05
Total Loss of this k_epoch: -8.1597339885775e-05


Entropy of this k_epoch: 0.0020649575162678957
Average policy_loss of this k_epoch: 1.2625008821487427e-05
KL Divergence Average Loss: 5.1677398005267605e-05
Total Loss of this k_epoch: -9.010609210235998e-05






Entropy of this k_epoch: 0.0018857771065086126
Average policy_loss of this k_epoch: 9.607523679733276e-06
KL Divergence Average Loss: 3.716037463163957e-05
Total Loss of this k_epoch: -8.430972957285121e-05


Entropy of this k_epoch: 0.002148892730474472
Average policy_loss of this k_epoch: 1.403316855430603e-05
KL Divergence Average Loss: 5.342290751286782e-05
Total Loss of this k_epoch: -9.287724242312834e-05






Entropy of this k_epoch: 0.0020364969968795776
Average policy_loss of this k_epoch: 1.2166798114776611e-05
KL Divergence Average Loss: 5.09818819409702e-05
Total Loss of this k_epoch: -8.914823411032557e-05






Entropy of this k_epoch: 0.00192947406321764
Average policy_loss of this k_epoch: 1.0527670383453369e-05
KL Divergence Average Loss: 5.074713044450618e-05
Total Loss of this k_epoch: -8.543856529286131e-05


Entropy of this k_epoch: 0.0022340957075357437
Average policy_loss of this k_epoch: 1.5307217836380005e-05
KL Divergence Average Loss: 6.13224328844808e-05
Total Loss of this k_epoch: -9.578434401191771e-05


Entropy of this k_epoch: 0.0021898176055401564
Average policy_loss of this k_epoch: 1.4584511518478394e-05
KL Divergence Average Loss: 5.895857611903921e-05
Total Loss of this k_epoch: -9.431678336113691e-05






Entropy of this k_epoch: 0.0022140974178910255
Average policy_loss of this k_epoch: 1.514330506324768e-05
KL Divergence Average Loss: 6.409514753613621e-05
Total Loss of this k_epoch: -9.4920615083538e-05


Entropy of this k_epoch: 0.0024796617217361927
Average policy_loss of this k_epoch: 1.942366361618042e-05
KL Divergence Average Loss: 7.806898793205619e-05
Total Loss of this k_epoch: -0.00010377873695688322






Entropy of this k_epoch: 0.002409655135124922
Average policy_loss of this k_epoch: 1.8343329429626465e-05
KL Divergence Average Loss: 7.884762453613803e-05
Total Loss of this k_epoch: -0.00010135095362784341






Entropy of this k_epoch: 0.002527130302041769
Average policy_loss of this k_epoch: 2.0381063222885132e-05
KL Divergence Average Loss: 9.027820487972349e-05
Total Loss of this k_epoch: -0.00010507267143111676


Entropy of this k_epoch: 0.0025229062885046005
Average policy_loss of this k_epoch: 2.008676528930664e-05
KL Divergence Average Loss: 8.458749653073028e-05
Total Loss of this k_epoch: -0.00010521267540752888


Entropy of this k_epoch: 0.0025887973606586456
Average policy_loss of this k_epoch: 2.1208077669143677e-05
KL Divergence Average Loss: 8.617991989012808e-05
Total Loss of this k_epoch: -0.00010736998956417665






Entropy of this k_epoch: 0.00240972894243896
Average policy_loss of this k_epoch: 1.833587884902954e-05
KL Divergence Average Loss: 7.455823651980609e-05
Total Loss of this k_epoch: -0.00010140498488908634


Entropy of this k_epoch: 0.0025298730470240116
Average policy_loss of this k_epoch: 2.0615756511688232e-05
KL Divergence Average Loss: 9.585470252204686e-05
Total Loss of this k_epoch: -0.00010491935245227069






Entropy of this k_epoch: 0.0025908988900482655
Average policy_loss of this k_epoch: 2.1554529666900635e-05
KL Divergence Average Loss: 0.00010326823394279927
Total Loss of this k_epoch: -0.00010695773380575702






Entropy of this k_epoch: 0.002597188111394644
Average policy_loss of this k_epoch: 2.158433198928833e-05
KL Divergence Average Loss: 9.781233529793099e-05
Total Loss of this k_epoch: -0.00010729696077760309


Entropy of this k_epoch: 0.0024455981329083443
Average policy_loss of this k_epoch: 1.8812716007232666e-05
KL Divergence Average Loss: 7.619891403010115e-05
Total Loss of this k_epoch: -0.00010270519851474091


Entropy of this k_epoch: 0.003057211870327592
Average policy_loss of this k_epoch: 2.961978316307068e-05
KL Divergence Average Loss: 0.00015916340635158122
Total Loss of this k_epoch: -0.00012164918007329106






Entropy of this k_epoch: 0.0028651340398937464
Average policy_loss of this k_epoch: 2.6173889636993408e-05
KL Divergence Average Loss: 0.00012648179836105555
Total Loss of this k_epoch: -0.00011581800208659843


Entropy of this k_epoch: 0.0030681644566357136
Average policy_loss of this k_epoch: 2.9921531677246094e-05
KL Divergence Average Loss: 0.00016562954988330603
Total Loss of this k_epoch: -0.00012183039507362992






Entropy of this k_epoch: 0.003037639893591404
Average policy_loss of this k_epoch: 2.8975307941436768e-05
KL Divergence Average Loss: 0.00016353133833035827
Total Loss of this k_epoch: -0.00012127136869821697






Entropy of this k_epoch: 0.0031265730503946543
Average policy_loss of this k_epoch: 3.1031668186187744e-05
KL Divergence Average Loss: 0.00018312115571461618
Total Loss of this k_epoch: -0.00012346576841082424


Entropy of this k_epoch: 0.0029975674115121365
Average policy_loss of this k_epoch: 2.8621405363082886e-05
KL Divergence Average Loss: 0.000159157338202931
Total Loss of this k_epoch: -0.00011966539022978395


Entropy of this k_epoch: 0.0034286226145923138
Average policy_loss of this k_epoch: 3.618746995925903e-05
KL Divergence Average Loss: 0.00021502574963960797
Total Loss of this k_epoch: -0.00013309341738931835






Entropy of this k_epoch: 0.003520505502820015
Average policy_loss of this k_epoch: 3.770366311073303e-05
KL Divergence Average Loss: 0.0002292900171596557
Total Loss of this k_epoch: -0.00013602871331386268


Entropy of this k_epoch: 0.0035513590555638075
Average policy_loss of this k_epoch: 3.845244646072388e-05
KL Divergence Average Loss: 0.0002338154736207798
Total Loss of this k_epoch: -0.00013677735114470124






Entropy of this k_epoch: 0.003642298048362136
Average policy_loss of this k_epoch: 4.0084123611450195e-05
KL Divergence Average Loss: 0.0002592195523902774
Total Loss of this k_epoch: -0.00013943859084974974






Entropy of this k_epoch: 0.0037436005659401417
Average policy_loss of this k_epoch: 4.212185740470886e-05
KL Divergence Average Loss: 0.00027870613848790526
Total Loss of this k_epoch: -0.00014227110659703612


Entropy of this k_epoch: 0.0039026965387165546
Average policy_loss of this k_epoch: 4.476308822631836e-05
KL Divergence Average Loss: 0.0003033243410754949
Total Loss of this k_epoch: -0.00014733849093317986


Entropy of this k_epoch: 0.0037579028867185116
Average policy_loss of this k_epoch: 4.215911030769348e-05
KL Divergence Average Loss: 0.00027463160222396255
Total Loss of this k_epoch: -0.00014298970927484334






Entropy of this k_epoch: 0.004004444926977158
Average policy_loss of this k_epoch: 4.6506524085998535e-05
KL Divergence Average Loss: 0.0003043545875698328
Total Loss of this k_epoch: -0.0001506721746409312


Entropy of this k_epoch: 0.00427604466676712
Average policy_loss of this k_epoch: 5.159154534339905e-05
KL Divergence Average Loss: 0.00036346600973047316
Total Loss of this k_epoch: -0.00015857603284530342






Entropy of this k_epoch: 0.003980547189712524
Average policy_loss of this k_epoch: 4.628673195838928e-05
KL Divergence Average Loss: 0.0003158221661578864
Total Loss of this k_epoch: -0.00014958241081330925






Entropy of this k_epoch: 0.0041031972505152225
Average policy_loss of this k_epoch: 4.826486110687256e-05
KL Divergence Average Loss: 0.0003337301895953715
Total Loss of this k_epoch: -0.0001535577030153945


Entropy of this k_epoch: 0.004780424293130636
Average policy_loss of this k_epoch: 6.099045276641846e-05
KL Divergence Average Loss: 0.00046961315092630684
Total Loss of this k_epoch: -0.000173334643477574


Entropy of this k_epoch: 0.00493307551369071
Average policy_loss of this k_epoch: 6.423890590667725e-05
KL Divergence Average Loss: 0.0005239558522589505
Total Loss of this k_epoch: -0.0001771753013599664






Entropy of this k_epoch: 0.005075682885944843
Average policy_loss of this k_epoch: 6.667524576187134e-05
KL Divergence Average Loss: 0.0005571006913669407
Total Loss of this k_epoch: -0.00018153790733776987


Entropy of this k_epoch: 0.005026271101087332
Average policy_loss of this k_epoch: 6.6414475440979e-05
KL Divergence Average Loss: 0.0005806481931358576
Total Loss of this k_epoch: -0.0001790926035027951






Entropy of this k_epoch: 0.0051391771994531155
Average policy_loss of this k_epoch: 6.783753633499146e-05
KL Divergence Average Loss: 0.0005594861577264965
Total Loss of this k_epoch: -0.00018352645565755665






Entropy of this k_epoch: 0.004877923056483269
Average policy_loss of this k_epoch: 6.302446126937866e-05
KL Divergence Average Loss: 0.0005219876766204834
Total Loss of this k_epoch: -0.000175651817698963


Entropy of this k_epoch: 0.00544874370098114
Average policy_loss of this k_epoch: 7.385015487670898e-05
KL Divergence Average Loss: 0.0006419047713279724
Total Loss of this k_epoch: -0.00019216799410060048


Entropy of this k_epoch: 0.00596575066447258
Average policy_loss of this k_epoch: 8.399039506912231e-05
KL Divergence Average Loss: 0.0007766723865643144
Total Loss of this k_epoch: -0.00020653042884077877






Entropy of this k_epoch: 0.005164521746337414
Average policy_loss of this k_epoch: 6.901472806930542e-05
KL Divergence Average Loss: 0.0006100501050241292
Total Loss of this k_epoch: -0.00018311086751054972


Entropy of this k_epoch: 0.00585270207375288
Average policy_loss of this k_epoch: 8.217990398406982e-05
KL Divergence Average Loss: 0.0007917496841400862
Total Loss of this k_epoch: -0.00020253771799616516






Entropy of this k_epoch: 0.006068823859095573
Average policy_loss of this k_epoch: 8.596107363700867e-05
KL Divergence Average Loss: 0.0008246251381933689
Total Loss of this k_epoch: -0.00020923386909998953







Entropy of this k_epoch: 0.005920713767409325
Average policy_loss of this k_epoch: 8.36104154586792e-05
KL Divergence Average Loss: 0.0008102986612357199
Total Loss of this k_epoch: -0.00020432229212019593


Entropy of this k_epoch: 0.006782779470086098
Average policy_loss of this k_epoch: 0.00010070204734802246
KL Divergence Average Loss: 0.0010455672163516283
Total Loss of this k_epoch: -0.0002279812761116773



Epoch 29/32 (Inner K-Epochs):  91%|█████████ | 58/64 [00:04<00:00, 12.11it/s][A


Entropy of this k_epoch: 0.007213637698441744
Average policy_loss of this k_epoch: 0.0001092180609703064
KL Divergence Average Loss: 0.001167220063507557
Total Loss of this k_epoch: -0.00023979161051101983






Entropy of this k_epoch: 0.007010204251855612
Average policy_loss of this k_epoch: 0.00010460987687110901
KL Divergence Average Loss: 0.0010901354253292084
Total Loss of this k_epoch: -0.00023499896633438766






Entropy of this k_epoch: 0.00702802836894989
Average policy_loss of this k_epoch: 0.0001056864857673645
KL Divergence Average Loss: 0.001117176958359778
Total Loss of this k_epoch: -0.00023454317124560475


Entropy of this k_epoch: 0.007297784090042114
Average policy_loss of this k_epoch: 0.00011146813631057739
KL Divergence Average Loss: 0.001235645730048418
Total Loss of this k_epoch: -0.0002410646266071126


Entropy of this k_epoch: 0.008320791646838188
Average policy_loss of this k_epoch: 0.0001317448914051056
KL Divergence Average Loss: 0.0015294912736862898
Total Loss of this k_epoch: -0.0002689997781999409




Epoch 29/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.39it/s]


Entropy of this k_epoch: 0.008442050777375698
Average policy_loss of this k_epoch: 0.00013428181409835815
KL Divergence Average Loss: 0.0015654349699616432
Total Loss of this k_epoch: -0.0002721663913689554

Last k_epoch stats:
Loss: -0.0002722 | Ratio: 0.9989257 | Entropy Term: 0.0084421
Entire Validation Dataset Accuracy: 0.9323| 179.0 / 192.0 samples


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  91%|█████████ | 29/32 [02:37<00:16,  5.40s/it]

old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.009311102330684662
Average policy_loss of this k_epoch: 3.203749656677246e-05
KL Divergence Average Loss: 6.6705237259157e-05
Total Loss of this k_epoch: -0.0004328505601733923






Entropy of this k_epoch: 0.009232562966644764
Average policy_loss of this k_epoch: 3.0957162380218506e-05
KL Divergence Average Loss: 8.693209383636713e-05
Total Loss of this k_epoch: -0.00042980167199857533






Entropy of this k_epoch: 0.00934490654617548
Average policy_loss of this k_epoch: 3.3564865589141846e-05
KL Divergence Average Loss: 9.330467582913116e-05
Total Loss of this k_epoch: -0.00043274741619825363


Entropy of this k_epoch: 0.010570341721177101
Average policy_loss of this k_epoch: 5.9314072132110596e-05
KL Divergence Average Loss: 0.00013708145706914365
Total Loss of this k_epoch: -0.0004678322293329984


Entropy of this k_epoch: 0.011239788495004177
Average policy_loss of this k_epoch: 7.564574480056763e-05
KL Divergence Average Loss: 0.00025558145716786385
Total Loss of this k_epoch: -0.0004837878805119544






Entropy of this k_epoch: 0.011881386861205101
Average policy_loss of this k_epoch: 8.931756019592285e-05
KL Divergence Average Loss: 0.00024014644441194832
Total Loss of this k_epoch: -0.0005023503326810896


Entropy of this k_epoch: 0.01250185165554285
Average policy_loss of this k_epoch: 0.00010285153985023499
KL Divergence Average Loss: 0.00032059900695458055
Total Loss of this k_epoch: -0.0005190350348129869






Entropy of this k_epoch: 0.01383912842720747
Average policy_loss of this k_epoch: 0.00013450533151626587
KL Divergence Average Loss: 0.0004973625764250755
Total Loss of this k_epoch: -0.0005524774896912277






Entropy of this k_epoch: 0.014406552538275719
Average policy_loss of this k_epoch: 0.0001478642225265503
KL Divergence Average Loss: 0.0005646263598464429
Total Loss of this k_epoch: -0.000566817179787904


Entropy of this k_epoch: 0.014726775698363781
Average policy_loss of this k_epoch: 0.00015547126531600952
KL Divergence Average Loss: 0.0006831243517808616
Total Loss of this k_epoch: -0.000574036268517375


Entropy of this k_epoch: 0.015785444527864456
Average policy_loss of this k_epoch: 0.00018017739057540894
KL Divergence Average Loss: 0.00077378551941365
Total Loss of this k_epoch: -0.0006013570236973464






Entropy of this k_epoch: 0.01975034922361374
Average policy_loss of this k_epoch: 0.00027947500348091125
KL Divergence Average Loss: 0.0015441306168213487
Total Loss of this k_epoch: -0.0006926011992618442


Entropy of this k_epoch: 0.01946759596467018
Average policy_loss of this k_epoch: 0.00027120858430862427
KL Divergence Average Loss: 0.001468429109081626
Total Loss of this k_epoch: -0.0006874869577586651






Entropy of this k_epoch: 0.022079426795244217
Average policy_loss of this k_epoch: 0.00033958256244659424
KL Divergence Average Loss: 0.0021345987915992737
Total Loss of this k_epoch: -0.0007430427940562367






Entropy of this k_epoch: 0.02241630107164383
Average policy_loss of this k_epoch: 0.00034872815012931824
KL Divergence Average Loss: 0.002229143399745226
Total Loss of this k_epoch: -0.0007497954647988081


Entropy of this k_epoch: 0.025023311376571655
Average policy_loss of this k_epoch: 0.0004179254174232483
KL Divergence Average Loss: 0.0030489913187921047
Total Loss of this k_epoch: -0.0008027503499761224


Entropy of this k_epoch: 0.028850480914115906
Average policy_loss of this k_epoch: 0.0005239509046077728
KL Divergence Average Loss: 0.00429338775575161
Total Loss of this k_epoch: -0.0008756392635405064






Entropy of this k_epoch: 0.03368854895234108
Average policy_loss of this k_epoch: 0.0006604194641113281
KL Divergence Average Loss: 0.006058346014469862
Total Loss of this k_epoch: -0.0009634245652705431


Entropy of this k_epoch: 0.03659933805465698
Average policy_loss of this k_epoch: 0.0007465332746505737
KL Divergence Average Loss: 0.007366359233856201
Total Loss of this k_epoch: -0.0010097699705511332






Entropy of this k_epoch: 0.04079795628786087
Average policy_loss of this k_epoch: 0.0008757635951042175
KL Divergence Average Loss: 0.00934615544974804
Total Loss of this k_epoch: -0.0010706728789955378






Entropy of this k_epoch: 0.047763220965862274
Average policy_loss of this k_epoch: 0.0010950081050395966
KL Divergence Average Loss: 0.013052908703684807
Total Loss of this k_epoch: -0.0011626239866018295


Entropy of this k_epoch: 0.05224720388650894
Average policy_loss of this k_epoch: 0.001248180866241455
KL Divergence Average Loss: 0.015921974554657936
Total Loss of this k_epoch: -0.0012049596989527345


Entropy of this k_epoch: 0.06040345877408981
Average policy_loss of this k_epoch: 0.0015300139784812927
KL Divergence Average Loss: 0.021522406488656998
Total Loss of this k_epoch: -0.001274934853427112






Entropy of this k_epoch: 0.06746732443571091
Average policy_loss of this k_epoch: 0.0017829686403274536
KL Divergence Average Loss: 0.026676686480641365
Total Loss of this k_epoch: -0.0013236308004707098


Entropy of this k_epoch: 0.07827840745449066
Average policy_loss of this k_epoch: 0.0022062137722969055
KL Divergence Average Loss: 0.036253876984119415
Total Loss of this k_epoch: -0.0013451678678393364






Entropy of this k_epoch: 0.0909942239522934
Average policy_loss of this k_epoch: 0.0027239546179771423
KL Divergence Average Loss: 0.04842809587717056
Total Loss of this k_epoch: -0.001341475872322917






Entropy of this k_epoch: 0.10000135004520416
Average policy_loss of this k_epoch: 0.003101196140050888
KL Divergence Average Loss: 0.05778644233942032
Total Loss of this k_epoch: -0.0013210067991167307


Entropy of this k_epoch: 0.11141301691532135
Average policy_loss of this k_epoch: 0.003622286021709442
KL Divergence Average Loss: 0.07156774401664734
Total Loss of this k_epoch: -0.001232687383890152


Entropy of this k_epoch: 0.11034451425075531
Average policy_loss of this k_epoch: 0.0036394596099853516
KL Divergence Average Loss: 0.07275190949440002
Total Loss of this k_epoch: -0.0011502471752464771






Entropy of this k_epoch: 0.10269278287887573
Average policy_loss of this k_epoch: 0.0032375752925872803
KL Divergence Average Loss: 0.06177867576479912
Total Loss of this k_epoch: -0.0012792772613465786


Entropy of this k_epoch: 0.10732261836528778
Average policy_loss of this k_epoch: 0.003418281674385071
KL Divergence Average Loss: 0.06593199074268341
Total Loss of this k_epoch: -0.001288529485464096






Entropy of this k_epoch: 0.098939448595047
Average policy_loss of this k_epoch: 0.003079306334257126
KL Divergence Average Loss: 0.05737994611263275
Total Loss of this k_epoch: -0.0012938668951392174






Entropy of this k_epoch: 0.09047801047563553
Average policy_loss of this k_epoch: 0.0026843734085559845
KL Divergence Average Loss: 0.04731229320168495
Total Loss of this k_epoch: -0.0013664043508470058


Entropy of this k_epoch: 0.08997432887554169
Average policy_loss of this k_epoch: 0.0026639997959136963
KL Divergence Average Loss: 0.04691958799958229
Total Loss of this k_epoch: -0.0013655207585543394


Entropy of this k_epoch: 0.08366674929857254
Average policy_loss of this k_epoch: 0.0024077221751213074
KL Divergence Average Loss: 0.04056654870510101
Total Loss of this k_epoch: -0.001369949895888567






Entropy of this k_epoch: 0.07929452508687973
Average policy_loss of this k_epoch: 0.0022378042340278625
KL Divergence Average Loss: 0.0367862768471241
Total Loss of this k_epoch: -0.0013590594753623009


Entropy of this k_epoch: 0.07248470187187195
Average policy_loss of this k_epoch: 0.0019789747893810272
KL Divergence Average Loss: 0.0311498511582613
Total Loss of this k_epoch: -0.001333761727437377






Entropy of this k_epoch: 0.0718829557299614
Average policy_loss of this k_epoch: 0.001959182322025299
KL Divergence Average Loss: 0.030588960275053978
Total Loss of this k_epoch: -0.0013290757779031992






Entropy of this k_epoch: 0.07444450259208679
Average policy_loss of this k_epoch: 0.0020422153174877167
KL Divergence Average Loss: 0.0323554128408432
Total Loss of this k_epoch: -0.0013564557302743196


Entropy of this k_epoch: 0.07148031890392303
Average policy_loss of this k_epoch: 0.0019381046295166016
KL Divergence Average Loss: 0.0302569717168808
Total Loss of this k_epoch: -0.0013333417009562254


Entropy of this k_epoch: 0.07041820883750916
Average policy_loss of this k_epoch: 0.001896388828754425
KL Divergence Average Loss: 0.029074925929307938
Total Loss of this k_epoch: -0.001333772437646985






Entropy of this k_epoch: 0.0696149617433548
Average policy_loss of this k_epoch: 0.0018829591572284698
KL Divergence Average Loss: 0.02907671593129635
Total Loss of this k_epoch: -0.001307021826505661


Entropy of this k_epoch: 0.07187707722187042
Average policy_loss of this k_epoch: 0.001942213624715805
KL Divergence Average Loss: 0.03004603646695614
Total Loss of this k_epoch: -0.0013511800207197666






Entropy of this k_epoch: 0.07464337348937988
Average policy_loss of this k_epoch: 0.002050943672657013
KL Divergence Average Loss: 0.03235448896884918
Total Loss of this k_epoch: -0.0013576801866292953






Entropy of this k_epoch: 0.07035667449235916
Average policy_loss of this k_epoch: 0.0018961578607559204
KL Divergence Average Loss: 0.029134396463632584
Total Loss of this k_epoch: -0.0013303318992257118


Entropy of this k_epoch: 0.07493752241134644
Average policy_loss of this k_epoch: 0.0020717084407806396
KL Divergence Average Loss: 0.03311131149530411
Total Loss of this k_epoch: -0.0013440547045320272


Entropy of this k_epoch: 0.08136717230081558
Average policy_loss of this k_epoch: 0.0023230761289596558
KL Divergence Average Loss: 0.03876706212759018
Total Loss of this k_epoch: -0.0013576119672507048






Entropy of this k_epoch: 0.07852970063686371
Average policy_loss of this k_epoch: 0.002204451709985733
KL Divergence Average Loss: 0.036142051219940186
Total Loss of this k_epoch: -0.0013606131542474031


Entropy of this k_epoch: 0.08277243375778198
Average policy_loss of this k_epoch: 0.0023758262395858765
KL Divergence Average Loss: 0.03997182473540306
Total Loss of this k_epoch: -0.0013630774337798357






Entropy of this k_epoch: 0.08269305527210236
Average policy_loss of this k_epoch: 0.002371273934841156
KL Divergence Average Loss: 0.039806757122278214
Total Loss of this k_epoch: -0.001365311210975051






Entropy of this k_epoch: 0.08575219660997391
Average policy_loss of this k_epoch: 0.002484053373336792
KL Divergence Average Loss: 0.04270041733980179
Total Loss of this k_epoch: -0.0013765522744506598


Entropy of this k_epoch: 0.08917048573493958
Average policy_loss of this k_epoch: 0.002628777176141739
KL Divergence Average Loss: 0.04592009261250496
Total Loss of this k_epoch: -0.0013705461751669645


Entropy of this k_epoch: 0.09135901927947998
Average policy_loss of this k_epoch: 0.0027191415429115295
KL Divergence Average Loss: 0.04830294847488403
Total Loss of this k_epoch: -0.0013657798990607262






Entropy of this k_epoch: 0.09325088560581207
Average policy_loss of this k_epoch: 0.0028005875647068024
KL Divergence Average Loss: 0.050160832703113556
Total Loss of this k_epoch: -0.0013603486586362123


Entropy of this k_epoch: 0.08999469876289368
Average policy_loss of this k_epoch: 0.0026766471564769745
KL Divergence Average Loss: 0.04754197597503662
Total Loss of this k_epoch: -0.0013476680032908916






Entropy of this k_epoch: 0.09446312487125397
Average policy_loss of this k_epoch: 0.0028439685702323914
KL Divergence Average Loss: 0.05120411515235901
Total Loss of this k_epoch: -0.0013671466149389744






Entropy of this k_epoch: 0.09069637209177017
Average policy_loss of this k_epoch: 0.002704441547393799
KL Divergence Average Loss: 0.04814552888274193
Total Loss of this k_epoch: -0.0013489217963069677


Entropy of this k_epoch: 0.09153619408607483
Average policy_loss of this k_epoch: 0.0027436204254627228
KL Divergence Average Loss: 0.04893716424703598
Total Loss of this k_epoch: -0.0013438176829367876


Entropy of this k_epoch: 0.08656421303749084
Average policy_loss of this k_epoch: 0.002525642514228821
KL Divergence Average Loss: 0.04356187954545021
Total Loss of this k_epoch: -0.0013669496402144432






Entropy of this k_epoch: 0.08144034445285797
Average policy_loss of this k_epoch: 0.0023101046681404114
KL Divergence Average Loss: 0.03860393166542053
Total Loss of this k_epoch: -0.0013758735731244087


Entropy of this k_epoch: 0.08477939665317535
Average policy_loss of this k_epoch: 0.00246337428689003
KL Divergence Average Loss: 0.04211676865816116
Total Loss of this k_epoch: -0.0013544282410293818






Entropy of this k_epoch: 0.07817171514034271
Average policy_loss of this k_epoch: 0.0021838396787643433
KL Divergence Average Loss: 0.035358816385269165
Total Loss of this k_epoch: -0.0013711578212678432




Epoch 30/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.39it/s]


Entropy of this k_epoch: 0.07929551601409912
Average policy_loss of this k_epoch: 0.002231210470199585
KL Divergence Average Loss: 0.036816924810409546
Total Loss of this k_epoch: -0.001365395961329341


Entropy of this k_epoch: 0.07968482375144958
Average policy_loss of this k_epoch: 0.0022447295486927032
KL Divergence Average Loss: 0.03705349564552307
Total Loss of this k_epoch: -0.0013689766637980938

Last k_epoch stats:
Loss: -0.0013690 | Ratio: 0.9820422 | Entropy Term: 0.0796848


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  94%|█████████▍| 30/32 [02:42<00:10,  5.41s/it]

Entire Validation Dataset Accuracy: 0.9323| 179.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782, 0.1782,
        0.1782], device='cuda:0')





Entropy of this k_epoch: 0.07859726995229721
Average policy_loss of this k_epoch: 0.0652318000793457
KL Divergence Average Loss: 0.000621546059846878
Total Loss of this k_epoch: 0.06130814924836159






Entropy of this k_epoch: 0.06792758405208588
Average policy_loss of this k_epoch: 0.008056659251451492
KL Divergence Average Loss: 0.0005892082117497921
Total Loss of this k_epoch: 0.004666172433644533






Entropy of this k_epoch: 0.05494420975446701
Average policy_loss of this k_epoch: -0.018285181373357773
KL Divergence Average Loss: 0.001342526520602405
Total Loss of this k_epoch: -0.021018965169787407


Entropy of this k_epoch: 0.04749859869480133
Average policy_loss of this k_epoch: -0.018657229840755463
KL Divergence Average Loss: 0.002226269571110606
Total Loss of this k_epoch: -0.02100989781320095


Entropy of this k_epoch: 0.03571601212024689
Average policy_loss of this k_epoch: -0.019177976995706558
KL Divergence Average Loss: 0.004607075825333595
Total Loss of this k_epoch: -0.020917706191539764






Entropy of this k_epoch: 0.029615776613354683
Average policy_loss of this k_epoch: -0.019453125074505806
KL Divergence Average Loss: 0.00614203279837966
Total Loss of this k_epoch: -0.020872492343187332


Entropy of this k_epoch: 0.027213919907808304
Average policy_loss of this k_epoch: -0.019540000706911087
KL Divergence Average Loss: 0.006692247930914164
Total Loss of this k_epoch: -0.020833773538470268





Epoch 31/32 (Inner K-Epochs):  12%|█▎        | 8/64 [00:00<00:04, 12.18it/s]

Entropy of this k_epoch: 0.025828024372458458
Average policy_loss of this k_epoch: -0.019590549170970917
KL Divergence Average Loss: 0.007055136375129223
Total Loss of this k_epoch: -0.020811399444937706



[A


Entropy of this k_epoch: 0.023323381319642067
Average policy_loss of this k_epoch: -0.01968817226588726
KL Divergence Average Loss: 0.007848583161830902
Total Loss of this k_epoch: -0.02077585458755493


Entropy of this k_epoch: 0.02257891744375229
Average policy_loss of this k_epoch: -0.019715014845132828
KL Divergence Average Loss: 0.008052785880863667
Total Loss of this k_epoch: -0.02076343260705471






Entropy of this k_epoch: 0.021111374720931053
Average policy_loss of this k_epoch: -0.019771574065089226
KL Divergence Average Loss: 0.00853833556175232
Total Loss of this k_epoch: -0.02074175886809826


Entropy of this k_epoch: 0.01995278149843216
Average policy_loss of this k_epoch: -0.0198129303753376
KL Divergence Average Loss: 0.008889966644346714
Total Loss of this k_epoch: -0.020721668377518654


Entropy of this k_epoch: 0.018439941108226776
Average policy_loss of this k_epoch: -0.019875794649124146
KL Divergence Average Loss: 0.009472070261836052
Total Loss of this k_epoch: -0.020703069865703583






Entropy of this k_epoch: 0.018084503710269928
Average policy_loss of this k_epoch: -0.019879065454006195
KL Divergence Average Loss: 0.009566185995936394
Total Loss of this k_epoch: -0.020687628537416458


Entropy of this k_epoch: 0.01818731427192688
Average policy_loss of this k_epoch: -0.019872792065143585
KL Divergence Average Loss: 0.009587503038346767
Total Loss of this k_epoch: -0.020686281844973564


Entropy of this k_epoch: 0.01868451200425625
Average policy_loss of this k_epoch: -0.01986420899629593
KL Divergence Average Loss: 0.009342974051833153
Total Loss of this k_epoch: -0.020705005154013634






Entropy of this k_epoch: 0.019768744707107544
Average policy_loss of this k_epoch: -0.019821451976895332
KL Divergence Average Loss: 0.008943134918808937
Total Loss of this k_epoch: -0.020720457658171654


Entropy of this k_epoch: 0.017727579921483994
Average policy_loss of this k_epoch: -0.019891653209924698
KL Divergence Average Loss: 0.009712275117635727
Total Loss of this k_epoch: -0.020680909976363182


Entropy of this k_epoch: 0.01882605254650116
Average policy_loss of this k_epoch: -0.019853316247463226
KL Divergence Average Loss: 0.009309736080467701
Total Loss of this k_epoch: -0.020701522007584572






Entropy of this k_epoch: 0.018744757398962975
Average policy_loss of this k_epoch: -0.019856330007314682
KL Divergence Average Loss: 0.009312942624092102
Total Loss of this k_epoch: -0.02070043981075287


Entropy of this k_epoch: 0.018946051597595215
Average policy_loss of this k_epoch: -0.01984710618853569
KL Divergence Average Loss: 0.009253239259123802
Total Loss of this k_epoch: -0.020701875910162926


Entropy of this k_epoch: 0.020181775093078613
Average policy_loss of this k_epoch: -0.019802048802375793
KL Divergence Average Loss: 0.008817588910460472
Total Loss of this k_epoch: -0.02072296105325222






Entropy of this k_epoch: 0.01905331015586853
Average policy_loss of this k_epoch: -0.019842760637402534
KL Divergence Average Loss: 0.009244226850569248
Total Loss of this k_epoch: -0.02070298232138157


Entropy of this k_epoch: 0.021404393017292023
Average policy_loss of this k_epoch: -0.01975945755839348
KL Divergence Average Loss: 0.008442365564405918
Total Loss of this k_epoch: -0.020745253190398216


Entropy of this k_epoch: 0.021169384941458702
Average policy_loss of this k_epoch: -0.01976143941283226
KL Divergence Average Loss: 0.008540692739188671
Total Loss of this k_epoch: -0.020734502002596855






Entropy of this k_epoch: 0.0227896049618721
Average policy_loss of this k_epoch: -0.01971505954861641
KL Divergence Average Loss: 0.008013540878891945
Total Loss of this k_epoch: -0.02077440544962883


Entropy of this k_epoch: 0.02363085374236107
Average policy_loss of this k_epoch: -0.01967034861445427
KL Divergence Average Loss: 0.007717886008322239
Total Loss of this k_epoch: -0.020774712786078453


Entropy of this k_epoch: 0.023471908643841743
Average policy_loss of this k_epoch: -0.01967930980026722
KL Divergence Average Loss: 0.007773770950734615
Total Loss of this k_epoch: -0.020775167271494865






Entropy of this k_epoch: 0.0271427184343338
Average policy_loss of this k_epoch: -0.019534628838300705
KL Divergence Average Loss: 0.0067335469648242
Total Loss of this k_epoch: -0.020824430510401726


Entropy of this k_epoch: 0.026163285598158836
Average policy_loss of this k_epoch: -0.01957079768180847
KL Divergence Average Loss: 0.007056231610476971
Total Loss of this k_epoch: -0.020808398723602295


Entropy of this k_epoch: 0.028605658560991287
Average policy_loss of this k_epoch: -0.01947285607457161
KL Divergence Average Loss: 0.006389313377439976
Total Loss of this k_epoch: -0.02083924598991871






Entropy of this k_epoch: 0.030620407313108444
Average policy_loss of this k_epoch: -0.019401025027036667
KL Divergence Average Loss: 0.005770583637058735
Total Loss of this k_epoch: -0.02087433822453022


Entropy of this k_epoch: 0.04184293374419212
Average policy_loss of this k_epoch: -0.017649635672569275
KL Divergence Average Loss: 0.21774473786354065
Total Loss of this k_epoch: -0.017564335837960243


Entropy of this k_epoch: 0.033913541585206985
Average policy_loss of this k_epoch: -0.019241822883486748
KL Divergence Average Loss: 0.004955035634338856
Total Loss of this k_epoch: -0.0208879504352808






Entropy of this k_epoch: 0.04616766422986984
Average policy_loss of this k_epoch: -0.018374890089035034
KL Divergence Average Loss: 0.008537044748663902
Total Loss of this k_epoch: -0.020597903057932854


Entropy of this k_epoch: 0.040372781455516815
Average policy_loss of this k_epoch: -0.01895112544298172
KL Divergence Average Loss: 0.0036553111858665943
Total Loss of this k_epoch: -0.020933212712407112


Entropy of this k_epoch: 0.053129199892282486
Average policy_loss of this k_epoch: -0.01815624348819256
KL Divergence Average Loss: 0.005716084968298674
Total Loss of this k_epoch: -0.020755542442202568






Entropy of this k_epoch: 0.047575127333402634
Average policy_loss of this k_epoch: -0.018590744584798813
KL Divergence Average Loss: 0.0030618482269346714
Total Loss of this k_epoch: -0.02093888260424137


Entropy of this k_epoch: 0.053511735051870346
Average policy_loss of this k_epoch: -0.018363995477557182
KL Divergence Average Loss: 0.0016077656764537096
Total Loss of this k_epoch: -0.021023504436016083


Entropy of this k_epoch: 0.06110439822077751
Average policy_loss of this k_epoch: -0.01766742393374443
KL Divergence Average Loss: 0.00621885946020484
Total Loss of this k_epoch: -0.020660456269979477






Entropy of this k_epoch: 0.0629357248544693
Average policy_loss of this k_epoch: -0.01784009113907814
KL Divergence Average Loss: 0.001384038245305419
Total Loss of this k_epoch: -0.02097303792834282


Entropy of this k_epoch: 0.05559978634119034
Average policy_loss of this k_epoch: -0.01824599876999855
KL Divergence Average Loss: 0.0013973480090498924
Total Loss of this k_epoch: -0.021012013778090477


Entropy of this k_epoch: 0.06235116720199585
Average policy_loss of this k_epoch: -0.017683546990156174
KL Divergence Average Loss: 0.004470317158848047
Total Loss of this k_epoch: -0.020756401121616364






Entropy of this k_epoch: 0.06279847025871277
Average policy_loss of this k_epoch: -0.01784554310142994
KL Divergence Average Loss: 0.0015618997858837247
Total Loss of this k_epoch: -0.020969849079847336


Entropy of this k_epoch: 0.05741693079471588
Average policy_loss of this k_epoch: -0.015179641544818878
KL Divergence Average Loss: 0.0014181286096572876
Total Loss of this k_epoch: -0.018036305904388428


Entropy of this k_epoch: 0.05106053501367569
Average policy_loss of this k_epoch: -0.018476640805602074
KL Divergence Average Loss: 0.0018958636792376637
Total Loss of this k_epoch: -0.021010709926486015






Entropy of this k_epoch: 0.04588346928358078
Average policy_loss of this k_epoch: -0.01873304694890976
KL Divergence Average Loss: 0.0026692005340009928
Total Loss of this k_epoch: -0.02100052870810032


Entropy of this k_epoch: 0.04343646764755249
Average policy_loss of this k_epoch: -0.018851477652788162
KL Divergence Average Loss: 0.0030543042812496424
Total Loss of this k_epoch: -0.020992757752537727


Entropy of this k_epoch: 0.041614703834056854
Average policy_loss of this k_epoch: -0.018931079655885696
KL Divergence Average Loss: 0.0033692799042910337
Total Loss of this k_epoch: -0.02097812108695507






Entropy of this k_epoch: 0.03828872740268707
Average policy_loss of this k_epoch: -0.019090846180915833
KL Divergence Average Loss: 0.004011193756014109
Total Loss of this k_epoch: -0.020965170115232468


Entropy of this k_epoch: 0.03593474626541138
Average policy_loss of this k_epoch: -0.01917726919054985
KL Divergence Average Loss: 0.0044669052585959435
Total Loss of this k_epoch: -0.020929336547851562


Entropy of this k_epoch: 0.03499099612236023
Average policy_loss of this k_epoch: -0.019200380891561508
KL Divergence Average Loss: 0.004812290892004967
Total Loss of this k_epoch: -0.02090180665254593






Entropy of this k_epoch: 0.03361758217215538
Average policy_loss of this k_epoch: -0.019254866987466812
KL Divergence Average Loss: 0.004984727595001459
Total Loss of this k_epoch: -0.020885897800326347


Entropy of this k_epoch: 0.03432702273130417
Average policy_loss of this k_epoch: -0.019247863441705704
KL Divergence Average Loss: 0.0048939259722828865
Total Loss of this k_epoch: -0.020915275439620018


Entropy of this k_epoch: 0.03734393045306206
Average policy_loss of this k_epoch: -0.019077956676483154
KL Divergence Average Loss: 0.03045783005654812
Total Loss of this k_epoch: -0.020640574395656586







Entropy of this k_epoch: 0.03235863149166107
Average policy_loss of this k_epoch: -0.019321322441101074
KL Divergence Average Loss: 0.005373400170356035
Total Loss of this k_epoch: -0.020885519683361053


Entropy of this k_epoch: 0.032261211425065994
Average policy_loss of this k_epoch: -0.019333116710186005
KL Divergence Average Loss: 0.00539631862193346
Total Loss of this k_epoch: -0.02089221403002739


Entropy of this k_epoch: 0.03456314280629158
Average policy_loss of this k_epoch: -0.019235309213399887
KL Divergence Average Loss: 0.004734094254672527
Total Loss of this k_epoch: -0.020916124805808067



Epoch 31/32 (Inner K-Epochs):  91%|█████████ | 58/64 [00:04<00:00, 12.33it/s][A


Entropy of this k_epoch: 0.03607950359582901
Average policy_loss of this k_epoch: -0.01917293108999729
KL Divergence Average Loss: 0.004391754977405071
Total Loss of this k_epoch: -0.020932989194989204


Entropy of this k_epoch: 0.03679565340280533
Average policy_loss of this k_epoch: -0.01912844181060791
KL Divergence Average Loss: 0.004293385893106461
Total Loss of this k_epoch: -0.020925290882587433






Entropy of this k_epoch: 0.03814607113599777
Average policy_loss of this k_epoch: -0.01906576007604599
KL Divergence Average Loss: 0.004067664034664631
Total Loss of this k_epoch: -0.020932387560606003


Entropy of this k_epoch: 0.03814130276441574
Average policy_loss of this k_epoch: -0.01908179558813572
KL Divergence Average Loss: 0.004132980480790138
Total Loss of this k_epoch: -0.02094753086566925


Entropy of this k_epoch: 0.03693768382072449
Average policy_loss of this k_epoch: -0.01912328600883484
KL Divergence Average Loss: 0.004332701675593853
Total Loss of this k_epoch: -0.020926842465996742




Epoch 31/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.42it/s]
>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  97%|█████████▋| 31/32 [02:48<00:05,  5.41s/it]

Entropy of this k_epoch: 0.04165609925985336
Average policy_loss of this k_epoch: -0.01890498958528042
KL Divergence Average Loss: 0.0033816725481301546
Total Loss of this k_epoch: -0.02095397748053074

Last k_epoch stats:
Loss: -0.0209540 | Ratio: 0.9972421 | Entropy Term: 0.0416561
Entire Validation Dataset Accuracy: 0.9375| 180.0 / 192.0 samples
old_predictions: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
batch_labels True Values: 
tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
discounted_rewards: 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0') Shape: torch.Size([64])
all_advantages_tensor: 
tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250,
        0.1250], device='cuda:0')





Entropy of this k_epoch: 0.04205523058772087
Average policy_loss of this k_epoch: 0.00010196119546890259
KL Divergence Average Loss: 0.00030076870461925864
Total Loss of this k_epoch: -0.0019977926276624203


Entropy of this k_epoch: 0.045076917856931686
Average policy_loss of this k_epoch: 0.00018647313117980957
KL Divergence Average Loss: 0.00029265997000038624
Total Loss of this k_epoch: -0.002064446220174432






Entropy of this k_epoch: 0.04768753796815872
Average policy_loss of this k_epoch: 0.0002850070595741272
KL Divergence Average Loss: 0.0006159838521853089
Total Loss of this k_epoch: -0.002093210117891431


Entropy of this k_epoch: 0.05035976693034172
Average policy_loss of this k_epoch: 0.000373300164937973
KL Divergence Average Loss: 0.0008347872644662857
Total Loss of this k_epoch: -0.0021363403648138046


Entropy of this k_epoch: 0.05271325260400772
Average policy_loss of this k_epoch: 0.00044977664947509766
KL Divergence Average Loss: 0.0010022944770753384
Total Loss of this k_epoch: -0.0021758631337434053






Entropy of this k_epoch: 0.05902436375617981
Average policy_loss of this k_epoch: 0.0006659403443336487
KL Divergence Average Loss: 0.0019836167339235544
Total Loss of this k_epoch: -0.002265441697090864


Entropy of this k_epoch: 0.06294545531272888
Average policy_loss of this k_epoch: 0.0008030124008655548
KL Divergence Average Loss: 0.0024249113630503416
Total Loss of this k_epoch: -0.00232001137919724


Entropy of this k_epoch: 0.0673941969871521
Average policy_loss of this k_epoch: 0.0009766742587089539
KL Divergence Average Loss: 0.003538635093718767
Total Loss of this k_epoch: -0.0023576493840664625






Entropy of this k_epoch: 0.0757763534784317
Average policy_loss of this k_epoch: 0.0012983940541744232
KL Divergence Average Loss: 0.00540545117110014
Total Loss of this k_epoch: -0.002436369191855192


Entropy of this k_epoch: 0.08718705177307129
Average policy_loss of this k_epoch: 0.0017744451761245728
KL Divergence Average Loss: 0.00944442581385374
Total Loss of this k_epoch: -0.0024904634337872267


Entropy of this k_epoch: 0.09357714653015137
Average policy_loss of this k_epoch: 0.002080567181110382
KL Divergence Average Loss: 0.012868945486843586
Total Loss of this k_epoch: -0.0024696008767932653






Entropy of this k_epoch: 0.09802533686161041
Average policy_loss of this k_epoch: 0.0022247061133384705
KL Divergence Average Loss: 0.01309514231979847
Total Loss of this k_epoch: -0.0025456096045672894


Entropy of this k_epoch: 0.10616055876016617
Average policy_loss of this k_epoch: 0.0026782676577568054
KL Divergence Average Loss: 0.019111353904008865
Total Loss of this k_epoch: -0.0024386467412114143


Entropy of this k_epoch: 0.10148545354604721
Average policy_loss of this k_epoch: 0.00237969309091568
KL Divergence Average Loss: 0.014971282333135605
Total Loss of this k_epoch: -0.0025448668748140335






Entropy of this k_epoch: 0.11390954256057739
Average policy_loss of this k_epoch: 0.003321312367916107
KL Divergence Average Loss: 0.03126969560980797
Total Loss of this k_epoch: -0.0020614678505808115


Entropy of this k_epoch: 0.11432603001594543
Average policy_loss of this k_epoch: 0.002988062798976898
KL Divergence Average Loss: 0.02135719358921051
Total Loss of this k_epoch: -0.0025146668776869774


Entropy of this k_epoch: 0.11745435744524002
Average policy_loss of this k_epoch: 0.0031191110610961914
KL Divergence Average Loss: 0.02283639833331108
Total Loss of this k_epoch: -0.0025252429768443108






Entropy of this k_epoch: 0.11481404304504395
Average policy_loss of this k_epoch: 0.0030206218361854553
KL Divergence Average Loss: 0.022112395614385605
Total Loss of this k_epoch: -0.002498956397175789


Entropy of this k_epoch: 0.11487555503845215
Average policy_loss of this k_epoch: 0.003019440919160843
KL Divergence Average Loss: 0.021909328177571297
Total Loss of this k_epoch: -0.002505243755877018


Entropy of this k_epoch: 0.1151357963681221
Average policy_loss of this k_epoch: 0.0030301809310913086
KL Divergence Average Loss: 0.022015128284692764
Total Loss of this k_epoch: -0.0025064575020223856






Entropy of this k_epoch: 0.11748954653739929
Average policy_loss of this k_epoch: 0.0031158439815044403
KL Divergence Average Loss: 0.0229296013712883
Total Loss of this k_epoch: -0.002529337303712964


Entropy of this k_epoch: 0.11718246340751648
Average policy_loss of this k_epoch: 0.0031195729970932007
KL Divergence Average Loss: 0.022767897695302963
Total Loss of this k_epoch: -0.0025118710473179817


Entropy of this k_epoch: 0.11257363855838776
Average policy_loss of this k_epoch: 0.0028928741812705994
KL Divergence Average Loss: 0.020228177309036255
Total Loss of this k_epoch: -0.002533526159822941






Entropy of this k_epoch: 0.11294909566640854
Average policy_loss of this k_epoch: 0.0029007643461227417
KL Divergence Average Loss: 0.020321790128946304
Total Loss of this k_epoch: -0.002543472684919834


Entropy of this k_epoch: 0.10630885511636734
Average policy_loss of this k_epoch: 0.0025977641344070435
KL Divergence Average Loss: 0.017127497121691704
Total Loss of this k_epoch: -0.0025464040227234364


Entropy of this k_epoch: 0.10632224380970001
Average policy_loss of this k_epoch: 0.0026009492576122284
KL Divergence Average Loss: 0.016918156296014786
Total Loss of this k_epoch: -0.0025459814351052046






Entropy of this k_epoch: 0.09889760613441467
Average policy_loss of this k_epoch: 0.0022598356008529663
KL Divergence Average Loss: 0.013675319030880928
Total Loss of this k_epoch: -0.002548291813582182


Entropy of this k_epoch: 0.09934736788272858
Average policy_loss of this k_epoch: 0.002288222312927246
KL Divergence Average Loss: 0.014036417007446289
Total Loss of this k_epoch: -0.0025387820787727833


Entropy of this k_epoch: 0.09993043541908264
Average policy_loss of this k_epoch: 0.0023113153874874115
KL Divergence Average Loss: 0.014000088907778263
Total Loss of this k_epoch: -0.00254520564340055






Entropy of this k_epoch: 0.09910282492637634
Average policy_loss of this k_epoch: 0.0022748783230781555
KL Divergence Average Loss: 0.013751452788710594
Total Loss of this k_epoch: -0.0025427485816180706


Entropy of this k_epoch: 0.09304050356149673
Average policy_loss of this k_epoch: 0.001983828842639923
KL Divergence Average Loss: 0.01097945962101221
Total Loss of this k_epoch: -0.0025584017857909203


Entropy of this k_epoch: 0.09288594126701355
Average policy_loss of this k_epoch: 0.0019964277744293213
KL Divergence Average Loss: 0.011017551645636559
Total Loss of this k_epoch: -0.002537693828344345






Entropy of this k_epoch: 0.09221172332763672
Average policy_loss of this k_epoch: 0.0019647032022476196
KL Divergence Average Loss: 0.01076632272452116
Total Loss of this k_epoch: -0.002538220025599003


Entropy of this k_epoch: 0.09465666115283966
Average policy_loss of this k_epoch: 0.0020742267370224
KL Divergence Average Loss: 0.01198987103998661
Total Loss of this k_epoch: -0.0025387078057974577


Entropy of this k_epoch: 0.0975215882062912
Average policy_loss of this k_epoch: 0.002188645303249359
KL Divergence Average Loss: 0.012823783792555332
Total Loss of this k_epoch: -0.002559196436777711






Entropy of this k_epoch: 0.09485459327697754
Average policy_loss of this k_epoch: 0.002084050327539444
KL Divergence Average Loss: 0.011922146193683147
Total Loss of this k_epoch: -0.002539458218961954


Entropy of this k_epoch: 0.09492477774620056
Average policy_loss of this k_epoch: 0.0020829886198043823
KL Divergence Average Loss: 0.011739583685994148
Total Loss of this k_epoch: -0.002545854775235057


Entropy of this k_epoch: 0.10029986500740051
Average policy_loss of this k_epoch: 0.002308078110218048
KL Divergence Average Loss: 0.014135422185063362
Total Loss of this k_epoch: -0.0025655608624219894






Entropy of this k_epoch: 0.0951659083366394
Average policy_loss of this k_epoch: 0.0020906776189804077
KL Divergence Average Loss: 0.012038446962833405
Total Loss of this k_epoch: -0.002547233598306775


Entropy of this k_epoch: 0.09525471180677414
Average policy_loss of this k_epoch: 0.002110358327627182
KL Divergence Average Loss: 0.012363998219370842
Total Loss of this k_epoch: -0.002528737299144268


Entropy of this k_epoch: 0.10110712051391602
Average policy_loss of this k_epoch: 0.002348296344280243
KL Divergence Average Loss: 0.014571664854884148
Total Loss of this k_epoch: -0.0025613433681428432






Entropy of this k_epoch: 0.1019211933016777
Average policy_loss of this k_epoch: 0.002390168607234955
KL Divergence Average Loss: 0.014934713020920753
Total Loss of this k_epoch: -0.002556544030085206


Entropy of this k_epoch: 0.10496942698955536
Average policy_loss of this k_epoch: 0.002518739551305771
KL Divergence Average Loss: 0.01619148999452591
Total Loss of this k_epoch: -0.002567817224189639


Entropy of this k_epoch: 0.1135711669921875
Average policy_loss of this k_epoch: 0.002913091331720352
KL Divergence Average Loss: 0.020231762900948524
Total Loss of this k_epoch: -0.0025631492026150227






Entropy of this k_epoch: 0.10549367964267731
Average policy_loss of this k_epoch: 0.0025605708360671997
KL Divergence Average Loss: 0.016432298347353935
Total Loss of this k_epoch: -0.0025497903116047382


Entropy of this k_epoch: 0.10688513517379761
Average policy_loss of this k_epoch: 0.0026031769812107086
KL Divergence Average Loss: 0.017032021656632423
Total Loss of this k_epoch: -0.002570759505033493


Entropy of this k_epoch: 0.10728625953197479
Average policy_loss of this k_epoch: 0.0026403814554214478
KL Divergence Average Loss: 0.017440704628825188
Total Loss of this k_epoch: -0.002549524651840329






Entropy of this k_epoch: 0.10679972171783447
Average policy_loss of this k_epoch: 0.002603478729724884
KL Divergence Average Loss: 0.01723654940724373
Total Loss of this k_epoch: -0.0025641419924795628


Entropy of this k_epoch: 0.10632495582103729
Average policy_loss of this k_epoch: 0.002590075135231018
KL Divergence Average Loss: 0.017139364033937454
Total Loss of this k_epoch: -0.002554778940975666


Entropy of this k_epoch: 0.10764531046152115
Average policy_loss of this k_epoch: 0.0026501938700675964
KL Divergence Average Loss: 0.01762799546122551
Total Loss of this k_epoch: -0.0025557915214449167






Entropy of this k_epoch: 0.10754898935556412
Average policy_loss of this k_epoch: 0.0042965225875377655
KL Divergence Average Loss: 0.32407376170158386
Total Loss of this k_epoch: 0.0021598106250166893


Entropy of this k_epoch: 0.10888662934303284
Average policy_loss of this k_epoch: 0.002694934606552124
KL Divergence Average Loss: 0.018224790692329407
Total Loss of this k_epoch: -0.0025671490002423525


Entropy of this k_epoch: 0.10783718526363373
Average policy_loss of this k_epoch: 0.0026565678417682648
KL Divergence Average Loss: 0.01769097149372101
Total Loss of this k_epoch: -0.0025583819951862097






Entropy of this k_epoch: 0.10803942382335663
Average policy_loss of this k_epoch: 0.0026725679636001587
KL Divergence Average Loss: 0.01793895661830902
Total Loss of this k_epoch: -0.002550013829022646


Entropy of this k_epoch: 0.11358572542667389
Average policy_loss of this k_epoch: 0.0029162876307964325
KL Divergence Average Loss: 0.02053321897983551
Total Loss of this k_epoch: -0.002557666739448905


Entropy of this k_epoch: 0.10693971067667007
Average policy_loss of this k_epoch: 0.002603549510240555
KL Divergence Average Loss: 0.01721169799566269
Total Loss of this k_epoch: -0.0025713189970701933






Entropy of this k_epoch: 0.10597619414329529
Average policy_loss of this k_epoch: 0.0025809109210968018
KL Divergence Average Loss: 0.016945138573646545
Total Loss of this k_epoch: -0.0025484473444521427


Entropy of this k_epoch: 0.10537080466747284
Average policy_loss of this k_epoch: 0.0025369487702846527
KL Divergence Average Loss: 0.01642553135752678
Total Loss of this k_epoch: -0.00256733619607985


Entropy of this k_epoch: 0.10284686088562012
Average policy_loss of this k_epoch: 0.0024324506521224976
KL Divergence Average Loss: 0.015695322304964066
Total Loss of this k_epoch: -0.002552939346060157






Entropy of this k_epoch: 0.10698112845420837
Average policy_loss of this k_epoch: 0.002619341015815735
KL Divergence Average Loss: 0.017076538875699043
Total Loss of this k_epoch: -0.0025589498691260815


Entropy of this k_epoch: 0.10478749871253967
Average policy_loss of this k_epoch: 0.0025076791644096375
KL Divergence Average Loss: 0.016095150262117386
Total Loss of this k_epoch: -0.0025707443710416555


Entropy of this k_epoch: 0.10463745146989822
Average policy_loss of this k_epoch: 0.0024969354271888733
KL Divergence Average Loss: 0.01594918593764305
Total Loss of this k_epoch: -0.002575445454567671




Epoch 32/32 (Inner K-Epochs): 100%|██████████| 64/64 [00:05<00:00, 12.41it/s]


Entropy of this k_epoch: 0.10390985757112503
Average policy_loss of this k_epoch: 0.0024606361985206604
KL Divergence Average Loss: 0.015799297019839287
Total Loss of this k_epoch: -0.0025768636260181665


Entropy of this k_epoch: 0.10383570194244385
Average policy_loss of this k_epoch: 0.0024707019329071045
KL Divergence Average Loss: 0.01583714969456196
Total Loss of this k_epoch: -0.0025627119466662407

Last k_epoch stats:
Loss: -0.0025627 | Ratio: 0.9802344 | Entropy Term: 0.1038357


>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>p): 100%|██████████| 32/32 [02:53<00:00,  5.41s/it]
Main Epoch (Outer Loop): 100%|██████████| 32/32 [02:53<00:00,  5.42s/it]

Entire Validation Dataset Accuracy: 0.9271| 178.0 / 192.0 samples
Training complete.





In [103]:
SAVE_LOCATION = "./models/trained_spam_model.pth"   # Define the model path and name of the trained model weights


torch.save(trained_policy.state_dict(), f=SAVE_LOCATION)

In [110]:
test_dataset = SpamDataset(csv_file="./sms_spam_collection/data_splits/test.csv", tokenizer=tokenizer)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=args["dataloader_batch_size"], num_workers=args["dataloader_num_workers"], pin_memory=args["dataloader_pin_memory"], drop_last=True)


In [111]:
accuracy = evaluate_policy(trained_policy, test_dataloader, device='cuda')

Entire Dataset Accuracy: 0.9115 | 175.0 / 192.0 samples


In [112]:
input_text1 = "Hey, wanna go out to watch the new fantastic four movie?"
input_text2 = "XMAS Prize draws! We are trying to contact U. Todays draw shows that you have won a £2000 prize GUARANTEED. Call 09058094565 from land line. Valid 12hrs only"

In [None]:
def simple_spam_classify_single(Policy, input_text, tokenizer, device='cpu'):
    Policy.eval().to(device)
    Softmax_lyr = torch.nn.Softmax(dim=-1)

    tokenized_text = tokenizer.encode(input_text)
    torch_text=torch.tensor(tokenized_text).unsqueeze(0)    # turn into a tensor and add a batch dimension
    model_inputs = torch_text.to(device)
    # print(f"torch_text: {torch_text} | {torch_text.shape}")
    with torch.no_grad():
        logits = Policy(model_inputs)[:,-1,:]
        Class_probabilities = Softmax_lyr(logits)
    prediction = torch.argmax(input=Class_probabilities, dim=-1)
    # print(f"prediction: {prediction}")

    print("==================================================================")
    print(f"Classifiying the following text:")
    print(f"[SPAM || NOT SPAM]: \n'{input_text}'")
    print(f"Prediction ... [ => {'SPAM' if prediction.item() == 1 else 'NOT SPAM'} <= ]")
    print("==================================================================")
    


In [None]:
def simple_spam_classify_batch(Policy, input_text, tokenizer, device='cpu'):
    Policy.eval().to(device)
    Softmax_lyr = torch.nn.Softmax(dim=-1)

    tokenized_text = [
            tokenizer.encode(text) for text in input_text    # For each row in the text section of the pandas data frame tokenize the text string(sentence); creates list of token IDs for each example/item of the text data
        ]
    

    max_length = 0
    for encoded_text in tokenized_text:
        encoded_length = len(encoded_text)
        if encoded_length > max_length:
            max_length = encoded_length
    torch_text=torch.tensor(tokenized_text)    # turn into a tensor and add a batch dimension
    model_inputs = torch_text.to(device)
    # print(f"torch_text: {torch_text} | {torch_text.shape}")
    with torch.no_grad():
        logits = Policy(model_inputs)[:,-1,:]
        Class_probabilities = Softmax_lyr(logits)
    predictions = torch.argmax(input=Class_probabilities, dim=-1)
    print(f"predictions: {predictions}")

    bundle = zip(input_text, predictions.item())

    for i, (text_str, pred) in enumerate(bundle):
        print("==================================================================")
        print(f"Classifiying the following text:")
        print(f"[SPAM || NOT SPAM]: \n'{text_str}'")
        print(f"Prediction ... [ => {'SPAM' if pred == 1 else 'NOT SPAM'} <= ]")
        print("==================================================================")

In [129]:
device = "cuda" if torch.cuda.is_available() else "cpu"


In [None]:
text_batch = [input_text1, input_text2]

In [None]:
simple_spam_classify_batch(Policy=trained_policy, input_text=text_batch, tokenizer=tokenizer, device=device)


In [None]:
simple_spam_classify_single(Policy=trained_policy, input_text=input_text1, tokenizer=tokenizer, device=device)

torch_text: tensor([[10814,    11, 18869,   467,   503,   284,  2342,   262,   649,  9623,
          1440,  3807,    30]]) | torch.Size([1, 13])
prediction: tensor([0], device='cuda:0')
Classifiying the following text:
[SPAM || NOT SPAM]: 
'Hey, wanna go out to watch the new fantastic four movie?'
Prediction ... [ => NOT SPAM <= ]


In [None]:
def main(args):
    print("Setting up for Training")
    
    if args.device:     # Check if the user specified to use a CPU or GPU for training
        device = args.device
    else:
        if args.use_cuda:   # Check if the user wanted to use CUDA if available.
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


    BASE_CONFIG = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "drop_rate": 0.1,        # Dropout rate
        "qkv_bias": True         # Query-key-value bias
    }

    # Transfer to argparser setup
    gpt_size="gpt2-small (124M)"
    dataloader_batch_size=64
    num_workers=0
    pin_memory=True
    tokenizer=tiktoken.get_encoding("gpt2")

    print("Creating Datasets using train, test, and validation files.")

    prepare_datasets(data_file_path="./sms_spam_collection/SMSSpamCollection.tsv", store_directory="./sms_spam_collection/data_splits")

    train_dataset = SpamDataset(csv_file="./sms_spam_collection/data_splits/train.csv", tokenizer=tokenizer)
    test_dataset = SpamDataset(csv_file="./sms_spam_collection/data_splits/test.csv", tokenizer=tokenizer)
    validation_dataset = SpamDataset(csv_file="./sms_spam_collection/data_splits/validation.csv", tokenizer=tokenizer)
    
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=dataloader_batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=True)
    test_dataloader = DataLoader(dataset=test_dataset, batch_size=dataloader_batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=True)
    validation_dataloader = DataLoader(dataset=validation_dataset, batch_size=dataloader_batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=True)

    print("Beginning Training Script")
    start_time=time.time()

    trained_policy = grpo_train(
        model_config=BASE_CONFIG,
        train_dataloader=train_dataloader,
        validation_dataloader=validation_dataloader,
        gpt_size=gpt_size,
        epochs=args.epochs,
        learning_rate=args.learning_rate,
        batch_size=args.batch_size, # Significantly larger batch size recommended for stability
        k_epochs=args.k_epochs,
        epsilon=args.epsilon,
        beta_kl=args.beta_kl,
        entropy_coeff=args.entropy_coeff,
        log_iterations=args.log_iterations,
        gamma=args.gamma,
        device=device,
        num_envs=args.num_envs
    )
    end_time=time.time()

    elapsed_time= end_time - start_time
    hrs = int(elapsed_time / 3600)
    min = int((elapsed_time % 3600) / 60)
    seconds_remaining = elapsed_time - (hrs * 3600 ) - (min * 60)
    print(f"FINISHED MODEL TRAINING. \nTRAINING TOOK: {hrs} Hours, {min} Minutes, and {seconds_remaining} Seconds")


    print("\nTesting the trained policy:")

    test_dataset_accuracy = evaluate_policy(trained_policy, test_dataloader, current_epoch=None, max_epochs=None, device=device)

 #---------------  !!!  ---------------
    SAVE_LOCATION = "./model/trained_model.pth"   # Define the model path and name of the trained model weights

    if args.save_model:     # Check if the user wants to save the trained model weights
        if args.model_output_path:     # Check if the user specified a target save location
            SAVE_LOCATION=args.model_output_path
        
        torch.save(trained_policy.parameters(), f=SAVE_LOCATION)
        print(f"Model weights saved in: {SAVE_LOCATION}")

    print("Finished Running Script")

In [None]:
# Example usage (assuming you have a way to call this function, e.g., in a main block)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Train and test a BlackJack PPO agent.")

    # Add arguments
    parser.add_argument('--epochs', type=int, default=2000,
                        help='Number of training epochs.')
    parser.add_argument('--learning_rate', type=float, default=0.0003,
                        help='Learning rate for the optimizer.')
    parser.add_argument('--dataloader_batch_size', type=int, default=64,
                        help='Dataloader Batch sizes for train, test, validation data files.')
    parser.add_argument('--batch_size', type=int, default=1024,
                        help='Batch size for training.')
    parser.add_argument('--gpt2_size', type=str, default="gpt2-small (124M)",
                        help='GPT2 size for model construction.')
    parser.add_argument('--k_epochs', type=int, default=128,
                        help='Number of policy update epochs per trajectory collection.')
    parser.add_argument('--epsilon', type=float, default=0.2,
                        help='Clipping parameter for PPO.')
    parser.add_argument('--beta_kl', type=float, default=0.01,
                        help='KL divergence coefficient (for PPO-like algorithms).')
    parser.add_argument('--entropy_coeff', type=float, default=0.001,
                        help='Entropy regularization coefficient.')
    parser.add_argument('--log_iterations', type=int, default=100,
                        help='Log training progress every N iterations.')
    parser.add_argument('--gamma', type=float, default=0.99,
                        help='Discount factor for rewards.')
    parser.add_argument('--num_envs', type=int, default=16,
                        help='Number of parallel environments for training.')
    parser.add_argument('--use_cuda', action='store_true',
                        help='Use CUDA if available.')
    parser.add_argument('--device', type=str, default='cpu',
                        help='Explicitly set device (e.g., "cpu, cuda:0", "cpu"). Overrides --use_cuda if specified.')
    parser.add_argument('--save_model', action='store_true',
                        help='Save the trained model weights.')
    parser.add_argument('--model_output_path', type=str, default='blackjack_policy_model.pth',
                        help='Path to save the trained model weights.')

    # Parse the arguments
    args = parser.parse_args()

    
    main(args)