In [1]:
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F

# this is funtional gaussian noise implimentation due to it being tensor it improve single agent rl such as dqn
class NoisyNet(nn.Module):
    def __init__(self, in_features, out_features, sigmaparam=0.4):#sigma is σi,j for all param where 3.2 INITIALISATION OF NOISY NETWORKS in the paper 0.017(for indimendent gaussain distri)
      super(NoisyNet, self).__init__()
      self.in_features = in_features
      self.out_features = out_features
      self.sigmaparam = sigmaparam

      # learnable param (sigma and mu)

      self.weight_mu = nn.Parameter(torch.empty(out_features, in_features))
      self.bias_mu = nn.Parameter(torch.empty(out_features))
      self.sigma_weight = nn.Parameter(torch.empty(out_features, in_features))
      self.sigma_bias = nn.Parameter(torch.empty(out_features))

      # noise param using a distribution

      self.register_buffer("epsilon_weight_middle", torch.empty(out_features, in_features))
      self.register_buffer("epsilon_bias", torch.empty(out_features))
      self.reset_parameters()
      self.factorized_noise()


    def reset_parameters(self):
      # xavier uniform due to DQN sigma activation function for actions
      mu_range = 1 / (self.in_features ** 0.5)
      self.weight_mu.data.uniform_(-mu_range, mu_range)
      self.bias_mu.data.uniform_(-mu_range, mu_range)
      self.sigma_weight.data.fill_(self.sigmaparam)
      self.sigma_bias.data.fill_(self.sigmaparam)
    def noise(self,size):
      factor_noise = torch.randn(size, device=self.weight_mu.device)
      return factor_noise.sign().mul_(factor_noise.abs().sqrt_()) #this is the method they used in the paper f(x) = sign(x) * sqrt(abs(x))

    def factorized_noise(self):
      epsilon_in = self.noise(self.in_features)
      epsilon_out = self.noise(self.out_features)
      self.epsilon_weight = self.epsilon_weight_middle.copy_(epsilon_out.ger(epsilon_in))
      self.epsilon_bias.copy_(epsilon_out)
    def forward(self, input):
      '''
      Jason change this please, I am not sure how you defined trianing = true or false
      this return currently present is for training = true where there are presence of noise ie. sigma*epsilon

      For evaluation you will need to only return F.linear(input, self.weight_mu, self.bias_mu)
      see below
      '''
      noisy_weigth = self.weight_mu + (self.sigma_weight * self.epsilon_weight)
      noisy_bias = self.bias_mu + (self.sigma_bias * self.epsilon_bias)
      return F.linear(input, noisy_weigth, noisy_bias)




    # def forward(self, input):
    #   if self.training:
    #       # (Optional) Resample noise on every forward pass
    #     self.factorized_noise()

    #     noisy_weight = self.weight_mu + (self.sigma_weight * self.epsilon_weight)
    #     noisy_bias   = self.bias_mu   + (self.sigma_bias   * self.epsilon_bias)

    #     return F.linear(input, noisy_weight, noisy_bias)
    #   else:
    #     # In evaluation mode, we typically want deterministic (noise-free) output
    #     return F.linear(input, self.weight_mu, self.bias_mu)

In [2]:
class SimpleModel(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        super(SimpleModel, self).__init__()
        # Replace nn.Linear with NoisyNetIndependent
        self.fc1 = NoisyNet(in_dim, hidden_dim)
        self.fc2 = NoisyNet(hidden_dim, out_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def main():
    # Create a random tensor with batch_size=4 and in_dim=3
    input_data = torch.randn(4, 3)

    # Initialize our model
    model = SimpleModel(in_dim=3, hidden_dim=5, out_dim=2)

    print("=== Training Mode (model.train()) ===")
    model.train()
    output_train = model(input_data)
    print("Output (training):\n", output_train, "\n")

    print("=== Evaluation Mode (model.eval()) ===")
    model.eval()
    output_eval = model(input_data)
    print("Output (evaluation):\n", output_eval, "\n")

if __name__ == "__main__":
    main()

=== Training Mode (model.train()) ===
Output (training):
 tensor([[-0.5874,  0.4294],
        [-1.9996,  1.8166],
        [-1.0446,  0.5830],
        [-3.4165,  2.9409]], grad_fn=<AddmmBackward0>) 

=== Evaluation Mode (model.eval()) ===
Output (evaluation):
 tensor([[-0.5874,  0.4294],
        [-1.9996,  1.8166],
        [-1.0446,  0.5830],
        [-3.4165,  2.9409]], grad_fn=<AddmmBackward0>) 



In [4]:
class CNN(nn.Module):
    def __init__(self, input_shape, num_actions):
        super(CNN, self).__init__()
        # greyscale Image is(stack,height,width)
        stack, height, width = input_shape
        self.conv = nn.Sequential(
            nn.Conv2d(stack,16,kernel_size=4, stride=2, padding=1),
            nn.ReLU(),

            nn.Conv2d(16,32,kernel_size=3),
            nn.ReLU(),

            nn.Conv2d(32,64, kernel_size=2),
            nn.ReLU(),
        )

        # This is for finding the size to dense more robust compared to decision manually
        with torch.no_grad():
                # Torch uses(1,channels,height,width)
                test = torch.zeros(1, stack, height, width)
                find_conv_size = self.conv(test)
                conv_size = find_conv_size.numel()
        '''
        This is the line to change for the CNN

        '''
        # self.out1 = nn.Linear(conv_size,num_actions)
        self.out1 = NoisyNet(conv_size,num_actions)


    def forward(self,x):
        x = self.conv(x)
        x = torch.flatten(x, start_dim=1)
        x = self.out1(x)
        return x



def main():
    # Suppose your input shape is (stack=1, height=84, width=84)
    input_shape = (1, 84, 84)
    num_actions = 4

    # Instantiate the CNN with NoisyNet
    model = CNN(input_shape, num_actions)

    # Switch to train mode
    model.train()
    train_input = torch.randn(2, *input_shape)  # batch_size=2, shape=(2,1,84,84)
    train_output = model(train_input)
    print("=== Training Mode (model.train()) ===")
    print("Output (training):")
    print(train_output, "\n")

    # Switch to eval mode
    model.eval()
    eval_input = torch.randn(2, *input_shape)  # new random input
    eval_output = model(eval_input)
    print("=== Evaluation Mode (model.eval()) ===")
    print("Output (evaluation):")
    print(eval_output, "\n")


if __name__ == "__main__":
    main()

=== Training Mode (model.train()) ===
Output (training):
tensor([[ -2.3016,   4.0164,   5.4170, -11.3774],
        [ -1.3299,   2.3595,   3.2125,  -6.6374]], grad_fn=<AddmmBackward0>) 

=== Evaluation Mode (model.eval()) ===
Output (evaluation):
tensor([[ -2.9934,   5.2817,   7.1703, -15.0740],
        [ -0.9288,   1.6107,   2.2091,  -4.6028]], grad_fn=<AddmmBackward0>) 

