# Notebook para código de pruebas, checks y anotaciones

Chequear si las probabilidades aleatorias de choose_action() funcionan bien: cada fila de probabilidades debe sumar 1 (salvo errores de redondeo en punto flotante).

In [2]:
import numpy as np

"""probs = [[0.0040233,  0.847307,   0.14866969],
 [0.7979448,  0.15856983, 0.0434854 ],
 [0.5554874 , 0.34591073 ,0.09860188],
 [0.2927103  ,0.50127864, 0.2060111 ],
 [0.11640792, 0.45406437 ,0.4295277 ]]
"""
# Sample action-probability rows (one row per sample, three choices each).
probs = [
    [0.48661566, 0.42588234, 0.087502],
    [0.19536212, 0.3296347, 0.47500318],
    [0.37989447, 0.37718463, 0.24292094],
    [0.2112374, 0.5596584, 0.22910419],
    [0.31614146, 0.39353094, 0.2903276],
]

# Total of every row; each value is expected to be 1 up to float rounding.
sum_probs = [np.sum(row) for row in probs]

print(sum_probs)

[1.0, 1.0, 1.00000004, 0.9999999900000001, 1.0]


## Creando la red encargada de distinguir herramientas

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os

class ObserverNetwork(nn.Module):
    """Convolutional network that regresses a single tool value from an image.

    Three stride-2 convolutions (single input channel) are followed by three
    fully connected layers, the last of which has one output unit.

    Args:
        input_dims: Image size; ``input_dims[0]`` and ``input_dims[1]`` are
            read as the two spatial dimensions (height, width).
        n_actions: Accepted for interface compatibility; not used here.
        n_choices_per_finger: Accepted for interface compatibility; not used
            here.
        conv_channels: Output channels of the three convolutions, in order.
        hidden_size: Width of the first fully connected layer.
        name: Network name; the checkpoint file is ``<name>_td3``.
        checkpoint_dir: Directory where the checkpoint file is stored.
        learning_rate: Learning rate passed to the AdamW optimizer.
    """

    # Geometry shared by every convolution in the stack.
    _KERNEL_SIZE = 5
    _STRIDE = 2
    _PADDING = 2
    _N_CONV_LAYERS = 3

    def __init__(
        self,
        input_dims,
        n_actions,
        n_choices_per_finger,
        conv_channels=(16, 32, 64),
        hidden_size=256,
        name='observer',
        checkpoint_dir='Desarrollo/simulation/Env03/tmp/observer',
        learning_rate=0.001,
    ):
        super().__init__()
        self.input_dims = input_dims
        self.checkpoint_dir = checkpoint_dir
        self.name = name
        self.checkpoint_file = os.path.join(self.checkpoint_dir, name + '_td3')

        conv_kwargs = {
            'kernel_size': self._KERNEL_SIZE,
            'stride': self._STRIDE,
            'padding': self._PADDING,
        }
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=conv_channels[0], **conv_kwargs)
        self.conv2 = nn.Conv2d(in_channels=conv_channels[0], out_channels=conv_channels[1], **conv_kwargs)
        self.conv3 = nn.Conv2d(in_channels=conv_channels[1], out_channels=conv_channels[2], **conv_kwargs)

        # Size the first dense layer from the real convolution output. A plain
        # ``dim // 8`` is only right when the dimension is a multiple of 8,
        # because each of these convolutions rounds the halved size up.
        out_height = self._conv_output_size(input_dims[0])
        out_width = self._conv_output_size(input_dims[1])
        self.fc1 = nn.Linear(conv_channels[2] * out_height * out_width, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 5)
        self.fc3 = nn.Linear(5, 1)

        self.optimizer = optim.AdamW(self.parameters(), lr=learning_rate)

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        print(f"Created Observer Network on device: {self.device}")
        self.to(self.device)

    @classmethod
    def _conv_output_size(cls, size):
        """Return the spatial size left after the whole convolution stack."""
        for _ in range(cls._N_CONV_LAYERS):
            size = (size + 2 * cls._PADDING - cls._KERNEL_SIZE) // cls._STRIDE + 1
        return size

    def forward(self, img):
        """Compute the tool regression value for an image or a batch of images.

        Args:
            img: Tensor laid out as ``[batch, 1, height, width]`` or, for a
                single image, ``[1, height, width]``. It is moved to the
                network's device before use.

        Returns:
            Tensor with one value per sample: ``[batch, 1]`` for a batch,
            ``[1]`` for a single image.
        """
        img = img.to(self.device)
        x = F.leaky_relu(self.conv1(img))
        x = F.leaky_relu(self.conv2(x))
        x = F.leaky_relu(self.conv3(x))

        if x.dim() == 4:
            # Batch: keep the batch axis, flatten each sample.
            x = x.reshape((x.size(0), -1))
        elif x.dim() == 3:
            # Single image: flatten everything into one feature vector.
            x = x.reshape(-1)

        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))
        # NOTE(review): the output also goes through leaky_relu, so negative
        # predictions are scaled down; confirm this is intended for the target.
        tool_reg = F.leaky_relu(self.fc3(x))

        return tool_reg

    def save_checkpoint(self):
        """Write the network's state dict to ``self.checkpoint_file``."""
        # Create the target directory on first save instead of failing.
        if self.checkpoint_dir:
            os.makedirs(self.checkpoint_dir, exist_ok=True)
        torch.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        """Load the state dict stored at ``self.checkpoint_file``."""
        # map_location lets a checkpoint saved on another device (e.g. CUDA)
        # be restored on the device this network currently lives on.
        state = torch.load(self.checkpoint_file, map_location=self.device)
        self.load_state_dict(state)