In [1]:
import deepsudoku
import os
import gymnasium as gym
import torch
import numpy as np
import matplotlib.pyplot as plt
import tqdm
import deepsudoku.reinforcement_learning
from captum.attr import IntegratedGradients

os.chdir(deepsudoku.REPO_PATH)

from reinforcement_exp.single_action_architectures import *

Sudoku Environment avaible at gymnasium as 'Sudoku-v0'.


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# see https://github.com/yukimasano/linear-probes/blob/master/eval_linear_probes.py

class Probes(torch.nn.Module):
    """Container that attaches one linear probe to selected layers of ``trunk``.

    The trunk's direct children are applied one after another; after every
    child whose position is listed in ``probed_layers`` the intermediate
    activation is fed to its own probe (``downsampler -> batch norm ->
    flatten + linear``).  Only the probes are meant to be trained, see
    ``lp_parameters``.

    Args:
        trunk: module whose ``children()`` can be applied sequentially.
        probed_layers: iterable of child positions (0-based) to probe.
        num_labels: number of classes each probe predicts.  The default of 9
            is a placeholder (presumably the nine Sudoku digits) -- TODO
            confirm against the actual probing target.
        input_shape: shape of ONE input sample without the batch axis; only
            used to trace activation shapes with a dummy batch.  The default
            ``(10, 9, 9)`` assumes a 10-channel one-hot encoding of a 9x9
            board -- TODO confirm against the trunk that is passed in.
        pool_size: optional output size for an ``AdaptiveMaxPool2d``
            downsampler placed in front of each probe; ``None`` (default)
            keeps the activation as is.

    Raises:
        ValueError: if a probed layer does not yield a 4-D activation, or if
            ``probed_layers`` names a position the trunk does not have.
    """

    def __init__(self, trunk, probed_layers, num_labels=9,
                 input_shape=(10, 9, 9), pool_size=None):
        super().__init__()
        self.probed_layers = list(probed_layers)
        self.trunk = trunk
        self.probes = torch.nn.ModuleList()
        # Maps trunk child position -> position inside self.probes, so that the
        # pairing does not depend on the order in which probed_layers was given.
        self._probe_slot = {}
        self._deepest_layer = max(self.probed_layers, default=-1)
        wanted = set(self.probed_layers)

        # Dummy batch used purely to discover activation shapes; it lives on
        # the same device / dtype as the trunk weights (if there are any).
        x = torch.zeros(2, *input_shape)
        reference = next(trunk.parameters(), None)
        if reference is not None:
            x = x.to(device=reference.device, dtype=reference.dtype)

        # Trace in eval mode so the dummy batch cannot disturb running
        # statistics inside the trunk; the previous modes are restored below.
        previous_modes = [(module, module.training) for module in trunk.modules()]
        trunk.eval()
        try:
            with torch.no_grad():
                for index, layer in enumerate(trunk.children()):
                    x = layer(x)
                    print(f'Visiting layer {index: 3d}: {layer} [shape: {x.shape}]')
                    if index not in wanted:
                        continue
                    if x.dim() != 4:
                        raise ValueError(
                            f"layer {index} produced a {x.dim()}-D activation; "
                            "probes expect (batch, channels, height, width)")

                    # Downsampler + linear classifier.
                    if pool_size is None:
                        downsampler = torch.nn.Identity()
                    else:
                        downsampler = torch.nn.AdaptiveMaxPool2d(pool_size)
                    y = downsampler(x)
                    x_volume = x[0].numel()
                    y_volume = y[0].numel()
                    bn = torch.nn.BatchNorm2d(y.shape[1], affine=False)
                    predictor = torch.nn.Sequential(
                        torch.nn.Flatten(),
                        torch.nn.Linear(y_volume, num_labels),
                    )

                    # Attach probe.
                    probe = torch.nn.Sequential(downsampler, bn, predictor)
                    self._probe_slot[index] = len(self.probes)
                    self.probes.append(probe.to(x.device))
                    print(f"Attaching linear probe to layer {index: 3d}: "
                          f"{layer} with size reduction {x_volume} -> {y_volume} "
                          f"({downsampler})")
        finally:
            for module, mode in previous_modes:
                module.training = mode

        unreachable = sorted(wanted - set(self._probe_slot))
        if unreachable:
            raise ValueError(f"trunk has no child layers at positions {unreachable}")

    def forward(self, x):
        """Run ``x`` through the trunk and collect every probe's prediction.

        Returns:
            list of tensors of shape ``(batch, num_labels)``, one per probed
            layer, ordered by the layer's position in the trunk.
        """
        outputs = []
        for index, layer in enumerate(self.trunk.children()):
            if index > self._deepest_layer:
                # Nothing deeper is probed, so the rest of the trunk is skipped.
                break
            x = layer(x)
            slot = self._probe_slot.get(index)
            if slot is not None:
                outputs.append(self.probes[slot](x))
        return outputs

    def lp_parameters(self):
        """Return the parameters of the probes only (the trunk is excluded)."""
        return self.probes.parameters()


def optimize_linprobes(probes, data, num_epochs=1, lr=1e-3, device=None):
    """Train all linear probes of ``probes`` jointly with SGD.

    For every batch the cross-entropy losses of all probe outputs are summed
    and a single optimizer step is taken, so all probes are optimized in one
    pass over the data.  Only ``probes.lp_parameters()`` are handed to the
    optimizer.

    Args:
        probes: module whose forward pass returns a list of logit tensors and
            which exposes ``lp_parameters()``.
        data: iterable yielding ``(input, label)`` tensor pairs; it is
            iterated once per epoch, so it must be re-iterable when
            ``num_epochs > 1``.
        num_epochs: number of passes over ``data``.
        lr: SGD learning rate (passed explicitly rather than relying on the
            optimizer's default).
        device: device to train on; ``None`` selects ``cuda:0`` when CUDA is
            available and the CPU otherwise.

    Returns:
        The same module, moved to ``device``, with updated probe weights.

    Raises:
        ValueError: if the model returns no probe outputs for a batch.
    """
    if device is None:
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    criterion = torch.nn.CrossEntropyLoss()
    model = probes.to(device)

    # We optimize all probe parameters in one pass.
    optimizer = torch.optim.SGD(model.lp_parameters(), lr=lr)

    for _ in range(num_epochs):
        for inputs, labels in data:
            inputs = inputs.to(device)
            labels = labels.to(device)

            predictions = model(inputs)
            if len(predictions) == 0:
                raise ValueError("model returned no probe outputs; nothing to optimize")

            # One summed loss per batch: every probe sees the same labels.
            total_loss = torch.stack(
                [criterion(prediction, labels) for prediction in predictions]
            ).sum()

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

    return model

        



In [12]:
env = gym.make("Sudoku-x2")

# Rebuild the agent and restore its trained weights; the checkpoint is mapped
# onto the CPU so no GPU is needed for this analysis.
agent = OnlyConvSeperateValue(True)
agent.load_state_dict(
    torch.load(
        "reinforcement_exp/runs/Sudoku-x2__Best_Model_x2_easy__1__1693322907/final_model.pth",
        map_location="cpu",
    )
)
agent.eval()

# Layer stack of agent.actor (child positions 0-4), for reference:
#   0: Conv2d(10, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   1: ReLU()
#   2: Conv2d(16, 32, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
#   3: ReLU()
#   4: Conv2d(32, 9, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))

# Probe the first four children of the actor (positions 0, 1, 2 and 3).
linprobes = Probes(agent.actor, list(range(4)))


#obs_, _ = env.reset()
#obs = torch.tensor(obs_)[None,:].float()
#obs = torch.nn.functional.one_hot(obs.to(torch.int64), 10)
#obs = obs.float()

# get actual decision
#act = agent.get_greedy_action(torch.tensor(obs_)[None,:].float())

Conv2d(10, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU()
Conv2d(16, 32, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
ReLU()
Conv2d(32, 9, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
