In [3]:
import gym
import gym_chess

# Create the chess environment
env = gym.make('Chess-v0')

# Reset the environment (initialize the board)
state = env.reset()

# Display the initial board state
print(env.render())

# Sample a random action (move) from the list of legal moves
action = env.action_space.sample()

# Take that action in the environment and receive feedback
next_state, reward, done, info = env.step(action)

# Display the new board state after the move
print(env.render())

# Check if the game is over
if done:
    print("Game over!")
else:
    print("Game continues.")


♜ ♞ ♝ ♛ ♚ ♝ ♞ ♜
♟ ♟ ♟ ♟ ♟ ♟ ♟ ♟
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
♙ ♙ ♙ ♙ ♙ ♙ ♙ ♙
♖ ♘ ♗ ♕ ♔ ♗ ♘ ♖


AttributeError: 'NoneType' object has no attribute 'sample'

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Actor Network
# This network is responsible for deciding which action (chess move) to take given the current state.
class Actor(nn.Module):
    def __init__(self, input_size, output_size):
        super(Actor, self).__init__()
        
        # Input layer to the first fully connected (FC) hidden layer with 128 neurons
        self.fc1 = nn.Linear(input_size, 128)
        
        # Second FC layer with 128 neurons
        self.fc2 = nn.Linear(128, 128)
        
        # Output layer which will output the action probabilities (softmax) over all possible actions
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        # Pass the input state through the first FC layer and apply ReLU activation
        x = F.relu(self.fc1(x))
        
        # Pass the result through the second FC layer and apply ReLU activation
        x = F.relu(self.fc2(x))
        
        # Output the action probabilities using softmax to ensure they sum to 1
        return F.softmax(self.fc3(x), dim=-1)

# Critic Network
# This network evaluates the "value" of the current state (i.e., how good or bad the state is).
class Critic(nn.Module):
    def __init__(self, input_size):
        super(Critic, self).__init__()
        
        # Input layer to the first fully connected (FC) hidden layer with 128 neurons
        self.fc1 = nn.Linear(input_size, 128)
        
        # Second FC layer with 128 neurons
        self.fc2 = nn.Linear(128, 128)
        
        # Output layer which outputs a single scalar value representing the value of the state
        self.fc3 = nn.Linear(128, 1)

    def forward(self, x):
        # Pass the input state through the first FC layer and apply ReLU activation
        x = F.relu(self.fc1(x))
        
        # Pass the result through the second FC layer and apply ReLU activation
        x = F.relu(self.fc2(x))
        
        # Output the value of the state (no activation function here)
        return self.fc3(x)


In [3]:
# Initialize the actor and critic networks
input_size = 768  # Example input size (board state as a flat vector)
output_size = 4672  # Example output size (number of possible moves in chess)

# Create an instance of the Actor and Critic
actor_net = Actor(input_size=input_size, output_size=output_size)
critic_net = Critic(input_size=input_size)

# Test with a dummy state (e.g., a flat vector representing the chessboard)
dummy_state = torch.rand(1, input_size)  # A batch of one random state

# Pass the dummy state through the Actor to get the action probabilities
action_probs = actor_net(dummy_state)
print(f"Action probabilities: {action_probs}")

# Pass the dummy state through the Critic to get the value of the state
state_value = critic_net(dummy_state)
print(f"State value: {state_value}")


Action probabilities: tensor([[0.0002, 0.0002, 0.0002,  ..., 0.0002, 0.0002, 0.0002]],
       grad_fn=<SoftmaxBackward0>)
State value: tensor([[-0.0447]], grad_fn=<AddmmBackward0>)
