In [4]:
import torch
import torch.nn as nn
import numpy as np
from utils import State, load_data
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# Preprocessing function with new normalization
def preprocess_state(state: State):
    """Encode a game state as a (4, 9, 9) float32 array.

    Channel layout:
        0 -- board cells: 1 -> 1, 2 -> -1, anything else -> 0.
        1 -- constant plane: 1 when ``state.fill_num == 1``, otherwise -1.
        2 -- local board status, each entry spread over its 3x3 block:
             1 -> 1, 2 -> -1, anything else (e.g. 0 or 3) -> 0.
        3 -- 1.0 at every cell listed by ``state.get_all_valid_actions()``,
             0 elsewhere.

    Args:
        state: object exposing ``board`` and ``local_board_status`` (both
            indexed as ``[i, j]`` for i, j in 0..2), ``fill_num`` and
            ``get_all_valid_actions()`` yielding ``(i, j, k, l)`` tuples.

    Returns:
        np.ndarray of shape (4, 9, 9) and dtype float32.
    """
    planes = np.zeros((4, 9, 9), dtype=np.float32)
    # Iterating the first axis yields views, so writes below land in `planes`.
    cells_plane, turn_plane, status_plane, moves_plane = planes

    grid = state.board
    sub_status = state.local_board_status

    # Whole-plane marker for the player whose turn it is.
    turn_plane.fill(1 if state.fill_num == 1 else -1)

    for big_r, big_c in np.ndindex(3, 3):
        rows = slice(3 * big_r, 3 * big_r + 3)
        cols = slice(3 * big_c, 3 * big_c + 3)

        # Cell contents of this local board.
        sub_board = grid[big_r, big_c]
        cells_plane[rows, cols] = np.select(
            [sub_board == 1, sub_board == 2], [1, -1], default=0
        )

        # Outcome of this local board, repeated across its 3x3 footprint.
        outcome = sub_status[big_r, big_c]
        if outcome == 1:
            status_value = 1
        elif outcome == 2:
            status_value = -1
        else:
            status_value = 0
        status_plane[rows, cols] = status_value

    # Mark every legal move at its global (row, col) position.
    for big_r, big_c, small_r, small_c in state.get_all_valid_actions():
        moves_plane[3 * big_r + small_r, 3 * big_c + small_c] = 1.0

    return planes

In [6]:
class EvaluationNet(nn.Module):
    """Convolutional value network: 4-channel input -> one scalar in [-1, 1].

    ``conv_layers`` runs four 3x3 conv / batch-norm / LeakyReLU stages
    (4 -> 16 -> 32 -> 64 -> 128 channels) followed by global average pooling.
    ``fc_layers`` maps the resulting 128 features through 768 and 192 hidden
    units (batch-norm, LeakyReLU, dropout) to a single Tanh-squashed output.
    """

    def __init__(self):
        super().__init__()

        # Convolutional trunk; padding=1 keeps the spatial size unchanged.
        conv_stack = []
        in_channels = 4
        for out_channels in (16, 32, 64, 128):
            conv_stack.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.LeakyReLU(0.1),
            ])
            in_channels = out_channels
        # Collapse each feature map to a single value: (batch, 128, 1, 1).
        conv_stack.append(nn.AdaptiveAvgPool2d(1))
        self.conv_layers = nn.Sequential(*conv_stack)

        # Fully connected head: 128 -> 768 -> 192 -> 1.
        fc_stack = []
        in_features = 128
        for out_features, drop_p in ((768, 0.3), (192, 0.2)):
            fc_stack.extend([
                nn.Linear(in_features, out_features),
                nn.BatchNorm1d(out_features),
                nn.LeakyReLU(0.1),
                nn.Dropout(drop_p),
            ])
            in_features = out_features
        fc_stack.extend([nn.Linear(in_features, 1), nn.Tanh()])
        self.fc_layers = nn.Sequential(*fc_stack)

    def forward(self, x):
        """Return a (batch, 1) tensor of values for a (batch, 4, H, W) input."""
        pooled = self.conv_layers(x)
        # Flatten: (batch, 128, 1, 1) -> (batch, 128)
        flat = pooled.flatten(1)
        return self.fc_layers(flat)

# Instantiate a fresh network and report how many parameters it holds.
model = EvaluationNet()
num_params = sum(map(torch.Tensor.numel, model.parameters()))
# The size estimate assumes 4 bytes per parameter (float32 storage).
print(
    f"Number of parameters: {num_params}",
    f"Estimated size: {num_params * 4 / 1024 / 1024:.2f} MB",
    sep="\n",
)

Number of parameters: 346897
Estimated size: 1.32 MB


In [8]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# Encode every (state, value) pair returned by load_data().
data = load_data()
encoded_states, targets = [], []
for state, value in data:
    encoded_states.append(preprocess_state(state))
    targets.append(value)
X = np.array(encoded_states)
y = np.array(targets).reshape(-1, 1)  # column vector, one target per row

# Hold out 15% of the samples, then halve the hold-out into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.15, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Prefer CUDA, then Apple MPS, then fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Train and validation splits live on the device as float32 tensors.
X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor = (
    torch.tensor(split, dtype=torch.float32).to(device)
    for split in (X_train, y_train, X_val, y_val)
)

# Shuffled mini-batches of 128 for training.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)



In [9]:
# Train EvaluationNet with Adam, halving the learning rate when the validation
# loss has not improved for 5 epochs, and checkpoint the best weights so far.
model = EvaluationNet().to(device)
criterion = nn.SmoothL1Loss()  # Robust loss for regression
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

num_epochs = 100
best_val_loss = float('inf')
for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0.0
    samples_seen = 0
    for batch_X, batch_y in train_loader:
        batch_size = batch_X.size(0)
        # BatchNorm1d cannot compute batch statistics from a single sample in
        # train mode and raises; this happens for the trailing batch whenever
        # len(train_dataset) % batch_size == 1, so skip such a batch.
        if batch_size < 2:
            continue
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average.
        total_train_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Average over the samples actually used (equals len(train_dataset) unless
    # a 1-sample batch was skipped above).
    avg_train_loss = total_train_loss / max(samples_seen, 1)

    # Validation: one full-batch forward pass in eval mode, without gradients.
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = criterion(val_outputs, y_val_tensor)
    val_loss_value = val_loss.item()  # read the scalar once per epoch

    scheduler.step(val_loss_value)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.6f}, Val Loss: {val_loss_value:.6f}")

    # Keep the weights with the lowest validation loss seen so far.
    if val_loss_value < best_val_loss:
        best_val_loss = val_loss_value
        torch.save(model.state_dict(), 'best_model.pt')

    # Stop early once the validation loss drops below the 0.05 target.
    if val_loss_value < 0.05:
        print("Target loss achieved!")
        break

Epoch 1/100, Train Loss: 0.136933, Val Loss: 0.128893
Epoch 2/100, Train Loss: 0.121394, Val Loss: 0.115840
Epoch 3/100, Train Loss: 0.113403, Val Loss: 0.122323
Epoch 4/100, Train Loss: 0.104340, Val Loss: 0.112974
Epoch 5/100, Train Loss: 0.097144, Val Loss: 0.108098
Epoch 6/100, Train Loss: 0.091142, Val Loss: 0.105538
Epoch 7/100, Train Loss: 0.085654, Val Loss: 0.094255
Epoch 8/100, Train Loss: 0.081278, Val Loss: 0.083787
Epoch 9/100, Train Loss: 0.077514, Val Loss: 0.099419
Epoch 10/100, Train Loss: 0.074837, Val Loss: 0.083736
Epoch 11/100, Train Loss: 0.071601, Val Loss: 0.083930
Epoch 12/100, Train Loss: 0.067755, Val Loss: 0.085653
Epoch 13/100, Train Loss: 0.066499, Val Loss: 0.094571
Epoch 14/100, Train Loss: 0.063646, Val Loss: 0.076588
Epoch 15/100, Train Loss: 0.060472, Val Loss: 0.084503
Epoch 16/100, Train Loss: 0.058090, Val Loss: 0.088358
Epoch 17/100, Train Loss: 0.057095, Val Loss: 0.082608
Epoch 18/100, Train Loss: 0.053980, Val Loss: 0.108688
Epoch 19/100, Train

In [None]:
# Print the weights as an OrderedDict literal for hardcoding.
#
# The training cell checkpoints the lowest-validation-loss weights to
# 'best_model.pt', while the in-memory `model` holds whatever the last epoch
# produced. Export the checkpoint, loaded onto the CPU so the printed tensor
# reprs carry no device= suffix and can be pasted on a machine without a GPU.
# NOTE(review): a checkpoint left over from an earlier run would be picked up
# here too -- rerun the training cell first if in doubt.
try:
    export_state = torch.load('best_model.pt', map_location='cpu')
except FileNotFoundError:
    # No checkpoint on disk: fall back to the live model, copied to the CPU.
    export_state = {name: tensor.cpu() for name, tensor in model.state_dict().items()}

# Print every element at high precision, with no summarisation or wrapping.
torch.set_printoptions(threshold=float('inf'), precision=10, linewidth=10000)

last_index = len(export_state) - 1
print("OrderedDict([")
for i, (name, param) in enumerate(export_state.items()):
    end_char = "," if i < last_index else ""  # no trailing comma on the final entry
    print(f"    ('{name}', {param!r}){end_char}")
print("])")