运行环境:pytorch、numpy、sgf、matplotlib、pillow、flask
运行 CNN + 决策树版本:
python play.py
运行 CNN 版本(目前效果更佳):
python play_old.py
运行自我对弈:
python selfplay.py
- 到 LetGoAI.py 文件中更改需要用于训练的 PolicyModel,目前默认是 PolicyModel_pro(较复杂的网络)
- 调整输出模型的文件名 MODEL_NAME = "PolicyModel2",然后使用 python train.py 进行训练
# 策略模型
class PolicyModel(nn.Module):
    """Convolutional policy network over a square board.

    Pipeline: (N, 2, width, width) input -> three 3x3 convolutions
    -> one 1x1 convolution down to 4 channels -> one fully connected layer
    -> log-softmax over the width*width board points.
    """

    def __init__(self, width=19) -> None:
        """Create the convolutional trunk and the policy head.

        Args:
            width: Side length of the square board (default 19).
        """
        super().__init__()
        self.board_width = width
        # 3x3 kernels with padding=1 preserve the width x width spatial size.
        self.conv1 = nn.Conv2d(2, 40, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(40, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # 1x1 convolution reduces 128 feature maps to 4 before flattening.
        self.conv4 = nn.Conv2d(128, 4, kernel_size=1)
        self.policy_fc1 = nn.Linear(4 * width * width, width * width)

    def forward(self, state_input):
        """Return log-probabilities over all board points.

        Args:
            state_input: Tensor of shape (N, 2, width, width).
                NOTE(review): the meaning of the two input planes is not
                visible here -- confirm against the data pipeline.

        Returns:
            Tensor of shape (N, width*width); each row is a log-softmax
            distribution, so ``exp`` of a row sums to 1.
        """
        x = F.relu(self.conv1(state_input))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        # Flatten the 4 feature maps into one vector per sample.
        x = x.view(-1, 4 * self.board_width * self.board_width)
        x = self.policy_fc1(x)
        # Normalize explicitly over the board-point dimension; omitting
        # ``dim`` is deprecated and emits a warning on every call.
        return F.log_softmax(x, dim=1)