In [6]:
import numpy as np
import tensorflow as tf
import random as random
import tensorflowjs as tfjs


In [7]:
class GomokuBoard:
    def __init__(self, size=15):
        self.size = size
        self.board = np.zeros((size, size), dtype=int)
        self.current_player = 1

    def make_move(self, row, col):
        if self.board[row, col] == 0:
            self.board[row, col] = self.current_player
            self.current_player = 3 - self.current_player
            return True
        return False  # 이미 돌이 놓인 자리면 False 반환

    def check_win(self, player):
        # 간단한 승리 체크 (가로, 세로, 대각선)
        for i in range(self.size):
            for j in range(self.size):
                if (j <= self.size - 5 and np.all(self.board[i, j:j+5] == player) or
                    i <= self.size - 5 and np.all(self.board[i:i+5, j] == player) or
                    i <= self.size - 5 and j <= self.size - 5 and np.all(np.diagonal(self.board[i:i+5, j:j+5]) == player) or
                    i <= self.size - 5 and j >= 4 and np.all(np.diagonal(np.fliplr(self.board[i:i+5, j-4:j+1])) == player)):
                    return True
        return False
    
    def calculate_reward(self, row, col, player):
        reward = 0

        # 승리 체크
        if self.check_win(player):
            return 1.0  # 승리 시 최대 보상

        # 유효한 수 보상
        if self.board[row, col] == player:
            reward += 0.1

        # 연속된 돌에 대한 보상
        directions = [(0, 1), (1, 0), (1, 1), (1, -1)]
        for dr, dc in directions:
            count = 1
            for i in range(1, 5):
                r, c = row + dr * i, col + dc * i
                if 0 <= r < self.size and 0 <= c < self.size and self.board[r, c] == player:
                    count += 1
                else:
                    break
            for i in range(1, 5):
                r, c = row - dr * i, col - dc * i
                if 0 <= r < self.size and 0 <= c < self.size and self.board[r, c] == player:
                    count += 1
                else:
                    break
            reward += self.get_sequence_reward(count)

        # 중앙 근처에 둔 경우 추가 보상
        center = self.size // 2
        distance_to_center = abs(row - center) + abs(col - center)
        reward += max(0, (self.size - distance_to_center) / self.size * 0.1)

        return reward

    def get_sequence_reward(self, count):
        if count == 2:
            return 0.01
        elif count == 3:
            return 0.05
        elif count == 4:
            return 0.25
        else:
            return 0
    
    def get_valid_moves(self):
        return [(i, j) for i in range(self.size) for j in range(self.size) if self.board[i, j] == 0]


# AI 모델
def create_model(board_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(board_size, board_size, 1)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(board_size * board_size, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# 학습 함수
def train_model(model, num_episodes=1):
    board = GomokuBoard()
    epsilon = 0.3  # 탐험 확률

    for episode in range(num_episodes):
        state = board.board.reshape(board.size, board.size, 1)
        done = False
        move_history = set()  # 이번 게임에서 둔 수를 기록
        while not done:
            valid_moves = board.get_valid_moves()
            if not valid_moves:  # 더 이상 둘 수 있는 곳이 없으면 무승부
                done = True
                continue

            if random.random() < epsilon:
                # 탐험: 랜덤하게 유효한 수 선택
                row, col = random.choice(valid_moves)
                action = row * board.size + col
            else:
                # 활용: 모델이 예측한 최선의 수 선택
                predictions = model.predict(np.array([state]))[0]
                valid_actions = [r * board.size + c for r, c in valid_moves]
                action = max(valid_actions, key=lambda a: predictions[a])

            row, col = action // board.size, action % board.size
            
            # 유효한 수인지 다시 한번 확인
            if (row, col) not in valid_moves:
                continue

            prev_player = board.current_player
            move_success = board.make_move(row, col)
            
            if not move_success:  # 이미 돌이 놓인 자리라면 다시 선택
                continue

            move_history.add((row, col))
            
            reward = board.calculate_reward(row, col, prev_player)
            new_state = board.board.reshape(board.size, board.size, 1)
            
            if board.check_win(prev_player):
                done = True
                reward = 1.0  # 승리 시 최대 보상

            # Q-learning 업데이트
            target = reward + 0.99 * np.max(model.predict(np.array([new_state]))[0])
            target_vec = model.predict(np.array([state]))[0]
            target_vec[action] = target
            model.fit(np.array([state]), np.array([target_vec]), epochs=1, verbose=0)
            
            state = new_state
            
            if len(move_history) == board.size * board.size:  # 모든 칸이 채워졌을 때
                done = True
                

        # 에피소드 종료 후 보드 초기화
        board = GomokuBoard()

        # 탐험 확률 감소
        epsilon = max(0.01, epsilon * 0.995)
    print("Training completed.")





In [8]:
def save_model_for_web(model, filepath):
    # 네이티브 Keras 형식으로 저장
    model.save('temp_model.keras', save_format='keras')
    
    # Keras 모델을 TensorFlow.js 형식으로 변환
    tfjs.converters.save_keras_model(tf.keras.models.load_model('temp_model.keras'), filepath)
    
    print(f"Model saved for web at {filepath}")

In [9]:
train_model(model)

NameError: name 'model' is not defined

In [None]:
save_model_for_web(model, "web_model")


  saveable.load_own_variables(weights_store.get(inner_path))


failed to lookup keras version from the file,
    this is likely a weight only file
Model saved for web at web_model
