In [140]:
import numpy as np
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
device = 'cuda' if torch.cuda.is_available() else 'cpu'

warnings.filterwarnings("ignore")

In [141]:
class TicTacToe:
    def __init__(self, agent1, agent2):
        self.board = np.zeros(9, dtype=int)  
        self.done = False  
        self.agent1 = agent1  
        self.agent2 = agent2  
        self.current_player = 1  

    def reset(self):
        self.board = np.zeros(9, dtype=int)
        self.done = False
        self.current_player = 1
        return self.board

    def render(self):
        symbols = {0: " ", 1: "X", -1: "O"}
        board = [symbols[cell] for cell in self.board]
        print("\n")
        print(f"{board[0]} | {board[1]} | {board[2]}")
        print("--+---+--")
        print(f"{board[3]} | {board[4]} | {board[5]}")
        print("--+---+--")
        print(f"{board[6]} | {board[7]} | {board[8]}")
        print("\n")

    def step(self, action):
        if self.board[action] != 0:  # Недопустимый ход
            print("Invalid move! Penalty applied.")
            return -1  # Штраф за недопустимый ход

        self.board[action] = self.current_player
        if self.check_winner(self.current_player):  # Проверка на победу
            self.done = True
            return 1  # Победа
        if 0 not in self.board:  # Проверка на ничью
            self.done = True
            return 0  # Ничья
        return 0  # Игра продолжается

    def check_winner(self, player):
        win_states = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],  # Горизонтали
            [0, 3, 6], [1, 4, 7], [2, 5, 8],  # Вертикали
            [0, 4, 8], [2, 4, 6]              # Диагонали
        ]
        for line in win_states:
            if all(self.board[i] == player for i in line):
                return True
        return False

    def play(self):
        self.reset()
        self.render()

        while not self.done:
            agent = self.agent1 if self.current_player == 1 else self.agent2
            print(f"Player {self.current_player}'s turn.")
            action = agent.act(self.board)  
            reward = self.step(action)  
            self.render()

            if reward == 1:
                print(f"Player {self.current_player} wins!")
            elif reward == 0 and self.done:
                print("It's a draw!")

            if reward != -1:  
                self.current_player *= -1





In [142]:
class RandomAgent():
    def act(self, state):
        valid_actions = [i for i in range(len(state)) if state[i] == 0]
        return random.choice(valid_actions)


In [143]:
import torch
import torch.nn as nn
import torch.nn.functional as F
device = 'cpu'

class TicTacToeModel(nn.Module):
    def __init__(self):
        super(TicTacToeModel, self).__init__()
        self.fc1 = nn.Linear(9, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 9)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        print(x)  
        return x

In [144]:
class DeepAgent():
    def __init__(self, model_path):
        self.model = torch.load(model_path).to(device)  
        self.model.eval()  

    def act(self, state):
        state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(state_tensor)
        valid_actions = [i for i in range(len(state)) if state[i] == 0]
        valid_q_values = [(i, q_values[0][i].item()) for i in valid_actions]
        best_action = max(valid_q_values, key=lambda x: x[1])[0]
        return best_action


In [145]:
class PlayerAgent():
    def act(self, state):
        while True:
            try:
                action = int(input("Enter your move (0-8): "))
                if state[action] == 0:
                    return action
                else:
                    print("Invalid move. Try again.")
            except (ValueError, IndexError):
                print("Please enter a valid position (0-8).")


In [146]:
player2 = RandomAgent()
player1 = DeepAgent(r".\\model_100000.pth")
game = TicTacToe(player1, player2)
game.play()



  |   |  
--+---+--
  |   |  
--+---+--
  |   |  


Player 1's turn.
tensor([[-0.9785, -1.0860, -0.3907, -0.8634, -0.5995, -0.3748, -0.8533, -0.9156,
         -0.1083]])


  |   |  
--+---+--
  |   |  
--+---+--
  |   | X


Player -1's turn.


  |   |  
--+---+--
  | O |  
--+---+--
  |   | X


Player 1's turn.
tensor([[36.2807, 24.2059, -5.8762, 20.3418,  4.8915, 19.8901,  6.3150, 21.9055,
         19.8494]])


X |   |  
--+---+--
  | O |  
--+---+--
  |   | X


Player -1's turn.


X |   |  
--+---+--
  | O |  
--+---+--
O |   | X


Player 1's turn.
tensor([[109.8827, 111.5956, -14.8011, 121.6105,  77.2833, 106.0513, 107.4262,
         107.3385, 103.5990]])


X |   |  
--+---+--
X | O |  
--+---+--
O |   | X


Player -1's turn.


X |   |  
--+---+--
X | O | O
--+---+--
O |   | X


Player 1's turn.
tensor([[ 89.0414,  85.5587, -20.9227,  90.4197,  66.6956,  77.7539,  70.2654,
          75.9974,  75.3911]])


X | X |  
--+---+--
X | O | O
--+---+--
O |   | X


Player -1's turn.


X | 