<a href="https://colab.research.google.com/github/Sarthak-0583-Ai/Lunar-Lander-Using-Deep-Q-learning/blob/main/TicTacToeGame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import the libraries

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np


# Game Logic AI

In [12]:
class TicTacToe:
    """A 3x3 tic-tac-toe board.

    The board is a NumPy integer array in which 0 marks an empty cell and
    1 / 2 mark the cells taken by player 1 / player 2.
    """

    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)

    def reset(self):
        """Clear the board by replacing it with a fresh all-zero array."""
        self.board = np.zeros((3, 3), dtype=int)

    def print_board(self):
        """Print the board, one row per line with cells separated by spaces."""
        for row in self.board:
            print(' '.join([str(cell) for cell in row]))

    def available_actions(self):
        """Return the ``(row, col)`` tuples of all empty cells in row-major order."""
        return [(r, c) for r in range(3) for c in range(3) if self.board[r, c] == 0]

    def take_action(self, action, player):
        """Place ``player``'s mark on the cell ``action`` if the move is legal.

        Args:
            action: ``(row, col)`` pair identifying the target cell.
            player: Value to write into the cell (1 or 2 in this notebook).

        Returns:
            True if the mark was placed; False if the cell lies outside the
            board or is already occupied (the board is left unchanged).
        """
        row, col = action
        # Check bounds explicitly: a too-large index would raise IndexError and
        # a negative one would silently wrap around to the opposite edge.
        if not (0 <= row < 3 and 0 <= col < 3):
            return False
        if self.board[row, col] != 0:
            return False
        self.board[row, col] = player
        return True

    def is_winner(self, player):
        """Return True if ``player`` fills a complete row, column or diagonal."""
        marks = self.board == player
        return bool(
            marks.all(axis=1).any()                  # any full row
            or marks.all(axis=0).any()               # any full column
            or marks.diagonal().all()                # main diagonal
            or np.fliplr(marks).diagonal().all()     # anti-diagonal
        )

    def is_draw(self):
        """Return True if the board is full and neither player has won."""
        board_full = bool((self.board != 0).all())
        # A full board on which the last move completed a line is a win,
        # not a draw.
        return board_full and not (self.is_winner(1) or self.is_winner(2))

    def game_over(self):
        """Return True once either player has won or the game is drawn."""
        return self.is_winner(1) or self.is_winner(2) or self.is_draw()


# Neural network module


In [13]:
class TicTacToeNN(nn.Module):
    """Fully connected network taking 9 input features and producing 9 scores.

    Two hidden layers (128 and 64 units) use ReLU; the output layer is linear,
    so the returned values are raw, unnormalised scores.
    """

    def __init__(self):
        super().__init__()
        # Layer widths: 9 -> 128 -> 64 -> 9.
        self.fc1 = nn.Linear(9, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 9)

    def forward(self, x):
        """Run ``x`` through both ReLU hidden layers and the linear output layer."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)


# Training the model

In [14]:
def generate_dataset(num_games, seed=None):
    """Play ``num_games`` games of uniformly random moves and record every move.

    Before each move the flattened board is stored as a sample and the chosen
    cell, encoded as ``row * 3 + col``, as its label. Moves of both players are
    recorded.

    NOTE(review): every label is a uniformly random legal move, so a model
    fitted to this data imitates random play rather than a winning strategy.

    Args:
        num_games: Number of games to simulate.
        seed: Optional seed for a private random generator, making the dataset
            reproducible. With the default ``None`` the module-level ``random``
            generator is used.

    Returns:
        Tuple ``(states, actions)`` of integer arrays with shapes ``(N, 9)``
        and ``(N,)``, where ``N`` is the total number of recorded moves.
    """
    rng = random if seed is None else random.Random(seed)
    states = []
    actions = []
    game = TicTacToe()
    for _ in range(num_games):
        game.reset()
        current_player = 1
        while not game.game_over():
            row, col = rng.choice(game.available_actions())
            # flatten() copies, so the sample is the board *before* the move.
            states.append(game.board.flatten())
            actions.append(row * 3 + col)
            game.take_action((row, col), current_player)
            current_player = 3 - current_player  # switch player
    # The explicit dtype and reshape keep the (N, 9) / (N,) integer layout
    # even when no moves were recorded (e.g. num_games == 0).
    return (
        np.array(states, dtype=int).reshape(-1, 9),
        np.array(actions, dtype=int),
    )

def train(model, states, actions, epochs=1000, lr=0.001):
    """Fit ``model`` to ``(states, actions)`` with full-batch Adam and cross-entropy.

    Every epoch performs one optimisation step on the entire dataset. The loss
    is printed every 100 epochs.

    Args:
        model: ``nn.Module`` whose output has one score per action class.
        states: Array-like of input rows; converted to a float32 tensor.
        actions: Array-like of integer class labels; converted to an int64 tensor.
        epochs: Number of optimisation steps.
        lr: Learning rate passed to Adam.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # The data never changes between epochs, so convert it to tensors once
    # instead of rebuilding both tensors on every iteration.
    inputs = torch.as_tensor(states, dtype=torch.float32)
    targets = torch.as_tensor(actions, dtype=torch.long)
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')


# Play against the neural network

In [15]:
def play_game(model):
    """Play an interactive console game: the human is player 1, ``model`` is player 2.

    The human types a row and a column for each move. The computer picks the
    empty cell with the highest model score. The board is printed before every
    turn and once more when the game ends, followed by the result.

    Args:
        model: Callable mapping a ``(1, 9)`` float tensor of the flattened
            board to a ``(1, 9)`` tensor of per-cell scores.
    """
    game = TicTacToe()
    current_player = 1
    while not game.game_over():
        game.print_board()
        if current_player == 1:
            try:
                row = int(input("Enter the row (0, 1, 2): "))
                col = int(input("Enter the column (0, 1, 2): "))
            except ValueError:
                # Non-numeric input: ask again instead of crashing.
                print("Invalid move! Try again.")
                continue
            # Check bounds here so an out-of-range index never reaches the
            # board (too large raises IndexError, negative wraps around).
            in_bounds = 0 <= row < 3 and 0 <= col < 3
            if not in_bounds or not game.take_action((row, col), 1):
                print("Invalid move! Try again.")
                continue
        else:
            state = torch.as_tensor(game.board.flatten(), dtype=torch.float32).unsqueeze(0)
            with torch.no_grad():
                action_scores = model(state).cpu().numpy().flatten()
            # Only consider empty cells. A plain argmax over all nine scores
            # can pick an occupied cell, in which case the move is rejected
            # and the computer would silently lose its turn. The list is never
            # empty here because the game is not over.
            legal = [r * 3 + c for r, c in game.available_actions()]
            action = max(legal, key=lambda idx: action_scores[idx])
            row, col = divmod(action, 3)
            game.take_action((row, col), 2)
        current_player = 3 - current_player  # switch player
    game.print_board()
    if game.is_winner(1):
        print("You win!")
    elif game.is_winner(2):
        print("Computer wins!")
    else:
        print("It's a draw!")


# Play against the neural network (redefinition of `play_game`)

In [16]:
# NOTE(review): this cell redefines play_game, which an earlier cell already
# defines; this later definition is the one in effect when the notebook runs
# top to bottom.
def play_game(model):
    """Play an interactive console game: the human is player 1, ``model`` is player 2.

    The human types a row and a column for each move. The computer picks the
    empty cell with the highest model score. The board is printed before every
    turn and once more when the game ends, followed by the result.

    Args:
        model: Callable mapping a ``(1, 9)`` float tensor of the flattened
            board to a ``(1, 9)`` tensor of per-cell scores.
    """
    game = TicTacToe()
    current_player = 1
    while not game.game_over():
        game.print_board()
        if current_player == 1:
            try:
                row = int(input("Enter the row (0, 1, 2): "))
                col = int(input("Enter the column (0, 1, 2): "))
            except ValueError:
                # Non-numeric input: ask again instead of crashing.
                print("Invalid move! Try again.")
                continue
            # Check bounds here so an out-of-range index never reaches the
            # board (too large raises IndexError, negative wraps around).
            in_bounds = 0 <= row < 3 and 0 <= col < 3
            if not in_bounds or not game.take_action((row, col), 1):
                print("Invalid move! Try again.")
                continue
        else:
            state = torch.as_tensor(game.board.flatten(), dtype=torch.float32).unsqueeze(0)
            with torch.no_grad():
                action_scores = model(state).cpu().numpy().flatten()
            # Only consider empty cells. A plain argmax over all nine scores
            # can pick an occupied cell, in which case the move is rejected
            # and the computer would silently lose its turn. The list is never
            # empty here because the game is not over.
            legal = [r * 3 + c for r, c in game.available_actions()]
            action = max(legal, key=lambda idx: action_scores[idx])
            row, col = divmod(action, 3)
            game.take_action((row, col), 2)
        current_player = 3 - current_player  # switch player
    game.print_board()
    if game.is_winner(1):
        print("You win!")
    elif game.is_winner(2):
        print("Computer wins!")
    else:
        print("It's a draw!")


In [17]:
# Seed the random number generators used below so the run is repeatable:
# `random` drives the move selection in generate_dataset, and torch drives the
# initial weights of the network.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Generate dataset
states, actions = generate_dataset(1000)

# Initialize and train the model
model = TicTacToeNN()
train(model, states, actions, epochs=1000, lr=0.001)

# Play a game against the trained model
play_game(model)


Epoch [100/1000], Loss: 1.5960
Epoch [200/1000], Loss: 1.5458
Epoch [300/1000], Loss: 1.5140
Epoch [400/1000], Loss: 1.4900
Epoch [500/1000], Loss: 1.4702
Epoch [600/1000], Loss: 1.4541
Epoch [700/1000], Loss: 1.4397
Epoch [800/1000], Loss: 1.4273
Epoch [900/1000], Loss: 1.4168
Epoch [1000/1000], Loss: 1.4074
0 0 0
0 0 0
0 0 0
Enter the row (0, 1, 2): 0
Enter the column (0, 1, 2): 0
1 0 0
0 0 0
0 0 0
1 2 0
0 0 0
0 0 0
Enter the row (0, 1, 2): 1
Enter the column (0, 1, 2): 1
1 2 0
0 1 0
0 0 0
1 2 0
0 1 0
2 0 0
Enter the row (0, 1, 2): 2
Enter the column (0, 1, 2): 2
1 2 0
0 1 0
2 0 1
You win!
