<a href="https://colab.research.google.com/github/19tylermalone94/Connect_4_AI/blob/main/reinforcement_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [417]:
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random


In [418]:
def get_solution(game_state: str) -> dict | None:
    """Ask the remote solver service to score a position.

    The move sequence is POSTed as the request body. On an HTTP 200 reply the
    body is parsed as space-separated integers.

    Args:
        game_state: Move sequence string sent verbatim to the solver
            (e.g. ``'35353'``).

    Returns:
        ``{'pos': game_state, 'score': [int, ...]}`` on success, or ``None``
        when every attempt failed (non-200 status or connection error).
    """
    url = 'https://2dc16494723d8b.lhr.life'
    num_tries = 10
    # Without a timeout requests waits indefinitely on an unresponsive server,
    # which would stall the retry loop forever.
    timeout_seconds = 10
    for _ in range(num_tries):
        try:
            response = requests.post(url, data=game_state, timeout=timeout_seconds)
        except requests.RequestException as e:
            # Timeouts are RequestException subclasses, so they are retried too.
            print("Error connecting to server:", str(e))
            continue
        if response.status_code == 200:
            return {'pos': game_state, 'score': [int(n) for n in response.text.strip().split(' ')]}
        print("Failed to retrieve solution, status code:", response.status_code)
    # All attempts failed; make the "no solution" result explicit.
    return None


In [419]:
# Query the solver once with a sample move sequence and show the raw result.
print(get_solution(game_state='35353'))


{'pos': '35353', 'score': [-18, -18, -3, -18, -18, -18, -18]}


In [420]:
def new_board() -> np.ndarray:
    """Create an empty board: a 6x7 integer array filled with zeros."""
    n_rows, n_cols = 6, 7
    return np.zeros(shape=(n_rows, n_cols), dtype=int)


def drop_piece(board: np.ndarray, col: int, piece: int) -> None:
    """Place ``piece`` in column ``col`` at the row chosen by ``available_row``.

    The board is modified in place; nothing is returned.
    """
    target_row = available_row(board, col)
    board[target_row, col] = piece


def is_available(board: np.ndarray, col: int) -> bool:
    """Tell whether column ``col`` can still take a piece.

    Only the top cell (row 0) of the column is inspected: the column is
    considered open exactly when that cell holds 0.
    """
    top_cell = board[0, col]
    return top_cell == 0


def available_row(board: np.ndarray, col: int) -> int:
    """Return the index of the lowest empty cell in column ``col``.

    Rows are scanned from the last row (bottom of the printed board) up to
    row 0, and the first cell holding 0 wins.

    Args:
        board: 2-D grid where 0 marks an empty cell.
        col: Zero-based column index.

    Returns:
        The row index of the lowest empty cell.

    Raises:
        ValueError: If the column has no empty cell. Returning 0 here would be
            indistinguishable from "row 0 is free" and would let a caller
            overwrite the piece already sitting at the top of the column.
    """
    for row in reversed(range(len(board))):
        if board[row][col] == 0:
            return row
    raise ValueError(f"column {col} is full")


def play(board: np.ndarray, col: int, player: int) -> bool:
    """Try to play ``player``'s piece in column ``col``.

    Returns:
        ``True`` when the column was available and the piece was dropped,
        ``False`` when the column was not available (board left untouched).
    """
    if is_available(board, col):
        drop_piece(board, col, player)
        return True
    return False


def check_winner(board: np.ndarray) -> int | None:
    # horizontal
    for r in range(board.shape[0]):
        for c in range(board.shape[1] - 3):
            if board[r, c] == board[r, c+1] == board[r, c+2] == board[r, c+3] != 0:
                return board[r, c]

    # vertical
    for r in range(board.shape[0] - 3):
        for c in range(board.shape[1]):
            if board[r, c] == board[r+1, c] == board[r+2, c] == board[r+3, c] != 0:
                return board[r, c]

    # positive diagonal
    for r in range(board.shape[0] - 3):
        for c in range(board.shape[1] - 3):
            if board[r, c] == board[r+1, c+1] == board[r+2, c+2] == board[r+3, c+3] != 0:
                return board[r, c]

    # negative diagonal
    for r in range(3, board.shape[0]):
        for c in range(board.shape[1] - 3):
            if board[r, c] == board[r-1, c+1] == board[r-2, c+2] == board[r-3, c+3] != 0:
                return board[r, c]

    return None


In [421]:
# Directory path string under /content/drive (not referenced by the code visible in this section).
output_path = '/content/drive/MyDrive/connect-4-data/'


In [422]:
def nn_move(board: np.ndarray, seq: str) -> int:
    """Pick the model's move; currently a stub that delegates to ``random_move``.

    ``board`` is accepted but not used yet; only ``seq`` is forwarded.
    """
    # TODO: replace the random placeholder with the model's choice.
    placeholder_move = random_move(seq)
    return placeholder_move


def best_move(score: list[int]) -> int:
    """Pick a 1-based position holding the highest score, breaking ties at random.

    Args:
        score: Scores indexed by position (index 0 corresponds to move 1).

    Returns:
        The 1-based index of a maximal entry, chosen uniformly among ties.

    Raises:
        ValueError: If ``score`` is empty.
    """
    top_score = max(score)
    candidates = [move for move, value in enumerate(score, start=1) if value == top_score]
    return random.choice(candidates)


def random_move(seq: str) -> int:
    """Pick a uniformly random column (1-7) that has been played fewer than 6 times.

    Args:
        seq: Moves played so far, one digit character per move.

    Returns:
        A column number in ``1..7`` whose digit occurs fewer than 6 times in
        ``seq``.

    Raises:
        ValueError: If every column already appears 6 times. Resampling until
            a legal column turns up would never terminate in that case.
    """
    open_columns = [col for col in range(1, 8) if seq.count(str(col)) < 6]
    if not open_columns:
        raise ValueError("no legal move: every column is full")
    return random.choice(open_columns)


def game_over(board: np.ndarray, seq: str) -> bool:
    """Report whether the game has ended.

    The game is over when ``check_winner`` finds a winner, or when 42 moves
    have been recorded in ``seq``.
    """
    has_winner = check_winner(board) is not None
    all_moves_played = len(seq) == 42
    return has_winner or all_moves_played


def run_games(num_games: int, perfect_opponent: bool=True) -> None:
    """Play ``num_games`` games, printing the board after every move.

    Player 1 moves via ``nn_move``. Player -1 either follows the remote
    solver's best-scored move (``perfect_opponent=True``) or plays a random
    move.

    Args:
        num_games: Number of games to play.
        perfect_opponent: When true, player -1 uses ``get_solution`` and
            ``best_move``; otherwise ``random_move``.

    Raises:
        RuntimeError: If the solver returns no result for a position while
            ``perfect_opponent`` is true.
    """
    for _ in range(num_games):
        board = new_board()
        seq = ''
        player = 1
        while True:
            if player == 1:
                move = nn_move(board, seq)
            elif perfect_opponent:
                # Only contact the solver when its answer is actually used.
                solution = get_solution(seq)
                if solution is None:
                    raise RuntimeError(f"solver returned no result for position {seq!r}")
                move = best_move(solution['score'])
            else:
                move = random_move(seq)
            seq += str(move)
            # NOTE(review): play() returns False for an unavailable column, but
            # the move has already been appended to seq; confirm the move
            # sources can never pick a full column.
            play(board, move - 1, player)
            print(board, '\n')
            if game_over(board, seq):
                # A full board with no line of four is a draw, not a win for
                # whoever moved last.
                if check_winner(board) is None:
                    print("draw")
                else:
                    print(f"{'minimax' if player == -1 else 'our_model'} wins")
                break
            player *= -1

In [423]:
# Play a single game with the default solver-backed opponent; boards are printed below.
run_games(1)

[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0]] 

[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0 -1  0  0  0]
 [ 0  0  0  1  0  0  0]] 

[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0 -1  0  0  0]
 [ 0  0  0  1  0  0  1]] 

[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0 -1  0  0  0]
 [ 0  0  0  1 -1  0  1]] 

[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0 -1  0  0  1]
 [ 0  0  0  1 -1  0  1]] 

[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0 -1  0  0  0]
 [ 0  0  0 -1  0  0  1]
 [ 0  0  0  1 -1  0  1]] 

[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0 -1  0  0  0]
 [ 0  0  0 -1  0  0  1]
 [ 1  0  0  1 -1  0  1]] 

[[ 0  0  0  0