In [None]:
# Contents
# [0:5] material count
# [5:69] positional counts for pawns
# [69:133] positional counts for knights
# [133:197] positional counts for bishops
# [197:261] positional counts for rooks
# [261:325] positional counts for queens
# [325:389] positional counts for kings                     0.2
# [389:395] mobility per piece                              0.1
# [395:396] tempo                                           0.05
# [396:398] pawn structure (passed pawns, stacked pawns)           # (phi_mg*phase + phi_eg*(1-phase)) type

# [398:399] king distance                                   0.1    # (single *(1-phase))

# [399:400] phase                                           Not part of feature vector
# [400:401] game result                                     Z = game result (1 if white win, -1 if black win, 0 if draw or undefined)


# Final Result
# [0:5] material count * phase
# [5:69] positional counts for pawns * phase
# [69:133] positional counts for knights * phase
# [133:197] positional counts for bishops * phase
# [197:261] positional counts for rooks * phase
# [261:325] positional counts for queens * phase
# [325:389] positional counts for kings * phase                    
# [389:395] mobility per piece * phase                             
# [395:396] tempo * phase                                          
# [396:398] pawn structure (passed pawns, stacked pawns) * phase

# [398:403] material count * (1 - phase)
# [403:467] positional counts for pawns * (1 - phase)
# [467:531] positional counts for knights * (1 - phase)
# [531:595] positional counts for bishops * (1 - phase)
# [595:659] positional counts for rooks * (1 - phase)
# [659:723] positional counts for queens * (1 - phase)
# [723:787] positional counts for kings * (1 - phase)
# [787:793] mobility per piece * (1 - phase)
# [793:794] tempo * (1 - phase)
# [794:796] pawn structure (passed pawns, stacked pawns) * (1 - phase)

# [796:797] king distance * (1 - phase)

In [None]:
import numpy as np
from Bot.V3.bot3_2 import Bot3_2
import chess
import random
import pandas as pd

board = chess.Board()
bot1 = Bot3_2()
bot2 = Bot3_2()
bots = [bot1, bot2]
lambda_ = 0.7
learning_rate = 0.5 * 1e-4
eta = 0.15  # exploration rate

In [2]:
# Heuristic weights
weights = np.zeros((797, ))

weights[0:5] = np.array([82, 337, 365, 477, 1025])  
weights[398:403] = np.array([94, 281, 297, 512, 936])

weights[5:69] = np.array([
    0,   0,   0,   0,   0,   0,   0,   0,
    98, 134,  61,  95,  68, 126,  34, -11,
    -6,   7,  26,  31,  65,  56,  25, -20,
-14,  13,   6,  21,  23,  12,  17, -23,
-27,  -2,  -5,  12,  17,   6,  10, -25,
-26,  -4,  -4, -10,   3,   3,  33, -12,
-35,  -1, -20, -23, -15,  24,  38, -22,
    0,   0,   0,   0,   0,   0,   0,   0
])

weights[403:467] = np.array([
    0,   0,   0,   0,   0,   0,   0,   0,
    178, 173, 158, 134, 147, 132, 165, 187,
    94, 100,  85,  67,  56,  53,  82,  84,
    32,  24,  13,   5,  -2,   4,  17,  17,
    13,   9,  -3,  -7,  -7,  -8,   3,  -1,
    4,   7,  -6,   1,   0,  -5,  -1,  -8,
    13,   8,   8,  10,  13,   0,   2,  -7,
    0,   0,   0,   0,   0,   0,   0,   0
])

weights[69:133] = np.array([
    -167, -89, -34, -49,  61, -97, -15, -107,
    -73, -41,  72,  36,  23,  62,   7,  -17,
    -47,  60,  37,  65,  84, 129,  73,   44,
    -9,  17,  19,  53,  37,  69,  18,   22,
    -13,   4,  16,  13,  28,  19,  21,   -8,
    -23,  -9,  12,  10,  19,  17,  25,  -16,
    -29, -53, -12,  -3,  -1,  18, -14,  -19,
    -105, -21, -58, -33, -17, -28, -19,  -23,
])

weights[467:531] = np.array([
    -58, -38, -13, -28, -31, -27, -63, -99,
    -25,  -8, -25,  -2,  -9, -25, -24, -52,
    -24, -20,  10,   9,  -1,  -9, -19, -41,
    -17,   3,  22,  22,  22,  11,   8, -18,
    -18,  -6,  16,  25,  16,  17,   4, -18,
    -23,  -3,  -1,  15,  10,  -3, -20, -22,
    -42, -20, -10,  -5,  -2, -20, -23, -44,
    -29, -51, -23, -15, -22, -18, -50, -64,
])

weights[133:197] = np.array([
    -29,   4, -82, -37, -25, -42,   7,  -8,
    -26,  16, -18, -13,  30,  59,  18, -47,
    -16,  37,  43,  40,  35,  50,  37,  -2,
    -4,   5,  19,  50,  37,  37,   7,  -2,
    -6,  13,  13,  26,  34,  12,  10,   4,
    0,  15,  15,  15,  14,  27,  18,  10,
    4,  15,  16,   0,   7,  21,  33,   1,
    -33,  -3, -14, -21, -13, -12, -39, -21,
])

weights[531:595] = np.array([
    -14, -21, -11,  -8, -7,  -9, -17, -24,
    -8,  -4,   7, -12, -3, -13,  -4, -14,
    2,  -8,   0,  -1, -2,   6,   0,   4,
    -3,   9,  12,   9, 14,  10,   3,   2,
    -6,   3,  13,  19,  7,  10,  -3,  -9,
    -12,  -3,   8,  10, 13,   3,  -7, -15,
    -14, -18,  -7,  -1,  4,  -9, -15, -27,
    -23,  -9, -23,  -5, -9, -16,  -5, -17,
])

weights[197:261] = np.array([
    32,  42,  32,  51, 63,  9,  31,  43,
    27,  32,  58,  62, 80, 67,  26,  44,
    -5,  19,  26,  36, 17, 45,  61,  16,
    -24, -11,   7,  26, 24, 35,  -8, -20,
    -36, -26, -12,  -1,  9, -7,   6, -23,
    -45, -25, -16, -17,  3,  0,  -5, -33,
    -44, -16, -20,  -9, -1, 11,  -6, -71,
    -19, -13,   1,  17, 16,  7, -37, -26,
])

weights[595:659] = np.array([
    13, 10, 18, 15, 12,  12,   8,   5,
    11, 13, 13, 11, -3,   3,   8,   3,
    7,  7,  7,  5,  4,  -3,  -5,  -3,
    4,  3, 13,  1,  2,   1,  -1,   2,
    3,  5,  8,  4, -5,  -6,  -8, -11,
    -4,  0, -5, -1, -7, -12,  -8, -16,
    -6, -6,  0,  2, -9,  -9, -11,  -3,
    -9,  2,  3, -1, -5, -13,   4, -20,
])

weights[261:325] = np.array([
    -28,   0,  29,  12,  59,  44,  43,  45,
    -24, -39,  -5,   1, -16,  57,  28,  54,
    -13, -17,   7,   8,  29,  56,  47,  57,
    -27, -27, -16, -16,  -1,  17,  -2,   1,
    -9, -26,  -9, -10,  -2,  -4,   3,  -3,
    -14,   2, -11,  -2,  -5,   2,  14,   5,
    -35,  -8,  11,   2,   8,  15,  -3,   1,
    -1, -18,  -9,  10, -15, -25, -31, -50,
])

weights[659:723] = np.array([
    -9,  22,  22,  27,  27,  19,  10,  20,
    -17,  20,  32,  41,  58,  25,  30,   0,
    -20,   6,   9,  49,  47,  35,  19,   9,
    3,  22,  24,  45,  57,  40,  57,  36,
    -18,  28,  19,  47,  31,  34,  39,  23,
    -16, -27,  15,   6,   9,  17,  10,   5,
    -22, -23, -30, -16, -16, -23, -36, -32,
    -33, -28, -22, -43,  -5, -32, -20, -41,
])

weights[325:389] = np.array([
    -65,  23,  16, -15, -56, -34,   2,  13,
    29,  -1, -20,  -7,  -8,  -4, -38, -29,
    -9,  24,   2, -16, -20,   6,  22, -22,
    -17, -20, -12, -27, -30, -25, -14, -36,
    -49,  -1, -27, -39, -46, -44, -33, -51,
    -14, -14, -22, -46, -44, -30, -15, -27,
    1,   7,  -8, -64, -43, -16,   9,   8,
    -15,  36,  12, -54,   8, -28,  24,  14,
])

weights[723:787] = np.array([
    -74, -35, -18, -18, -11,  15,   4, -17,
    -12,  17,  14,  17,  17,  38,  23,  11,
    10,  17,  23,  15,  20,  45,  44,  13,
    -8,  22,  24,  27,  26,  33,  26,   3,
    -18,  -4,  21,  24,  27,  23,   9, -11,
    -19,  -3,  11,  21,  23,  16,   7,  -9,
    -27, -11,   4,  13,  14,   4,  -5, -17,
    -53, -34, -21, -11, -28, -14, -24, -43
])

weights[389:395] =  np.array([
    5, 10, 8, 4, 6, 7
])

weights[787:793] =  np.array([
    5, 10, 8, 4, 6, 7
])

weights[395] = 10  # tempo
weights[793] = 10  # tempo

weights[396:398] = np.array([50, -25])  # pawn structure
weights[794:796] = np.array([50, -25])

weights[796] = 10

weights /= 1000


In [None]:
# Random Weight initialization
weights = np.random.uniform(-1, 1, size=(398 * 2 + 1, ))

In [None]:
np.savetxt("weights.txt", weights)

In [None]:
weights = np.loadtxt("weights4.txt")

In [None]:
number_of_games = 400
for game_index in range(number_of_games):
    board.reset()
    bot1.game.board.reset()
    bot2.game.board.reset()

    if game_index % 10 == 0:
        bot1.tt = {}
        bot2.tt = {}

    e = np.zeros((398 * 2 + 1, ))
    current_bot_index = 1 if game_index % 2 == 0 else 0
    was_last_move_random = False

    last_state = bots[current_bot_index].get_feature_vector()
    print(f"Starting game {game_index + 1}/{number_of_games}")
    print(weights[0:5],weights[398:403])


    while not board.is_game_over():
        current_bot = bots[current_bot_index]
        current_bot.initialize_weights(weights)
        
        if random.random() < eta:
            was_last_move_random = True
            move = random.choice(list(board.legal_moves))

            board.push(move)
            bot1.game.board.push(move)
            bot2.game.board.push(move)
            last_state = bots[1 - current_bot_index].get_feature_vector()
        else:
            was_last_move_random = False
            move, _ = current_bot.select_move() 

            board.push(move)
            bot1.game.board.push(move)
            bot2.game.board.push(move)

            new_state = bots[1 - current_bot_index].get_feature_vector()
            V_s = np.dot(weights, last_state)
            V_next = np.dot(weights, new_state)

            if board.turn == chess.WHITE: 
                V_s = -V_s
            else:
                V_next = -V_next

            delta = V_next - V_s

            e = lambda_ * e + last_state
            weights += learning_rate * delta * e

            last_state = new_state



        current_bot_index = 1 - current_bot_index

    if was_last_move_random:
        continue

    result = board.result()
    if result == '1-0':
        z = 1
    elif result == '0-1':
        z = -1
    else:
        z = 0

    V_final = np.dot(weights, last_state)
    if board.turn == chess.WHITE: 
        V_final = -V_final

    delta = z - V_final
    e = lambda_ * e + last_state
    weights += learning_rate * delta * e


    

Starting game 1/200
[0.082 0.337 0.365 0.477 1.025] [0.094 0.281 0.297 0.512 0.936]
Starting game 2/200
[0.08193754 0.33687162 0.36506518 0.47679259 1.02559906] [0.09408134 0.28096412 0.29708117 0.51200551 0.93673483]
Starting game 3/200
[0.08204136 0.33693455 0.36478676 0.47618873 1.0251275 ] [0.09414639 0.28099293 0.29686872 0.51116279 0.93592028]
Starting game 4/200
[0.0818511  0.33688184 0.36450534 0.47597938 1.02469317] [0.09349246 0.28098734 0.29595252 0.51056822 0.93481651]
Starting game 5/200
[0.08173525 0.33707104 0.36437858 0.47592419 1.02515552] [0.09328558 0.28103218 0.29621886 0.51038428 0.93557303]
Starting game 6/200
[0.0820203  0.33672495 0.36520768 0.47592419 1.02598775] [0.09340908 0.28089595 0.29655825 0.51038428 0.93592358]
Starting game 7/200
[0.08202142 0.33672334 0.36510697 0.4759075  1.02604078] [0.09340739 0.28089569 0.29639931 0.51036387 0.93593998]
Starting game 8/200
[0.08210937 0.33672581 0.36501778 0.47612371 1.02670902] [0.09338542 0.28088605 0.29647883 0

In [4]:
np.savetxt("weights1.txt", weights)

In [7]:
# Battle between two bots
new_weights = weights.copy()
old_weights = np.loadtxt("weights.txt")
board.reset()
bot1.game.board.reset()
bot1.initialize_weights(new_weights)
bot2.game.board.reset()
bot2.initialize_weights(old_weights)
current_bot_index = 1


while not board.is_game_over():
    print("====================")
    print(board)

    current_bot = bots[current_bot_index]

    move, _ = current_bot.select_move() 
    board.push(move)
    bot1.game.board.push(move)
    bot2.game.board.push(move)

    current_bot_index = 1 - current_bot_index

result = board.result()
if result == '1-0':
    print("New weights bot wins!")
elif result == '0-1':
    print("Old weights bot wins!")
else:
    print("It's a draw!")

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R
r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . N . . . . .
P P P P P P P P
R . B Q K B N R
r n b q k b . r
p p p p p p p p
. . . . . n . .
. . . . . . . .
. . . . . . . .
. . N . . . . .
P P P P P P P P
R . B Q K B N R
r n b q k b . r
p p p p p p p p
. . . . . n . .
. . . . . . . .
. . . . . . . .
. . N . . N . .
P P P P P P P P
R . B Q K B . R
r . b q k b . r
p p p p p p p p
. . n . . n . .
. . . . . . . .
. . . . . . . .
. . N . . N . .
P P P P P P P P
R . B Q K B . R
r . b q k b . r
p p p p p p p p
. . n . . n . .
. N . . . . . .
. . . . . . . .
. . . . . N . .
P P P P P P P P
R . B Q K B . R
r . b q k b . r
p p p p p p p p
. . n . . . . .
. N . . . . . .
. . . . . . n .
. . . . . N . .
P P P P P P P P
R . B Q K B . R
r . b q k b . r
p p p p p p p p
. . n . . . . .
. N . . . . . .
. . . . . . n .
. . . . . N . P
P P P P 

In [None]:
# for each game:
#     e = zero_vector(length = N)

#     for t = 0 to T-1:
#         s = state_t
#         φ_s = extract_features(s)

#         s_next = best_child_from_search(s)
#         φ_next = extract_features(s_next)

#         V_s = dot(θ, φ_s)
#         V_next = dot(θ, φ_next)

#         δ = V_next - V_s

#         e = λ * e + φ_s
#         θ = θ + α * δ * e

#     # terminal state
#     s_T = final_state
#     φ_T = extract_features(s_T)
#     V_T = dot(θ, φ_T)

#     z = game_outcome  # +1, 0, -1

#     δ = z - V_T
#     e = λ * e + φ_T
#     θ = θ + α * δ * e


In [None]:
df = pd.read_csv("game_data.csv", header=None)

X = df.iloc[:, 0:398].to_numpy()      # raw features
phase = df.iloc[:, 399].to_numpy()    # phase
z = df.iloc[-1, 400]                  # game result (same as your code)
phase_norm = phase / 24.0

# Early / late game scaling
early = X * phase_norm[:, None]
late  = X * (1.0 - phase_norm[:, None])
invariant_feature = df.iloc[:, 398].to_numpy()  # king distance

# Allocate phi
phi = np.ones((len(df), 398 * 2 + 1))

# Fill phi
phi[:, :398] = early
phi[:, 398:398*2] = late
phi[:, 398*2] = invariant_feature

phi[:, 325:389] *= 0.2  # king positional counts
phi[:, 398 + 325: 398 + 389] *= 0.2
phi[:, 389:395] *= 0.1  # mobility per piece
phi[:, 398 + 389: 398 + 395] *= 0.1
phi[:, 395:396] *= 0.05 # tempo
phi[:, 398 + 395: 398 + 396] *= 0.05
phi[:, 398*2] *= 0.1    # king distance