In [1]:
import numpy as np
import pandas as pd

# Contents
# [0:5] material count
# [5:69] positional counts for pawns
# [69:133] positional counts for knights
# [133:197] positional counts for bishops
# [197:261] positional counts for rooks
# [261:325] positional counts for queens
# [325:389] positional counts for kings                     0.2
# [389:395] mobility per piece                              0.1
# [395:396] tempo                                           0.05
# [396:398] pawn structure (passed pawns, stacked pawns)           # (phi_mg*phase + phi_eg*(1-phase)) type

# [398:399] king distance                                   0.1    # (single *(1-phase))

# [399:400] phase                                           Not part of feature vector
# [400:401] game result                                     Z = game result (1 if white win, -1 if black win, 0 if draw or undefined)


# Final Result
# [0:5] material count * phase
# [5:69] positional counts for pawns * phase
# [69:133] positional counts for knights * phase
# [133:197] positional counts for bishops * phase
# [197:261] positional counts for rooks * phase
# [261:325] positional counts for queens * phase
# [325:389] positional counts for kings * phase                    
# [389:395] mobility per piece * phase                             
# [395:396] tempo * phase                                          
# [396:398] pawn structure (passed pawns, stacked pawns) * phase

# [398:403] material count * (1 - phase)
# [403:467] positional counts for pawns * (1 - phase)
# [467:531] positional counts for knights * (1 - phase)
# [531:595] positional counts for bishops * (1 - phase)
# [595:659] positional counts for rooks * (1 - phase)
# [659:723] positional counts for queens * (1 - phase)
# [723:787] positional counts for kings * (1 - phase)
# [787:793] mobility per piece * (1 - phase)
# [793:794] tempo * (1 - phase)
# [794:796] pawn structure (passed pawns, stacked pawns) * (1 - phase)

# [796:797] king distance * (1 - phase)

In [6]:
# Weight initialization
weights = np.random.uniform(-1, 1, size=(398 * 2 + 1, ))
np.savetxt("weights.txt", weights)

In [3]:
# Upload from .txt
weights = np.loadtxt("weights.txt")

In [4]:
from Bot.bot3_1 import Bot3_1
import chess
import random

board = chess.Board()
bot1 = Bot3_1()
bot2 = Bot3_1()

lambda_ = 0.7
learning_rate = 0.5 * 1e-4
eta = 0.15  # exploration rate

In [7]:
number_of_games = 25
for game_index in range(number_of_games):
    board.reset()
    bot1.game.board.reset()
    bot2.game.board.reset()
    e = np.zeros((398 * 2 + 1, ))
    bots = [bot1, bot2]
    current_bot_index = 1 if game_index % 2 == 0 else 0
    was_last_move_random = False

    last_state = bots[current_bot_index].get_feature_vector()
    print(f"Starting game {game_index + 1}/{number_of_games}")
    print(weights[0:5],weights[398:403])


    while not board.is_game_over():
        current_bot = bots[current_bot_index]
        current_bot.initialize_weights(weights)
        
        if random.random() < eta:
            was_last_move_random = True
            move = random.choice(list(board.legal_moves))

            board.push(move)
            bot1.game.board.push(move)
            bot2.game.board.push(move)
            last_state = bots[1 - current_bot_index].get_feature_vector()
        else:
            was_last_move_random = False
            move, _ = current_bot.select_move() 

            board.push(move)
            bot1.game.board.push(move)
            bot2.game.board.push(move)

            new_state = bots[1 - current_bot_index].get_feature_vector()
            V_s = np.dot(weights, last_state)
            V_next = np.dot(weights, new_state)

            if board.turn == chess.WHITE: 
                V_s = -V_s
            else:
                V_next = -V_next

            delta = V_next - V_s

            e = lambda_ * e + last_state
            weights += learning_rate * delta * e

            last_state = new_state



        current_bot_index = 1 - current_bot_index

    if was_last_move_random:
        continue

    result = board.result()
    if result == '1-0':
        z = 1
    elif result == '0-1':
        z = -1
    else:
        z = 0

    V_final = np.dot(weights, last_state)
    if board.turn == chess.WHITE: 
        V_final = -V_final

    delta = z - V_final
    e = lambda_ * e + last_state
    weights += learning_rate * delta * e


    

Starting game 1/25
[-0.28609635 -0.44039958 -0.64806564  0.51646964  0.17743859] [ 0.46977658 -0.52223791  0.91322517 -0.99367798 -0.60809911]
Starting game 2/25
[-0.27167009 -0.46418973 -0.65537791  0.32291504  0.18564739] [ 0.45761105 -0.50350291  0.91951908 -0.82373551 -0.61582625]
Starting game 3/25
[-0.27562141 -0.42111735 -0.64752932  0.28079667  0.16484467] [ 0.46104498 -0.54333182  0.91231469 -0.78644772 -0.59743911]
Starting game 4/25
[-0.28222475 -0.35820794 -0.63516421  0.33198721  0.14820258] [ 0.46742223 -0.60178732  0.90226423 -0.83435345 -0.58320152]
Starting game 5/25
[-0.25140376 -0.55681018 -0.6348147   0.33333011  0.14505534] [ 0.4387067  -0.41654457  0.9019491  -0.83559564 -0.58033304]
Starting game 6/25
[-0.25283369 -0.55931739 -0.61262603  0.37975072  0.15434071] [ 0.43978982 -0.41456509  0.88394084 -0.87712388 -0.58791278]
Starting game 7/25
[-0.25096641 -0.59638092 -0.6262429   0.33730504  0.16396945] [ 0.43807033 -0.37917784  0.89538277 -0.84176264 -0.59610345]

KeyboardInterrupt: 

In [8]:
np.savetxt("weights2.txt", weights)

In [9]:
# Battle between two bots
new_weights = weights.copy()
old_weights = np.loadtxt("weights.txt")
board.reset()
bot1.game.board.reset()
bot1.initialize_weights(new_weights)
bot2.game.board.reset()
bot2.initialize_weights(old_weights)
current_bot_index = 0


while not board.is_game_over():
    print("====================")
    print(board)

    current_bot = bots[current_bot_index]

    move, _ = current_bot.select_move() 
    board.push(move)
    bot1.game.board.push(move)
    bot2.game.board.push(move)

    current_bot_index = 1 - current_bot_index

result = board.result()
if result == '1-0':
    print("New weights bot wins!")
elif result == '0-1':
    print("Old weights bot wins!")
else:
    print("It's a draw!")

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R
r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . N . .
P P P P P P P P
R N B Q K B . R
r n b q k b n r
p p p p p p p .
. . . . . . . .
. . . . . . . p
. . . . . . . .
. . . . . N . .
P P P P P P P P
R N B Q K B . R
r n b q k b n r
p p p p p p p .
. . . . . . . .
. . . . . . . p
. . . . . . . .
. . . . . N . .
P P P P P P P P
R N B Q K B R .
r n b q k b n .
p p p p p p p r
. . . . . . . .
. . . . . . . p
. . . . . . . .
. . . . . N . .
P P P P P P P P
R N B Q K B R .
r n b q k b n .
p p p p p p p r
. . . . . . . .
. . . . . . N p
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B R .
r n b q k b . .
p p p p p p p r
. . . . . n . .
. . . . . . N p
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B R .
r n b q k b . .
p p p p p p p r
. . . . . n . .
. . . . . . N p
P . . . . . . .
. . . . . . . .
. P P P 

KeyboardInterrupt: 

In [None]:
# for each game:
#     e = zero_vector(length = N)

#     for t = 0 to T-1:
#         s = state_t
#         φ_s = extract_features(s)

#         s_next = best_child_from_search(s)
#         φ_next = extract_features(s_next)

#         V_s = dot(θ, φ_s)
#         V_next = dot(θ, φ_next)

#         δ = V_next - V_s

#         e = λ * e + φ_s
#         θ = θ + α * δ * e

#     # terminal state
#     s_T = final_state
#     φ_T = extract_features(s_T)
#     V_T = dot(θ, φ_T)

#     z = game_outcome  # +1, 0, -1

#     δ = z - V_T
#     e = λ * e + φ_T
#     θ = θ + α * δ * e


In [None]:
df = pd.read_csv("game_data.csv", header=None)

X = df.iloc[:, 0:398].to_numpy()      # raw features
phase = df.iloc[:, 399].to_numpy()    # phase
z = df.iloc[-1, 400]                  # game result (same as your code)
phase_norm = phase / 24.0

# Early / late game scaling
early = X * phase_norm[:, None]
late  = X * (1.0 - phase_norm[:, None])
invariant_feature = df.iloc[:, 398].to_numpy()  # king distance

# Allocate phi
phi = np.ones((len(df), 398 * 2 + 1))

# Fill phi
phi[:, :398] = early
phi[:, 398:398*2] = late
phi[:, 398*2] = invariant_feature

phi[:, 325:389] *= 0.2  # king positional counts
phi[:, 398 + 325: 398 + 389] *= 0.2
phi[:, 389:395] *= 0.1  # mobility per piece
phi[:, 398 + 389: 398 + 395] *= 0.1
phi[:, 395:396] *= 0.05 # tempo
phi[:, 398 + 395: 398 + 396] *= 0.05
phi[:, 398*2] *= 0.1    # king distance