In [None]:
import os

import pandas as pd

pd.set_option('display.max_columns', None)

# Location of the zipped, pickled games DataFrame. Set the CHESS_DATA_PATH
# environment variable to run this notebook on another machine; when it is
# unset the original hard-coded local path is used, so existing behaviour is
# unchanged.
DEFAULT_CHESS_DATA_PATH = r'C:\Users\Abrah\Dropbox\PC (2)\Desktop\GitHub Repos\CST499-40_FA22-Capstone-BradleyChess\chess_data\Chess_Games_DB_pd_df_Part_11.pkl'
chess_data_path = os.environ.get('CHESS_DATA_PATH', DEFAULT_CHESS_DATA_PATH)

# NOTE(review): unpickling can execute arbitrary code; only load files that
# come from a trusted source.
chess_data = pd.read_pickle(chess_data_path, compression = 'zip')

In [None]:
import numpy as np
import random

class Agent:
    """Chess agent that scores moves in a per-turn Q table.

    The Q table is a DataFrame whose index holds chess move strings and whose
    columns are this agent's turn labels (``f'{color}1'`` up to
    ``f'{color}{MAX_TURNS}'``). While ``is_trained`` is False the agent replays
    the move recorded in ``chess_data`` for the requested game and turn; once
    it is True the agent picks a legal move from the Q table.

    Attributes:
        learn_rate: Stored as given; not read by any method of this class.
        discount_factor: Stored as given; not read by any method of this class.
        color: Prefix used to select this agent's columns and turn labels.
        chess_data: Games DataFrame, looked up as ``chess_data.at[game, turn]``.
        is_trained: Selects between training-mode and game-mode policies.
        Q_table: Integer score table, moves (rows) by turns (columns).
    """

    # Number of turn columns created for the agent in the Q table.
    MAX_TURNS = 200
    # Probability of picking a random legal move instead of the best-scored one.
    EXPLORATION_RATE = 0.1

    def __init__(self, color: str, chess_data: pd.DataFrame, learn_rate = 0.6, discount_factor = 0.35):
        """Store the settings and build a zero-filled Q table from ``chess_data``."""
        self.learn_rate = learn_rate
        self.discount_factor = discount_factor
        self.color = color
        self.chess_data = chess_data
        self.is_trained: bool = False
        self.Q_table: pd.DataFrame = self.init_Q_table(self.chess_data)
    
    def choose_action(self, environ_state: dict, curr_game: str = 'Game 1') -> str:
        """Return the agent's move for the current turn.

        Any legal move missing from the Q table is added (zero-scored) first.

        Args:
            environ_state: Mapping that provides at least ``'legal_moves'``
                (list of move strings) and ``'curr_turn'`` (a turn label).
            curr_game: Row label in ``chess_data``; only used in training mode.

        Returns:
            The chosen chess move.
        """
        moves_not_in_Q_table: list[str] = [move for move in environ_state['legal_moves'] if move not in self.Q_table.index]
        if moves_not_in_Q_table:
            self.update_Q_table(moves_not_in_Q_table)
        if self.is_trained:
            return self.policy_game_mode(environ_state['legal_moves'], environ_state['curr_turn'])
        else:
            return self.policy_training_mode(curr_game, environ_state["curr_turn"])

    def policy_training_mode(self, curr_game: str, curr_turn: str) -> str:
        """Return the move stored in ``chess_data`` at row ``curr_game``, column ``curr_turn``."""
        return self.chess_data.at[curr_game, curr_turn]
    ### end of policy_training_mode ###

    def policy_game_mode(self, legal_moves: list[str], curr_turn: str) -> str:
        """Pick a legal move from the Q table for ``curr_turn``.

        With probability ``EXPLORATION_RATE`` a random legal move is sampled;
        otherwise the legal move with the highest Q value is returned.

        Raises:
            ValueError: If none of ``legal_moves`` is present in the Q table
                (including when ``legal_moves`` is empty).
        """
        turn_scores = self.Q_table[curr_turn]
        legal_moves_in_q_table = turn_scores.loc[turn_scores.index.intersection(legal_moves)]

        # Fail with a clear message instead of the opaque pandas error that
        # sample()/idxmax() raise on an empty Series.
        if legal_moves_in_q_table.empty:
            raise ValueError(f'no legal moves found in the Q table for turn {curr_turn}')

        dice_roll = get_number_with_probability(self.EXPLORATION_RATE)
        if dice_roll == 1:
            chess_move = legal_moves_in_q_table.sample().index[0]
        else:
            chess_move = legal_moves_in_q_table.idxmax()
        return chess_move
    ### end of policy_game_mode ###

    def init_Q_table(self, chess_data: pd.DataFrame) -> pd.DataFrame:
        """Build a zero-filled Q table from the moves found in ``chess_data``.

        Rows are the unique non-null values found in every column whose name
        starts with ``self.color``; columns are the ``MAX_TURNS`` turn labels.
        """
        turns_list = [f'{self.color}{i + 1}' for i in range(self.MAX_TURNS)]
        # NOTE(review): this selects every column starting with the color
        # prefix; assumes only move columns are named that way -- confirm.
        move_columns = [col for col in chess_data.columns if col.startswith(self.color)]
        # dropna() keeps NaN/None cells from becoming a row label.
        unique_moves = pd.Series(chess_data[move_columns].values.flatten()).dropna().unique()
        q_table: pd.DataFrame = pd.DataFrame(0, index = unique_moves, columns = turns_list, dtype = np.int64)
        return q_table
    ### end of init_Q_table ###

    def change_Q_table_pts(self, chess_move: str, curr_turn: str, pts: int) -> None:
        """Add ``pts`` to the Q value stored for ``chess_move`` at ``curr_turn``."""
        self.Q_table.at[chess_move, curr_turn] += pts
    ### end of change_Q_table_pts ###

    def update_Q_table(self, new_chess_moves: list[str]) -> None:
        """Append zero-scored rows for moves that are not yet in the Q table.

        Duplicate entries and moves already present are skipped so the Q table
        index stays unique.
        """
        # dict.fromkeys de-duplicates while preserving the caller's order.
        moves_to_add = [move for move in dict.fromkeys(new_chess_moves) if move not in self.Q_table.index]
        if not moves_to_add:
            return
        q_table_new_values: pd.DataFrame = pd.DataFrame(0, index = moves_to_add, columns = self.Q_table.columns, dtype = np.int64)
        self.Q_table = pd.concat([self.Q_table, q_table_new_values])
    ### update_Q_table ###

    # @log_config.log_execution_time_every_N()        
    def reset_Q_table(self) -> None:
        """Set every Q value to zero, keeping the existing rows and columns."""
        self.Q_table.iloc[:, :] = 0 


def get_number_with_probability(probability: float) -> int:
    """Return 1 with the given probability, otherwise 0.

    One value is drawn from ``random.random()``; the result is 1 when that
    draw is strictly less than ``probability``.
    """
    return int(random.random() < probability)
    

In [None]:
# Build the agent whose move columns and turn labels use the 'W' prefix.
w_agent = Agent(color = 'W', chess_data = chess_data)

In [None]:
# Preview the first rows only; the full table has one column per turn and one
# row per known move, which is too large to dump usefully.
w_agent.Q_table.head()

In [None]:
# Print a summary of the games DataFrame held by the agent.
w_agent.chess_data.info()

In [None]:
# Fill the Q table with random demo scores in [50, 10000] so that game-mode
# move selection has something to rank. One vectorised, seeded draw replaces
# the deprecated per-cell DataFrame.applymap call and makes re-runs reproducible.
rng = np.random.default_rng(42)
w_agent.Q_table = pd.DataFrame(
    rng.integers(50, 10001, size = w_agent.Q_table.shape, dtype = np.int64),
    index = w_agent.Q_table.index,
    columns = w_agent.Q_table.columns,
)
w_agent.Q_table.head()

In [None]:
# Rank every move in the Q table by its score for turn 'W1', highest first.
selected_column_sorted = w_agent.Q_table.loc[:, 'W1'].sort_values(ascending = False)
selected_column_sorted

In [None]:
# Hand-built environment state for turn 'W1', used to exercise choose_action.
# NOTE(review): 'Rec1+' is presumably included to exercise the path that adds
# an unseen move to the Q table -- confirm.
environ_state = dict(
    turn_index = 0,
    curr_turn = 'W1',
    legal_moves = [
        "e3", "e4", "d3", "d4",
        "c3", "c4", "f3", "f4",
        "g3", "g4", "h3", "h4",
        "a3", "a4", "b3", "b4",
        "b1c3", "b1a3", "g1f3", "g1h3",
        'Rec1+',
    ],
)

In [None]:
# Switch choose_action from replaying recorded games (training mode) to
# selecting moves from the Q table (game mode).
w_agent.is_trained = True

In [None]:
# Ask the agent for a move; curr_game is left at its default ('Game 1'),
# which is only read in training mode.
chess_move_1 = w_agent.choose_action(environ_state)

In [None]:
# Display the move the agent selected.
chess_move_1