# Pipeline 2 - Stockfish engine evaluation

## Inputs
- The `.pgn` files exported by Pipeline 1 (read from `./data/pipeline1_exported/`)

## Output
- One CSV file per (time control, Elo range) cell, written to `./data/pipeline2_exported/`

## CSV file columns
For each row of the csv file, the output contains:
- **Basic game info**
    - WhiteUsername
    - BlackUsername
    - WhiteElo
    - BlackElo
    - EloDifference
    - TimeControl
    - Opening
    - GameId
- **Moving info**
    - MoveId
    - RemainingTime
    - MovePlayed
    - MovePlayedEval
    - ProcessTime
    - NumberofNodes
- **Algorithm prediction** ($n$ from $1$ to $5$)
    - best_move_$n$
    - best_mate_$n$
    - best_score_$n$
    - processTime_$n$
    - numberofNodes_$n$

---

## Import libraries

In [1]:
# import julia libraries
using PyCall
using CSV
using DataFrames
using Plots

# import python chess library
@pyimport chess
cp = pyimport("chess.pgn")
ce = pyimport("chess.engine")

PyObject <module 'chess.engine' from '/home/ubuntu/.local/lib/python3.10/site-packages/chess/engine.py'>

## Path

In [2]:
# Constants
# Location of the Stockfish executable handed to `initialize_stockfish`.
const STOCKFISH_PATH = "/usr/local/bin/stockfish"

# Root data directory; the input/output directories below are built from it.
const DATA_PATH = "./data/"
const INPUT_DATA_PATH = "$(DATA_PATH)pipeline1_exported/" # file type: .pgn
const OUTPUT_DATA_PATH = "$(DATA_PATH)pipeline2_exported/" # file type: .csv

# Path of the CSV template file.
# NOTE(review): not referenced by any function visible in this notebook.
const TEMPLATE_FILE_PATH = "$(DATA_PATH)expected_output_template.csv"

# File types (extensions appended to the cell name to build file names)
const INPUT_FILETYPE = ".pgn"
const OUTPUT_FILETYPE = ".csv"

# Log directory; `main` writes "P2_<cell name>.log" files into it.
const LOG_FILE_PATH = "./log/"

"./log/"

In [3]:
# pgn_file = open(input_filename)
# game = cp.read_game(pgn_file)

## Methods

### From Pipeline 1

In [4]:
"""
    write_log(filename::String, message::String; overwrite::Bool=false)

Write `message` to the file `filename` exactly as given (no newline is added).

The file is truncated when `overwrite` is `true` or when it does not exist yet;
otherwise the message is appended to the existing content. Returns whatever the
underlying `write` call returns.
"""
function write_log(filename::String, message::String; overwrite::Bool=false)
    # "w" covers both an explicit overwrite and a file that has to be created.
    mode = (overwrite || !isfile(filename)) ? "w" : "a"

    return open(io -> write(io, "$message"), filename, mode)
end

write_log (generic function with 1 method)

In [5]:
"""
    game_cell_filename(criteria_result)::String

Build the base file name (without directory or extension) of one data cell.

`criteria_result` is a `(time_group, elo_group)` pair. `time_group` selects
`"blitz"`, `"rapid"` or `"classical"` (1, 2, 3) and `elo_group` selects
`"beginner"`, `"intermediate"` or `"expert"` (1, 2, 3). The result has the form
`"<time control>_<elo range>"`, e.g. `"rapid_expert"`.

# Throws
- `ArgumentError` if either group is not 1, 2 or 3. Previously such a value
  silently produced a name containing the text `nothing`.
"""
function game_cell_filename(criteria_result)::String
    time_group, elo_group = criteria_result

    # Position in each tuple == group number.
    time_controls = ("blitz", "rapid", "classical")
    elo_ranges = ("beginner", "intermediate", "expert")

    time_group in eachindex(time_controls) ||
        throw(ArgumentError("time group must be 1, 2 or 3, got $(repr(time_group))"))
    elo_group in eachindex(elo_ranges) ||
        throw(ArgumentError("ELO group must be 1, 2 or 3, got $(repr(elo_group))"))

    time_control = time_controls[Int(time_group)]
    elo_range = elo_ranges[Int(elo_group)]

    return "$(time_control)_$(elo_range)"
end

game_cell_filename (generic function with 1 method)

#### Test file paths

In [6]:
# cell_name = game_cell_filename((3,3))

In [7]:
# log_filename = "$(LOG_FILE_PATH)P2_$cell_name.log"

In [8]:
# input_filename = "$INPUT_DATA_PATH$cell_name$INPUT_FILETYPE"

In [9]:
# output_filename = "$OUTPUT_DATA_PATH$cell_name$OUTPUT_FILETYPE"

### From Pipeline 2

In [10]:
"""
    template_output(filename::String)

Write an empty, header-only CSV describing the expected output columns to
`filename` and return the result of `CSV.write`.

The columns are: the game order, the basic game info, the per-move info, the
move evaluation, and five groups of engine prediction columns (suffix `_1` to
`_5`).
"""
function template_output(filename::String)
    game_data = DataFrame(
        GameOrder = Int64[],
        # basic_game_info()
        WhiteUsername = String[],
        BlackUsername = String[],
        WhiteElo = Int64[],
        BlackElo = Int64[],
        EloDifference = Int64[],
        TimeControl = String[],
        Opening = String[],
        GameId = String[],
        # per_move_info()
        MoveId = Int64[],
        MovePlayed = String[],
        RemainingTime = Float64[],
        # post_game()
        MoveEvaluation = Int64[],
    )

    # per_move_prediction(): one group of five columns per candidate move.
    for n in 1:5
        game_data[!, Symbol("best_move_", n)] = String[]
        game_data[!, Symbol("best_mate_", n)] = Int64[]
        game_data[!, Symbol("best_score_", n)] = Int64[]
        game_data[!, Symbol("processTime_", n)] = Float64[]
        game_data[!, Symbol("numberofNodes_", n)] = Int64[]
    end

    return CSV.write(filename, game_data)
end

template_output (generic function with 1 method)

In [11]:
# template_output("./template_test_output.csv")

In [12]:
"""
    initialize_stockfish(STOCKFISH_PATH)

Open the UCI engine executable located at `STOCKFISH_PATH` through
python-chess' `SimpleEngine.popen_uci` and return the resulting engine object.
"""
initialize_stockfish(STOCKFISH_PATH) = ce.SimpleEngine.popen_uci(STOCKFISH_PATH)

initialize_stockfish (generic function with 1 method)

In [13]:
# stockfish = initialize_stockfish(STOCKFISH_PATH)

In [14]:
"""
    basic_game_info(game)

Collect the per-game header fields of `game` into a one-row `DataFrame` with the
columns `WhiteUsername`, `BlackUsername`, `WhiteElo`, `BlackElo`,
`EloDifference`, `TimeControl`, `Opening` and `GameId`.

Missing text headers become `"NA"`. A missing or non-numeric Elo header (for
example `"?"`) becomes `0` instead of raising a parse error. `EloDifference` is
`WhiteElo - BlackElo`. `GameId` is the text after the last `/` of the `Site`
header.
"""
function basic_game_info(game)
    headers = game.headers

    # `tryparse` returns `nothing` for values such as "?"; fall back to the same
    # 0 that is already used when the header is absent.
    parse_elo(key) = something(tryparse(Int64, get(headers, key, "0")), 0)

    WhiteElo = parse_elo("WhiteElo")
    BlackElo = parse_elo("BlackElo")

    # `split` yields `SubString`s; convert so the column is a plain `String`
    # column like the output schema declares.
    GameId = String(last(split(get(headers, "Site", "NA"), "/")))

    game_info = DataFrame(
        WhiteUsername = [get(headers, "White", "NA")],
        BlackUsername = [get(headers, "Black", "NA")],
        WhiteElo = [WhiteElo],
        BlackElo = [BlackElo],
        EloDifference = [WhiteElo - BlackElo],
        TimeControl = [get(headers, "TimeControl", "NA")],
        Opening = [get(headers, "Opening", "NA")],
        GameId = [GameId]
    )

    return game_info
end

basic_game_info (generic function with 1 method)

In [15]:
# basic_game_info(game)

- **Moving info**
    - MoveId
    - RemainingTime
    - MovePlayed
    - MovePlayedEval
    - ProcessTime
    - NumberofNodes
- **Algorithm prediction** ($n$ from $1$ to $5$)
    - best_move_$n$
    - best_mate_$n$
    - best_score_$n$
    - processTime_$n$
    - numberofNodes_$n$

---

- Clock: `[%clk ...]`
- Eval: `[%eval ...]`

In [16]:
"""
    per_move_info(stockfish, curr_move, MoveId)

Return a one-row `DataFrame` with the columns `MoveId`, `MovePlayed` and
`RemainingTime` for the game node `curr_move`.

`MovePlayed` is the UCI text of the node's move, or `""` when the node has no
move. `RemainingTime` is the value of `curr_move.clock()`, or the sentinel
`-999.999` when no clock value is available. `stockfish` is accepted for
signature compatibility with the other per-move functions but is not used.
"""
function per_move_info(stockfish, curr_move, MoveId)
    # The board is not needed here, so it is no longer rebuilt for every move.
    move = curr_move.move
    MovePlayed = isnothing(move) ? "" : move.uci()

    # Query the clock once and reuse the value.
    clock = curr_move.clock()
    RemainingTime = isnothing(clock) ? -999.999 : clock

    return DataFrame(
        MoveId = MoveId, MovePlayed = MovePlayed, RemainingTime = RemainingTime
    )
end

per_move_info (generic function with 1 method)

In [17]:
# per_move_info(stockfish, game.next(), 1)

In [18]:
"""
    per_move_prediction(stockfish, curr_move, log_filename, depth=20, multipv=5)

Analyse the board of `curr_move` with `stockfish` (depth limit `depth`,
`multipv` lines) and return a one-row `DataFrame` holding, for each of the five
slots, `best_move_n`, `best_mate_n`, `best_score_n`, `processTime_n` and
`numberofNodes_n`.

Scores are taken from White's point of view. For a mate score `best_mate_n`
holds the mate value and `best_score_n` is `-999999`; for a centipawn score it
is the other way round. Analysis entries without a `"pv"` key are skipped, and
slots that receive no entry are filled with `""`, `-999999`, `-999999`, `0.0`
and `-999999`. Every score is also written to `log_filename`.
"""
function per_move_prediction(stockfish, curr_move, log_filename::String, depth::Int64 = 20, multipv::Int64 = 5)
    position = curr_move.board()
    lines = stockfish.analyse(position, ce.Limit(depth=depth), multipv=multipv)

    best_moves = String[]
    best_mates = Int64[]
    best_scores = Int64[]
    processTimes = Float64[]
    numberofNodes = Int64[]

    for (rank, line) in enumerate(lines)
        # Entries without a principal variation carry no move to report.
        haskey(line, "pv") || continue

        push!(best_moves, line["pv"][1].uci())
        white_score = line["score"].white()

        if white_score.is_mate() == true
            mate_in = white_score.mate()
            write_log(log_filename, "Mate($rank) is $(mate_in); ")
            push!(best_mates, mate_in)
            push!(best_scores, -999999)
        else
            centipawns = white_score.score()
            write_log(log_filename, "Cp($rank) is $(centipawns); ")
            push!(best_mates, -999999)
            push!(best_scores, centipawns)
        end

        push!(processTimes, get(line, "time", 0.000000))
        push!(numberofNodes, get(line, "nodes", 0))
    end

    # Fill the unused slots so that all five column groups always exist.
    while length(best_moves) < 5
        push!(best_moves, "")
        push!(best_mates, -999999)
        push!(best_scores, -999999)
        push!(processTimes, 0.0000)
        push!(numberofNodes, -999999)
    end

    # Assemble the single output row, one group of five columns per slot.
    move_info = DataFrame()
    for n in 1:5
        move_info[!, Symbol("best_move_", n)] = [best_moves[n]]
        move_info[!, Symbol("best_mate_", n)] = [best_mates[n]]
        move_info[!, Symbol("best_score_", n)] = [best_scores[n]]
        move_info[!, Symbol("processTime_", n)] = [processTimes[n]]
        move_info[!, Symbol("numberofNodes_", n)] = [numberofNodes[n]]
    end

    return move_info
end

per_move_prediction (generic function with 3 methods)

In [19]:
# per_move_prediction(stockfish, game, log_filename)

In [20]:
"""
    per_game(stockfish, curr_game, game_order::Int64, log_filename::String)

Analyse every move of `curr_game` and return one `DataFrame` row per move.

Each row contains `GameOrder`, the columns of `basic_game_info`, the columns of
`per_move_info` for the move, and the columns of `per_move_prediction` computed
on the position *before* that move was played. A game without moves yields an
empty `DataFrame` with the same columns.

Side effects: progress and timing text is appended to `log_filename`, and one
row (game order, number of moves, elapsed seconds) is appended to
`log_filename` followed by `.csv`.

The position after the last move is not analysed, because its prediction is
never part of the returned rows.
"""
function per_game(stockfish, curr_game, game_order::Int64, log_filename::String)
    # Log data analysis
    start_time = time() # Start processing ...

    # Not using `ucinewgame` since this is not actual playing
    # Re-initialize engine per each game
    # NOTE(review): `Protocol.initialize` looks like a coroutine of python-chess'
    # protocol class rather than a method of the engine wrapper; confirm that this
    # call really re-initializes the engine. Kept unchanged here.
    ce.Protocol.initialize(stockfish)

    engine_time_stop = time() - start_time
    write_log(log_filename, "\n")
    write_log(log_filename, "Spent $engine_time_stop to initialize engine")

    # Constant info per each game
    base_info = basic_game_info(curr_game)
    gid = DataFrame(GameOrder = game_order)

    # Typed, empty accumulators: they fix the column types and also give the
    # right columns when the game has no moves at all.
    gids = DataFrame(GameOrder = Int64[])
    base_infos = DataFrame(
        # basic_game_info()
        WhiteUsername = String[], BlackUsername = String[], WhiteElo = Int64[], BlackElo = Int64[],
        EloDifference = Int64[], TimeControl = String[], Opening = String[], GameId = String[]
    )
    per_move_infos = DataFrame(
        # per_move_info()
        MoveId = Int64[], MovePlayed = String[], RemainingTime = Float64[]
    )
    per_move_predictions = DataFrame(
        # per_move_prediction()
        best_move_1 = String[], best_mate_1 = Int64[], best_score_1 = Int64[], processTime_1 = Float64[], numberofNodes_1 = Int64[],
        best_move_2 = String[], best_mate_2 = Int64[], best_score_2 = Int64[], processTime_2 = Float64[], numberofNodes_2 = Int64[],
        best_move_3 = String[], best_mate_3 = Int64[], best_score_3 = Int64[], processTime_3 = Float64[], numberofNodes_3 = Int64[],
        best_move_4 = String[], best_mate_4 = Int64[], best_score_4 = Int64[], processTime_4 = Float64[], numberofNodes_4 = Int64[],
        best_move_5 = String[], best_mate_5 = Int64[], best_score_5 = Int64[], processTime_5 = Float64[], numberofNodes_5 = Int64[]
    )

    # Move 0 is the starting position: it only provides the prediction that
    # belongs to the first move.
    MoveId = 0
    write_log(log_filename, "\nMove $MoveId processing...")
    prediction = per_move_prediction(stockfish, curr_game, log_filename)
    node = curr_game.next()

    # `next()` gives `nothing` when there is no following move, which also
    # covers a game that has no moves.
    while !isnothing(node)
        MoveId += 1
        write_log(log_filename, "\nMove $MoveId processing...")

        # Grow the accumulators in place; `prediction` was computed on the
        # position this move was played from.
        append!(gids, gid)
        append!(base_infos, base_info)
        append!(per_move_infos, per_move_info(stockfish, node, MoveId))
        append!(per_move_predictions, prediction)

        # Last node of the game: nothing left that would use another analysis.
        if node.is_end()
            break
        end

        prediction = per_move_prediction(stockfish, node, log_filename)
        node = node.next()
    end

    # Data combination
    game_infos = hcat(gids, base_infos)
    move_infos = hcat(per_move_infos, per_move_predictions)
    game_data = hcat(game_infos, move_infos)

    # Log data analysis
    end_time = time() # End processing ...
    one_game_time_elapsed = end_time - start_time
    write_log(log_filename, "\n")
    write_log(log_filename, "Game $game_order whole process time: $one_game_time_elapsed, number of moves: $MoveId")

    # Appended without a header; the header row is written by the caller.
    log_time = DataFrame(
        GameId = game_order, NumberOfMoves = MoveId, TotalProcessTime = one_game_time_elapsed
    )

    CSV.write("$log_filename.csv", log_time, append=true)

    return game_data
end

per_game (generic function with 1 method)

In [21]:
# @time one_game = per_game(stockfish, game, 1, log_filename)

In [22]:
"""
    post_game(data)

Return a copy of `data` with an extra `MoveEvaluation` column inserted at the
position of the `RemainingTime` column (so it ends up directly before it).

The evaluation of row `i` is the `best_score_1` value of row `i + 1`; the last
row gets the sentinel `-999999`. `data` itself is not modified.
"""
function post_game(data)
    game_data = copy(data)
    n_rows = size(game_data, 1)

    # Shift `best_score_1` up by one row.
    # (An earlier variant computed score/mate differences between consecutive
    # rows instead; the raw next-row score is what is stored.)
    move_eva = Int64[game_data[i, :best_score_1] for i in 2:n_rows]

    # The last row has no following row to take a score from.
    if n_rows > 0
        push!(move_eva, -999999)
    end

    target = findfirst(==("RemainingTime"), names(game_data))
    insertcols!(game_data, target, :MoveEvaluation => move_eva)

    return game_data
end

post_game (generic function with 1 method)

In [23]:
# post_game(one_game)

In [24]:
# CSV.write("one_game_sample_data.csv", post_game(one_game))

In [25]:
"""
    per_file(stockfish, input_filename, output_filename, log_filename)

Analyse every game of the PGN file `input_filename` and write the results to
the CSV file `output_filename`.

The output file is first (re)created with only the header row; the rows of each
game (`post_game(per_game(...))`) are then appended as soon as the game has been
processed. Games are numbered 1, 2, 3, ... in file order and that number is
passed on as the game order. A header-only timing file is created at
`log_filename` followed by `.csv`, and progress text is appended to
`log_filename`.

The PGN file is closed when the function returns, also when an error occurs.
Returns `nothing`.
"""
function per_file(stockfish, input_filename::String, output_filename::String, log_filename::String)
    data_template = DataFrame(
        # per_game()
        GameOrder = Int64[],
        # basic_game_info()
        WhiteUsername = String[], BlackUsername = String[], WhiteElo = Int64[], BlackElo = Int64[],
        EloDifference = Int64[], TimeControl = String[], Opening = String[], GameId = String[],
        # per_move_info() & post_game()
        MoveId = Int64[], MovePlayed = String[], MoveEvaluation = Int64[], RemainingTime = Float64[],
        # per_move_prediction()
        best_move_1 = String[], best_mate_1 = Int64[], best_score_1 = Int64[], processTime_1 = Float64[], numberofNodes_1 = Int64[],
        best_move_2 = String[], best_mate_2 = Int64[], best_score_2 = Int64[], processTime_2 = Float64[], numberofNodes_2 = Int64[],
        best_move_3 = String[], best_mate_3 = Int64[], best_score_3 = Int64[], processTime_3 = Float64[], numberofNodes_3 = Int64[],
        best_move_4 = String[], best_mate_4 = Int64[], best_score_4 = Int64[], processTime_4 = Float64[], numberofNodes_4 = Int64[],
        best_move_5 = String[], best_mate_5 = Int64[], best_score_5 = Int64[], processTime_5 = Float64[], numberofNodes_5 = Int64[]
    )

    column_order = [
        :GameOrder,
        :WhiteUsername, :BlackUsername, :WhiteElo, :BlackElo,
        :EloDifference, :TimeControl, :Opening, :GameId,
        :MoveId, :MovePlayed, :MoveEvaluation, :RemainingTime,
        :best_move_1, :best_mate_1, :best_score_1, :processTime_1, :numberofNodes_1,
        :best_move_2, :best_mate_2, :best_score_2, :processTime_2, :numberofNodes_2,
        :best_move_3, :best_mate_3, :best_score_3, :processTime_3, :numberofNodes_3,
        :best_move_4, :best_mate_4, :best_score_4, :processTime_4, :numberofNodes_4,
        :best_move_5, :best_mate_5, :best_score_5, :processTime_5, :numberofNodes_5
    ]

    # NOTE(review): these header names differ from the column names of the rows
    # that `per_game` appends to the same file (appended rows carry no header).
    log_time_template = DataFrame(
        GameId = Int64[], NumberOfGame = Int64[], OverAllTime = Float64[]
    )

    # The do-block closes the PGN file on every exit path.
    open(input_filename) do pgn_file
        # The column order is enforced by selecting the columns before writing;
        # `cols` is not among the documented `CSV.write` keywords.
        CSV.write(output_filename, data_template[:, column_order])
        CSV.write("$log_filename.csv", log_time_template)

        GameId = 0
        while true
            # `read_game` gives `nothing` once the file has no further game.
            game = cp.read_game(pgn_file)
            if isnothing(game)
                break
            end

            GameId += 1
            write_log(log_filename, "\nGame $GameId ......")

            one_game_rows = post_game(per_game(stockfish, game, GameId, log_filename))
            CSV.write(output_filename, one_game_rows[:, column_order], append=true)
        end
    end

    return nothing
end

per_file (generic function with 1 method)

In [26]:
# test_output_filename = "./test_output.csv"

In [27]:
# per_file(stockfish, input_filename, test_output_filename, log_filename)

In [28]:
"""
    main()

Run the pipeline for all nine cells (time groups 1-3 x ELO groups 1-3).

For every cell the input PGN file, the output CSV file and the log file names
are derived from `game_cell_filename` and the path constants, and the cell is
processed with `per_file`. The output and log directories are created if they
do not exist. One engine process is started for the whole run and is shut down
again when the run finishes or fails. Returns `nothing`.
"""
function main()
    # Writing into a missing directory would fail on the first file.
    mkpath(OUTPUT_DATA_PATH)
    mkpath(LOG_FILE_PATH)

    stockfish = initialize_stockfish(STOCKFISH_PATH)
    try
        for i in 1:3, j in 1:3
            cell_name = game_cell_filename((i, j))
            log_filename = "$(LOG_FILE_PATH)P2_$cell_name.log"
            input_filename = "$INPUT_DATA_PATH$cell_name$INPUT_FILETYPE"
            output_filename = "$OUTPUT_DATA_PATH$cell_name$OUTPUT_FILETYPE"
            # Write to this cell's own output file (the previous argument was a
            # name that only existed in a commented-out test cell).
            per_file(stockfish, input_filename, output_filename, log_filename)
        end
    finally
        # Do not leave the engine process running, even after an error.
        stockfish.quit()
    end

    return nothing
end

main (generic function with 1 method)

## Execution

In [None]:
main()