In [14]:
import csv
import os

import chess
import chess.pgn
# Location of the PGN export to convert. The path is resolved against the
# current user's home directory instead of one hard-coded account, so the
# notebook can run on another machine without editing this cell.
pgn_file = os.path.expanduser("~/Downloads/Erigaisi.pgn")

# Parse a PGN file and return every game it contains as a list
def read_pgn_file(filename):
    """Load every game stored in a PGN file.

    The file is opened in text mode and ``chess.pgn.read_game`` is called on
    the open handle repeatedly; each result is collected until the call
    returns ``None``.

    Args:
        filename: Path of the PGN file to read.

    Returns:
        list: The parsed games, in the order they were read.
    """
    collected = []
    with open(filename) as handle:
        # read_game returns None once there is nothing left to parse.
        while (parsed := chess.pgn.read_game(handle)) is not None:
            collected.append(parsed)
    return collected

def mirror_move(move):
    """Mirror a move for the opposite side of the board.

    Both the origin and destination squares are passed through
    ``chess.square_mirror``. The promotion piece and drop piece are carried
    over unchanged; without them a promotion such as ``e7e8q`` would be
    written out as the incomplete ``e2e1``.

    Args:
        move: The ``chess.Move`` to mirror.

    Returns:
        A new ``chess.Move`` with mirrored squares and the same ``promotion``
        and ``drop`` values. A null move is returned as a null move.
    """
    # A null move has no squares to flip; mirroring its 0/0 squares would
    # produce a bogus non-null move, so pass it through untouched.
    if not move:
        return chess.Move.null()
    return chess.Move(
        chess.square_mirror(move.from_square),
        chess.square_mirror(move.to_square),
        promotion=move.promotion,
        drop=move.drop,
    )

def mirror_board(board):
    """Return the result of ``board.mirror()``.

    Used to swap the White and Black sides of a position; the board passed in
    is only asked for its mirrored counterpart.

    Args:
        board: Any object exposing a ``mirror()`` method (a ``chess.Board``).

    Returns:
        Whatever ``board.mirror()`` returns.
    """
    return board.mirror()

def is_magnus(player_name, targets=("erigaisi", "kateryna")):
    """Check whether a player name belongs to one of the tracked players.

    The function name is historical: it does not look for Magnus Carlsen.
    The match is a case-insensitive substring test of each entry in
    ``targets`` against ``player_name``.

    Args:
        player_name: Player name string, e.g. a PGN "White"/"Black" header.
        targets: Name fragments to look for. Defaults to the fragments this
            notebook was written for ("erigaisi" and "kateryna").

    Returns:
        bool: True if any target fragment occurs in ``player_name``.
    """
    lowered = player_name.lower()
    return any(target.lower() in lowered for target in targets)

def get_fen_and_move(games, max_moves=150000):
    """Build (game id, move id, FEN, UCI move) rows from parsed games.

    Each game is replayed from ``game.board()`` along its mainline. For every
    move the position *before* the move and the move itself are recorded:

    * If the tracked player (per ``is_magnus``) has White, the position and
      move are passed through ``mirror_board`` / ``mirror_move`` first.
    * Otherwise, if the tracked player has Black, they are recorded as-is.
    * Games where neither side is the tracked player contribute no rows and
      are skipped without being replayed.

    NOTE(review): a row is emitted for every move of a matching game, i.e.
    the opponent's moves are included as well as the tracked player's.

    Args:
        games: Iterable of parsed games; ``None`` entries are ignored and do
            not consume a game id.
        max_moves: Maximum number of rows to return. Only emitted rows count
            toward the limit, so games that contribute nothing cannot cut
            the dataset short. A value <= 0 yields an empty list.

    Returns:
        list[tuple]: ``(game_id, move_id, fen, uci)`` tuples. ``game_id`` is
        the 1-based index of the game among the non-``None`` games and
        ``move_id`` is the 1-based move index within that game.
    """
    move_sequence = []
    if max_moves <= 0:
        return move_sequence

    game_id = 0
    for game in games:
        if game is None:
            continue
        # Number every real game, including ones that end up skipped, so ids
        # still correspond to the game's position in the input.
        game_id += 1

        # The players are fixed for the whole game; decide once per game
        # rather than once per move.
        tracked_is_white = is_magnus(game.headers["White"])
        tracked_is_black = is_magnus(game.headers["Black"])
        if not (tracked_is_white or tracked_is_black):
            continue

        board = game.board()
        for move_id, move in enumerate(game.mainline_moves(), start=1):
            if tracked_is_white:
                # White takes precedence when both names match.
                row = (game_id, move_id, mirror_board(board).fen(), mirror_move(move).uci())
            else:
                row = (game_id, move_id, board.fen(), move.uci())
            move_sequence.append(row)
            board.push(move)

            if len(move_sequence) >= max_moves:
                return move_sequence

    return move_sequence

def save_dataset(dataset, filename, player="Erigaisi"):
    """Write the extracted rows to a CSV file.

    The file starts with the header ``GameID, MoveID, FEN, Move, Player``,
    followed by one line per dataset row with ``player`` appended as the
    last column.

    Args:
        dataset: Iterable of ``(game_id, move_id, fen, move)`` tuples.
        filename: Destination CSV path. Missing parent directories are
            created so a fresh checkout does not fail on the first run.
        player: Label written to the "Player" column of every row.
    """
    parent = os.path.dirname(filename)
    if parent:  # a bare file name has no directory to create
        os.makedirs(parent, exist_ok=True)

    # newline='' lets the csv module control line endings itself.
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["GameID", "MoveID", "FEN", "Move", "Player"])
        writer.writerows(
            (game_id, move_id, fen, move, player)
            for game_id, move_id, fen, move in dataset
        )

# Read every game from the PGN file and report how many were parsed.
games = read_pgn_file(pgn_file)
print(len(games))

# Extract the FEN and moves dataset with a limit of 150,000 moves.
dataset = get_fen_and_move(games, max_moves=150000)

# Save the dataset to a CSV file.
output_csv = "./Created data/Erigaisi_moves_dataset_150000.csv"
save_dataset(dataset, output_csv)

# Report the path that was actually written; the previous message named a
# different file (magnus_moves_dataset.csv) that this cell never creates.
print(f"Dataset created and saved as {output_csv}")


3031
Dataset created and saved as magnus_moves_dataset.csv


In [16]:
import os
import pandas as pd

# Directory containing the per-player CSV files to merge.
directory = "./Created data"

# List all CSV files in the directory. os.listdir() returns entries in
# arbitrary order, so sort them to keep the combined row order reproducible.
csv_files = sorted(
    os.path.join(directory, file)
    for file in os.listdir(directory)
    if file.endswith('.csv')
)

# Read every file first and concatenate once at the end: calling pd.concat
# inside the loop re-copies all previously accumulated rows on each pass.
frames = []
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# directory holds no CSV files.
combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Save the combined DataFrame to a new CSV file (written to the working
# directory, outside `directory`, so it is not picked up on a re-run).
combined_df.to_csv('combined_dataset.csv', index=False)

print("All files combined and saved as combined_dataset.csv")


All files combined and saved as combined_dataset.csv
