In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [3]:
# Read the chess-games CSV into the working DataFrame used by the cells below.
file_path = 'D:/aip/dataset_withseparatedmovesandtime.csv'  # Replace with your file path
data = pd.read_csv(filepath_or_buffer=file_path)

In [7]:
# Define a function to split moves into White and Black
def split_moves(moves):
    """Split a whitespace-separated move string into White's and Black's moves.

    Tokens are taken to alternate starting with White, so tokens at even
    positions go to White and tokens at odd positions go to Black.

    Args:
        moves: Whitespace-separated move tokens, e.g. ``"e4 c5 Nf3"``.
            Any non-string value (for example the float NaN that
            ``pd.read_csv`` produces for an empty cell, or ``None``) is
            treated as a game with no moves instead of raising
            ``AttributeError``.

    Returns:
        A ``(white_moves, black_moves)`` tuple of lists of str.
    """
    # Missing cells arrive as float NaN, which has no .split().
    if not isinstance(moves, str):
        return [], []

    moves_list = moves.split()  # Split on any run of whitespace
    white_moves = moves_list[::2]  # Even indices: White's moves
    black_moves = moves_list[1::2]  # Odd indices: Black's moves
    return white_moves, black_moves

# Split every game's move string, then transpose the per-row (white, black)
# pairs into one sequence per colour and store each as its own column.
move_pairs = data['Moves'].apply(split_moves)
white_column, black_column = zip(*move_pairs)
data['WhiteMoves'] = white_column
data['BlackMoves'] = black_column


In [8]:
# Sanity check: show the original move string next to its White/Black split
# for the first few games.
num_rows_to_check = 5  # Adjust this number as needed to check more rows

moves_preview = data[['Moves', 'WhiteMoves', 'BlackMoves']].head(num_rows_to_check)
for index, row in moves_preview.iterrows():
    print(f"Original Moves: {row['Moves']}")
    print(f"White's Moves: {row['WhiteMoves']}")
    print(f"Black's Moves: {row['BlackMoves']}")
    print("=" * 50)  # Separator between games


Original Moves: e4 c5 Nf3 Nf6 Nc3 d5 exd5 Nxd5 Bc4 Nxc3 bxc3 e6 O-O Be7 d4 O-O Re1 Qc7 d5 Rd8 Qe2 exd5 Bf4 Qxf4 Qxe7 Rf8 Bxd5 Nc6 Qxc5 Bf5 g3 Qg4 Bxc6 bxc6 Ne5 Qh3 Nxc6 Bxc2 Re2 Bd3 Re3 Ba6 Rae1 h6 Ne7+ Kh8 Nf5 Bb7 f3 Rac8 Qe5 f6 Qe6 Rce8 Qd7
White's Moves: ['e4', 'Nf3', 'Nc3', 'exd5', 'Bc4', 'bxc3', 'O-O', 'd4', 'Re1', 'd5', 'Qe2', 'Bf4', 'Qxe7', 'Bxd5', 'Qxc5', 'g3', 'Bxc6', 'Ne5', 'Nxc6', 'Re2', 'Re3', 'Rae1', 'Ne7+', 'Nf5', 'f3', 'Qe5', 'Qe6', 'Qd7']
Black's Moves: ['c5', 'Nf6', 'd5', 'Nxd5', 'Nxc3', 'e6', 'Be7', 'O-O', 'Qc7', 'Rd8', 'exd5', 'Qxf4', 'Rf8', 'Nc6', 'Bf5', 'Qg4', 'bxc6', 'Qh3', 'Bxc2', 'Bd3', 'Ba6', 'h6', 'Kh8', 'Bb7', 'Rac8', 'f6', 'Rce8']
Original Moves: e4 Nf6 e5 Nd5 d4 d6 c4 Nb6 exd6 cxd6 Nc3 g6 h3 Bg7 Nf3 O-O Be2 Nc6 O-O Bf5 d5 Na5 Nd4 Naxc4 a4 a5 Nxf5 gxf5 Bd3 e6 Qb3 Ne5 Bc2 Nec4 Rd1 Qc7 Nb5 Qc5 dxe6 fxe6 Bf4 d5 Nc7 Qb4 Nxe6 Qxb3 Bxb3 Bxb2 Ra2 Rf6 Nc7 Rc8 Nxd5 Nxd5 Rxd5 Kf8 Bg5 Rg6 Rxf5+ Ke8 Bxc4 Rxc4 Rxb2 Rc1+ Bxc1
White's Moves: ['e4', 'e5', 'd4', 'c4', 'exd6'

In [11]:
# Function to split move times into white and black
def split_move_times(time_sequence):
    """Split a whitespace-separated time string into White's and Black's times.

    Tokens are taken to alternate starting with White, so tokens at even
    positions go to White and tokens at odd positions go to Black.

    Args:
        time_sequence: Whitespace-separated time tokens, e.g.
            ``"0:03:00 0:02:56 0:02:59"``. Tokens are kept as strings and
            are not parsed. Any non-string value (for example the float NaN
            that ``pd.read_csv`` produces for an empty cell, or ``None``)
            is treated as an empty sequence instead of raising
            ``AttributeError``.

    Returns:
        A ``(white_times, black_times)`` tuple of lists of str.
    """
    # Missing cells arrive as float NaN, which has no .split().
    if not isinstance(time_sequence, str):
        return [], []

    times = time_sequence.split()
    white_times = times[::2]  # White's times are at even indices
    black_times = times[1::2]  # Black's times are at odd indices
    return white_times, black_times

# Split every game's time string, then transpose the per-row (white, black)
# pairs into one sequence per colour and store each as its own column.
time_pairs = data['MoveTimes'].apply(split_move_times)
white_time_column, black_time_column = zip(*time_pairs)
data['WhiteTimes'] = white_time_column
data['BlackTimes'] = black_time_column

# Sanity check: show the original time string next to its White/Black split
# for the first few games.
num_rows_to_check = 5  # Adjust this number as needed to check more rows
times_preview = data[['MoveTimes', 'WhiteTimes', 'BlackTimes']].head(num_rows_to_check)
for index, row in times_preview.iterrows():
    print(f"Original MoveTimes: {row['MoveTimes']}")
    print(f"White's Times: {row['WhiteTimes']}")
    print(f"Black's Times: {row['BlackTimes']}")
    print("---")


Original MoveTimes: 0:03:00 0:02:56 0:02:59 0:02:55 0:02:58 0:02:54 0:02:58 0:02:55 0:02:58 0:02:53 0:02:59 0:02:32 0:02:58 0:02:32 0:02:58 0:02:32 0:02:58 0:02:29 0:02:56 0:02:25 0:02:47 0:02:08 0:02:47 0:01:40 0:02:47 0:01:40 0:02:47 0:01:40 0:02:40 0:01:09 0:02:38 0:01:07 0:02:23 0:01:08 0:02:23 0:01:08 0:02:20 0:01:04 0:02:08 0:00:59 0:02:08 0:00:47 0:02:03 0:00:39 0:01:48 0:00:39 0:01:48 0:00:28 0:01:45 0:00:29 0:01:30 0:00:26 0:01:23 0:00:02 0:01:22
White's Times: ['0:03:00', '0:02:59', '0:02:58', '0:02:58', '0:02:58', '0:02:59', '0:02:58', '0:02:58', '0:02:58', '0:02:56', '0:02:47', '0:02:47', '0:02:47', '0:02:47', '0:02:40', '0:02:38', '0:02:23', '0:02:23', '0:02:20', '0:02:08', '0:02:08', '0:02:03', '0:01:48', '0:01:48', '0:01:45', '0:01:30', '0:01:23', '0:01:22']
Black's Times: ['0:02:56', '0:02:55', '0:02:54', '0:02:55', '0:02:53', '0:02:32', '0:02:32', '0:02:32', '0:02:29', '0:02:25', '0:02:08', '0:01:40', '0:01:40', '0:01:40', '0:01:09', '0:01:07', '0:01:08', '0:01:08', '0

In [12]:
# Write the DataFrame, including the new per-colour columns, to CSV without
# the index column.
# NOTE(review): this is the same path the dataset was loaded from in the load
# cell, so the file that was read is overwritten rather than a new file being
# created. Confirm this is intended.
output_file_path = 'D:/aip/dataset_withseparatedmovesandtime.csv'
data.to_csv(path_or_buf=output_file_path, index=False)

print(f"Updated data saved to {output_file_path}")

Updated data saved to D:/aip/dataset_withseparatedmovesandtime.csv
