# Chess Neural Network

Building a neural network that learns to play chess based on my gameplay data.

## Goal
Train a move prediction model to create an AI that plays like me.




In [15]:
# Importing necessary libraries
import pandas as pd
import ast
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

## Step 1: Load Chess Data

Load cleaned data from csv file


In [None]:
df = pd.read_csv("/Users/riteshbhandari/Documents/Dokumentit – Ritesh - MacBook Pro/GitHub/Chess-engine/bot/data-analysis/cleaned_data.csv")
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/anany/OneDrive/Documents/Chess-engine/bot/data-analysis/cleaned_data.csv'

# Step 2: Pre-processing

2.1 Turning features to numerical values (moves.. etc) 

2.2 Encoding the features to be trained

In [None]:
# convert string representation of list to actual list
df["moves"] = df["moves"].apply(ast.literal_eval)

# saving all the moves to single list to be encoded 
every_move = []
for game in df["moves"]:
    for move in game:
        every_move.append(move[1])

every_move[:10]

['e4', 'e6', 'd4', 'Qh4', 'Nc3', 'f5', 'Nf3', 'Qe7', 'e5', 'Qb4']

In [None]:
# getting all the unique moves
unique_moves = set(every_move)  # just the unique moves
print("Number of different moves:", len(unique_moves))
print()

# give move a number
move_to_number = {}

# turning integer back to moves (for future use)
number_to_move= {}

for i, move in enumerate(unique_moves):
    move_to_number[move] = i
    number_to_move[i] = move

# turning all the numbers into integers
number_moves = []
for move in every_move:
    number_moves.append(move_to_number[move])

# first 10 moves
print("First 10 moves as numbers: ")
print(number_moves[:10])
print()

# first 10 original moves
print("First 10 original moves: ")
print(every_move[:10]) 

Number of different moves: 1927

First 10 moves as numbers: 
[391, 1078, 690, 1585, 223, 1424, 1850, 667, 945, 1099]

First 10 original moves: 
['e4', 'e6', 'd4', 'Qh4', 'Nc3', 'f5', 'Nf3', 'Qe7', 'e5', 'Qb4']


In [None]:
# Create colors per game
colors_per_game = []

for game in df["moves"]:
    game_colors = []
    for move in game:
        if move[0] == "white":
            game_colors.append(1)
        else:  # black
            game_colors.append(0)
    colors_per_game.append(game_colors)

# Add to DataFrame
df["colors"] = colors_per_game

# Check first row
print(df[["moves", "colors"]].iloc[0])


moves     [(white, e4, True), (black, e6, False), (white...
colors    [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, ...
Name: 0, dtype: object


In [None]:
# turn white or black to integers ( 1  = (White), 0 = (Black) )
teoriat_moves_per_game = []

for game in df["moves"]:
    game_teoriat = []
    for move in game:
        if move[2] == True:
            game_teoriat.append(1)
        else:
            game_teoriat.append(0)
    teoriat_moves_per_game.append(game_teoriat)

# Add to DataFrame
df["teoriat_moves"] = teoriat_moves_per_game

# Check first row
print(df[["moves", "teoriat_moves"]].iloc[0])

moves            [(white, e4, True), (black, e6, False), (white...
teoriat_moves    [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, ...
Name: 0, dtype: object


In [None]:
#combine all the features into a single list of tuples seperated by games 
combined_features_per_game = []

for game_idx in range(len(df)):
    game_combined = []
    moves = df["moves"].iloc[game_idx]
    colors = df["colors"].iloc[game_idx]
    teoriat = df["teoriat_moves"].iloc[game_idx]
    
    for i in range(len(moves)):
        # (color, move_as_integer, your_move)
        game_combined.append((
            colors[i],
            move_to_number[moves[i][1]],  # convert move to integer
            teoriat[i]
        ))
    # Append the combined features for the game
    combined_features_per_game.append(game_combined)

# Add to DataFrame
df["game_data"] = combined_features_per_game

# Check first game data
print(combined_features_per_game[0])

[(1, 391, 1), (0, 1078, 0), (1, 690, 1), (0, 1585, 0), (1, 223, 1), (0, 1424, 0), (1, 1850, 1), (0, 667, 0), (1, 945, 1), (0, 1099, 0), (1, 1205, 1), (0, 521, 0), (1, 38, 1), (0, 331, 0), (1, 587, 1), (0, 665, 0), (1, 121, 1), (0, 1094, 0), (1, 1551, 1), (0, 1044, 0), (1, 1874, 1), (0, 1562, 0), (1, 769, 1), (0, 1523, 0), (1, 632, 1), (0, 1835, 0), (1, 267, 1), (0, 1794, 0), (1, 525, 1), (0, 1225, 0), (1, 1874, 1), (0, 1313, 0), (1, 1624, 1), (0, 1427, 0), (1, 541, 1), (0, 1512, 0), (1, 753, 1), (0, 963, 0), (1, 760, 1), (0, 984, 0), (1, 517, 1), (0, 729, 0), (1, 1861, 1), (0, 534, 0), (1, 541, 1), (0, 1626, 0), (1, 1721, 1), (0, 780, 0), (1, 322, 1), (0, 691, 0), (1, 161, 1), (0, 1789, 0), (1, 290, 1), (0, 1094, 0), (1, 1903, 1), (0, 1403, 0), (1, 541, 1), (0, 826, 0), (1, 760, 1), (0, 677, 0), (1, 1331, 1), (0, 1038, 0), (1, 852, 1), (0, 1468, 0), (1, 1157, 1), (0, 1287, 0), (1, 1106, 1), (0, 1182, 0), (1, 1331, 1), (0, 1287, 0), (1, 1106, 1), (0, 1182, 0), (1, 1757, 1), (0, 1442, 0)

In [None]:
# lets drop the columns with unneccessary data 
df = df.drop(columns=["moves", "colors", "teoriat_moves", "first_move","num_moves"])
df.head()

Unnamed: 0,game_id,game_data
0,123118274906,"[(1, 391, 1), (0, 1078, 0), (1, 690, 1), (0, 1..."
1,123118510404,"[(1, 690, 0), (0, 462, 1), (1, 1425, 0), (0, 9..."
2,123118790014,"[(1, 391, 0), (0, 945, 1), (1, 1850, 0), (0, 1..."
3,123158939328,"[(1, 391, 0), (0, 1425, 1), (1, 945, 0), (0, 5..."
4,123160166430,"[(1, 391, 1), (0, 945, 0), (1, 585, 1), (0, 11..."


### Step 2.2:  Embedded layer Encoding 


In [None]:
unique_moves = set()
for game in df["game_data"]:
    for (color, move, your_move) in game:
        unique_moves.add(move)

print(f"Number of unique moves: {len(unique_moves)}")
print(f"Move range: {min(unique_moves)} to {max(unique_moves)}")

Total training samples: 20966
Vocabulary size (unique moves): 1638
