In [2]:
### Merge and Map Chess Openings

# Import libraries
import pandas as pd
from io import StringIO


In [10]:
# Read each tab-separated part (a-e) into its own frame; the tuple keeps them
# in file order for the concat below.
a, b, c, d, e = tsv_data = tuple(
    pd.read_csv(path, sep='\t')
    for path in ('a.tsv', 'b.tsv', 'c.tsv', 'd.tsv', 'e.tsv')
)

# Stack the parts row-wise and renumber the index from 0.
merged_df = pd.concat(tsv_data, ignore_index=True)

In [11]:
# Preview the first rows of the combined frame.
merged_df.head(n=5)

Unnamed: 0,eco,name,pgn
0,A00,Amar Opening,1. Nh3
1,A00,Amar Opening: Paris Gambit,1. Nh3 d5 2. g3 e5 3. f4
2,A00,"Amar Opening: Paris Gambit, Gent Gambit",1. Nh3 d5 2. g3 e5 3. f4 Bxh3 4. Bxh3 exf4 5. ...
3,A00,Amsterdam Attack,1. e3 e5 2. c4 d6 3. Nc3 Nc6 4. b3 Nf6
4,A00,Anderssen's Opening,1. a3


In [17]:


# Work on a copy: split_pgn_to_columns adds columns to the frame it is given,
# so a plain alias would silently modify merged_df as well.
opening_moves = merged_df.copy()

def split_pgn_to_columns(df, pgn_column='pgn'):
    """Expand a PGN text column into a 'plies' column and one column per ply.

    Each PGN string is split on whitespace and every token ending in '.'
    (move numbers such as '1.') is discarded; the remaining tokens are kept
    in their original order as the plies of that row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the PGN text. It is modified in place.
    pgn_column : str, default 'pgn'
        Name of the column containing the PGN text.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with these columns added or overwritten:
        ``plies`` (the plies joined by single spaces) followed by
        ``Move_ply_1`` .. ``Move_ply_N``, where N is the largest ply count
        found in the column. Rows with fewer plies are padded with None.

    Raises
    ------
    KeyError
        If ``pgn_column`` is not a column of ``df``.

    Notes
    -----
    Cells that are not strings (for example missing values) are treated as
    having no moves, instead of raising AttributeError on ``.split()``.
    """
    # Tokenise every PGN once; non-string cells contribute an empty move list.
    split_data = [
        [token for token in pgn.split() if not token.endswith('.')]
        if isinstance(pgn, str) else []
        for pgn in df[pgn_column]
    ]

    # Space-separated plies with the move numbers removed.
    df['plies'] = [" ".join(moves) for moves in split_data]

    # The longest row decides how many Move_ply_* columns are needed.
    max_plies = max(map(len, split_data), default=0)

    # Column i holds the (i+1)-th ply of every row, or None for shorter rows.
    for i in range(max_plies):
        df[f'Move_ply_{i+1}'] = [
            moves[i] if i < len(moves) else None
            for moves in split_data
        ]

    return df

# Add the 'plies' and Move_ply_* columns, then print a preview of the first rows.
opening_moves = split_pgn_to_columns(opening_moves, pgn_column='pgn')

print(opening_moves.head(n=5))


   eco                                     name  \
0  A00                             Amar Opening   
1  A00               Amar Opening: Paris Gambit   
2  A00  Amar Opening: Paris Gambit, Gent Gambit   
3  A00                         Amsterdam Attack   
4  A00                      Anderssen's Opening   

                                                 pgn Move_ply_1 Move_ply_2  \
0                                             1. Nh3        Nh3       None   
1                           1. Nh3 d5 2. g3 e5 3. f4        Nh3         d5   
2  1. Nh3 d5 2. g3 e5 3. f4 Bxh3 4. Bxh3 exf4 5. ...        Nh3         d5   
3             1. e3 e5 2. c4 d6 3. Nc3 Nc6 4. b3 Nf6         e3         e5   
4                                              1. a3         a3       None   

  Move_ply_3 Move_ply_4 Move_ply_5 Move_ply_6 Move_ply_7  ... Move_ply_28  \
0       None       None       None       None       None  ...        None   
1         g3         e5         f4       None       None  ...        N

In [47]:
# Print the space-separated ply string stored for each opening.
print(opening_moves.loc[:, 'plies'])

0                                                     Nh3
1                                         Nh3 d5 g3 e5 f4
2            Nh3 d5 g3 e5 f4 Bxh3 Bxh3 exf4 O-O fxg3 hxg3
3                              e3 e5 c4 d6 Nc3 Nc6 b3 Nf6
4                                                      a3
                              ...                        
3463    d4 Nf6 c4 g6 Nc3 Bg7 e4 d6 Nf3 O-O Be2 e5 O-O ...
3464    d4 Nf6 c4 g6 Nc3 Bg7 e4 d6 Nf3 O-O Be2 e5 O-O ...
3465    d4 Nf6 c4 g6 Nc3 Bg7 e4 d6 Nf3 O-O Be2 e5 O-O ...
3466    d4 Nf6 c4 g6 Nc3 Bg7 e4 d6 Nf3 O-O Be2 e5 O-O ...
3467    d4 Nf6 c4 g6 Nc3 Bg7 e4 d6 Nf3 O-O Be2 e5 O-O ...
Name: plies, Length: 3468, dtype: object


In [30]:
class TrieNode:
    """A single node of the opening trie."""

    def __init__(self):
        # Name stored for the move sequence ending at this node; None if no
        # sequence has been inserted that ends here.
        self.opening_name = None
        # Maps a move to the node reached by following that move.
        self.children = {}


class Trie:
    """Prefix tree that maps move sequences to opening names."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, moves, opening_name):
        """Store ``opening_name`` at the end of the path spelled by ``moves``.

        Missing nodes along the path are created. Inserting the same sequence
        again overwrites the previously stored name.
        """
        current = self.root
        for ply in moves:
            following = current.children.get(ply)
            if following is None:
                following = TrieNode()
                current.children[ply] = following
            current = following
        current.opening_name = opening_name

    def search(self, moves):
        """Return the name stored on the deepest named node along ``moves``.

        The walk stops at the first move that has no matching child. Nodes
        whose stored name is falsy are skipped. Returns None when no named
        node is reached.
        """
        current = self.root
        best_match = None
        for ply in moves:
            current = current.children.get(ply)
            if current is None:
                # The sequence leaves the trie here; keep the best match so far.
                break
            if current.opening_name:
                best_match = current.opening_name
        return best_match

In [48]:
# Index every opening by its ply sequence so games can be matched by prefix.
trie = Trie()
# Walk the two needed columns directly: iterrows() would build a full Series
# for every row only to read two fields from it.
for opening, ply_text in zip(opening_moves['name'], opening_moves['plies']):
    trie.insert(ply_text.split(), opening)

In [53]:
# A single sample game, one column per ply played.
game_data = pd.DataFrame([
    {'move1': 'Nh3', 'move2': 'd5', 'move3': 'g3', 'move4': 'e5', 'move5': 'f4'},
])

# Collect each row's non-missing values, in column order, into one list.
game_data['move_sequence'] = game_data.apply(
    lambda game: game.dropna().tolist(),
    axis='columns',
)

# Look up the opening for one game's move list
def determine_opening(move_sequence, trie):
    """Return the result of ``trie.search`` for ``move_sequence``.

    Parameters
    ----------
    move_sequence : iterable
        The moves of one game, passed to ``trie.search`` unchanged.
    trie : object
        Any object exposing a ``search(moves)`` method.

    Returns
    -------
    Whatever ``trie.search(move_sequence)`` returns.
    """
    matched_opening = trie.search(move_sequence)
    return matched_opening

# Look up the opening for every game's move list.
game_data['Opening'] = [
    determine_opening(sequence, trie)
    for sequence in game_data['move_sequence']
]

print(game_data.loc[:, ['move_sequence', 'Opening']])

           move_sequence                     Opening
0  [Nh3, d5, g3, e5, f4]  Amar Opening: Paris Gambit


Unnamed: 0,eco,name,pgn,Move_ply_1,Move_ply_2,Move_ply_3,Move_ply_4,Move_ply_5,Move_ply_6,Move_ply_7,...,Move_ply_28,Move_ply_29,Move_ply_30,Move_ply_31,Move_ply_32,Move_ply_33,Move_ply_34,Move_ply_35,Move_ply_36,plies
0,A00,Amar Opening,1. Nh3,Nh3,,,,,,,...,,,,,,,,,,Nh3
1,A00,Amar Opening: Paris Gambit,1. Nh3 d5 2. g3 e5 3. f4,Nh3,d5,g3,e5,f4,,,...,,,,,,,,,,Nh3 d5 g3 e5 f4
2,A00,"Amar Opening: Paris Gambit, Gent Gambit",1. Nh3 d5 2. g3 e5 3. f4 Bxh3 4. Bxh3 exf4 5. ...,Nh3,d5,g3,e5,f4,Bxh3,Bxh3,...,,,,,,,,,,Nh3 d5 g3 e5 f4 Bxh3 Bxh3 exf4 O-O fxg3 hxg3
3,A00,Amsterdam Attack,1. e3 e5 2. c4 d6 3. Nc3 Nc6 4. b3 Nf6,e3,e5,c4,d6,Nc3,Nc6,b3,...,,,,,,,,,,e3 e5 c4 d6 Nc3 Nc6 b3 Nf6
4,A00,Anderssen's Opening,1. a3,a3,,,,,,,...,,,,,,,,,,a3
