In [27]:
import chess.pgn
import pandas as pd
import re  

# Header tags copied verbatim from each game into the row.
_HEADER_TAGS = (
    'Event', 'UTCDate', 'UTCTime',
    'White', 'WhiteElo', 'WhiteRatingDiff',
    'Black', 'BlackElo', 'BlackRatingDiff',
    'Result',
)

# Column order of the returned frame; passing it explicitly keeps the schema
# stable even when no game is read.
_GAME_COLUMNS = [
    'Event', 'Site', 'UTCDate', 'UTCTime',
    'White', 'WhiteElo', 'WhiteRatingDiff',
    'Black', 'BlackElo', 'BlackRatingDiff',
    'Result', 'Moves',
]

# Captures the game id that follows "lichess.org/" in the Site header.
_LICHESS_ID_RE = re.compile(r'lichess\.org/(\w+)')


def pgn_to_dataframe(pgn_file, num_games=10) -> pd.DataFrame:
    """Read up to ``num_games`` games from a PGN file into a DataFrame.

    Args:
        pgn_file: Path of the PGN file to read (opened as UTF-8 text).
        num_games: Maximum number of games to read. Reading stops earlier
            if the file runs out of games. Zero or a negative value reads
            nothing.

    Returns:
        A DataFrame with one row per game and the columns ``Event``,
        ``Site``, ``UTCDate``, ``UTCTime``, ``White``, ``WhiteElo``,
        ``WhiteRatingDiff``, ``Black``, ``BlackElo``, ``BlackRatingDiff``,
        ``Result`` and ``Moves``. Missing headers become empty strings.
        ``Site`` holds the id taken from a ``lichess.org/<id>`` URL, or the
        raw header value when it does not match. ``Moves`` is the main line
        as space-separated UCI moves. The columns are present even when no
        game was read.

    Raises:
        OSError: If ``pgn_file`` cannot be opened.
    """
    games = []

    # The parser works on text; choosing the encoding is the caller's job, so
    # do not depend on the platform's locale default.
    with open(pgn_file, 'r', encoding='utf-8') as f:
        for _ in range(num_games):
            game = chess.pgn.read_game(f)
            if game is None:
                # End of file reached before num_games games were read.
                break

            headers = game.headers
            row = {tag: headers.get(tag, '') for tag in _HEADER_TAGS}

            site = headers.get('Site', '')
            match = _LICHESS_ID_RE.search(site)
            row['Site'] = match.group(1) if match else site

            # mainline_moves() yields the Move objects directly, avoiding the
            # per-node board reconstruction that node.uci() performs.
            row['Moves'] = ' '.join(
                move.uci() for move in game.mainline_moves()
            )

            games.append(row)

    return pd.DataFrame(games, columns=_GAME_COLUMNS)

# Relative path of the PGN file to load (presumably a monthly Lichess export,
# judging by the file name).
pgn_file = 'Dataset/lichess_db_standard_rated_2015-04.pgn'  
# Parse at most the first 1000 games of that file into a DataFrame.
df = pgn_to_dataframe(pgn_file, num_games=1000)  


# Bare expression as the cell's last statement so the notebook renders the frame.
df



Unnamed: 0,Event,Site,UTCDate,UTCTime,White,WhiteElo,WhiteRatingDiff,Black,BlackElo,BlackRatingDiff,Result,Moves
0,Rated Blitz game,2xXa7xLj,2015.03.31,22:00:09,TheMagBumper,1577,5.0,hbustamantep,1383,-5.0,1-0,e2e4 g7g6 c2c3 e7e6 d2d4 f8g7 g1f3 a7a6 c1e3 b...
1,Rated Classical game,mQQKx0p4,2015.03.31,22:00:19,sacha-19683,1750,9.0,Matagorda,1696,-9.0,1-0,g1f3 d7d5 d2d4 e7e6 c2c4 d5c4 b1c3 c8d7 e2e4 b...
2,Rated Classical game,61C77Fz0,2015.03.31,22:00:22,Atlet27,1499,2.0,flick,1135,-4.0,1-0,e2e4 e7e5 g1f3 f8d6 d2d4 d6b4 c1d2 b4d2 b1d2 d...
3,Rated Bullet game,naulhHJf,2015.03.31,22:00:11,aristillusbas,1296,-8.0,Carflippedover,1441,7.0,0-1,e2e3 e7e6 f2f4 f8b4
4,Rated Blitz tournament https://lichess.org/tou...,tihphhiT,2015.03.31,22:00:31,shhone,1730,,layura,1851,,0-1,
5,Rated Bullet game,pLEt1siB,2015.03.31,22:00:18,Horse123,1681,12.0,the_fridge,1727,-12.0,1-0,e2e4 c7c5 g1f3 b8c6 b1c3 g8f6 d2d3 e7e6 f1e2 a...
6,Rated Blitz tournament https://lichess.org/tou...,k0ZYuiox,2015.03.31,22:00:04,andre032,1821,-8.0,marcelosx,1529,8.0,1/2-1/2,e2e4 c7c6 f2f4 d7d5 e4e5 c8f5 g1f3 h7h6 d2d4 e...
7,Rated Blitz game,J840PB0K,2015.03.31,22:00:15,juliocoltrane,1874,2.0,simoni2525,1459,-3.0,1-0,c2c4 e7e5 b1c3 d7d6 g2g3 g8f6 f1g2 c7c6 g1f3 f...
8,Rated Bullet tournament https://lichess.org/to...,V40Gyld0,2015.03.31,22:00:01,DFDSFSDFSDF,1500,110.0,Toljamo,1332,-23.0,1-0,d2d4 d7d5 e2e4 b8c6 c2c4 d5e4 d4d5 c6e5 b1c3 g...
9,Rated Blitz game,uCprhl5A,2015.03.31,22:00:11,r4mp24l1s,1772,-6.0,oljica,1961,6.0,0-1,e2e4 d7d5 e4d5 d8d5 h2h3 e7e5 b1c3 d5a5 f1c4 g...
