#### Installing required dependencies

In [1]:
!pip install chess



In [2]:
import requests
import json

import bs4
import chess
import chess.pgn
import chess.engine
import pandas as pd
from tqdm.notebook import trange, tqdm

#### Main body of parsing process

In [3]:
# Start the bundled Stockfish binary as a UCI engine subprocess.
# The path is relative to the working directory and points at a Windows build.
# NOTE(review): nothing in this notebook shuts the engine down; call
# engine.quit() once the analysis is finished.
stockfish_path = 'stockfish/stockfish-windows-x86-64-sse41-popcnt.exe'
engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)

In [4]:
# Download one chapter page of the study; the timeout keeps the cell from
# hanging indefinitely if the server never answers.
resp = requests.get('https://lichess.org/study/Mj0wdEVK/pzkfIdqR', timeout=30)
# Stop on an HTTP error instead of saving (and later parsing) an error page.
resp.raise_for_status()
# Save the raw HTML to page.html.
with open('page.html', 'w', encoding='UTF-8') as f:
    f.write(resp.text)

In [5]:
# Parse the downloaded HTML with the lxml parser backend.
soup = bs4.BeautifulSoup(markup=resp.text, features='lxml')

In [6]:
# Locate the <script id="page-init-data"> tag; its text is decoded as JSON in
# the next cell. find() returns None when no such tag exists.
move_comments = soup.find(name='script', attrs={'id': 'page-init-data'})

In [7]:
# Decode the JSON payload embedded in the script tag.
page_init_json = move_comments.text
study = json.loads(page_init_json)

In [8]:
# Collect the id of every chapter listed under study -> chapters.
chapter_entries = study['study']['chapters']
chapters = [entry['id'] for entry in chapter_entries]

In [None]:
# Column-oriented accumulator: every analysed position appends exactly one
# value to each of the four lists, so they can be turned into a DataFrame.
data = {'uci': [], 'eval': [], 'fen': [], 'comment': []}

# One HTTP session for all chapter pages so the connection is reused.
with requests.Session() as session:
    for chapter_id in tqdm(chapters):
        resp = session.get(
            f'https://lichess.org/study/Mj0wdEVK/{chapter_id}',
            timeout=30,
        )
        # Fail loudly on HTTP errors (e.g. rate limiting) rather than trying
        # to parse an error page.
        resp.raise_for_status()

        soup = bs4.BeautifulSoup(resp.text, 'lxml')
        move_comments = soup.find('script', id='page-init-data')
        if move_comments is None:
            raise ValueError(
                f'page-init-data script not found for chapter {chapter_id}'
            )
        study = json.loads(move_comments.text)

        for ply in tqdm(study['data']['treeParts']):
            # Entries without comments (missing key or empty list) yield ''.
            comment = '\n'.join(c['text'] for c in ply.get('comments') or [])

            # Evaluate the position with the engine at a fixed search depth.
            board = chess.Board(ply['fen'])
            info = engine.analyse(board, chess.engine.Limit(depth=20))
            eval_ = info['score'].white()  # score from White's point of view

            # Append all four columns together, only after the analysis has
            # succeeded, so the lists stay the same length even if the engine
            # call raises or the cell is interrupted.
            data['fen'].append(ply['fen'])
            # Entries that carry no 'uci' key are stored with an empty move.
            data['uci'].append(ply.get('uci', ''))
            data['comment'].append(comment)
            data['eval'].append(eval_)

In [None]:
# Build a table with one row per collected position; the dict keys become the
# columns (uci, eval, fen, comment).
df = pd.DataFrame(data=data)
# Bare expression so the notebook renders the frame as the cell output.
df

In [23]:
# Prepare the table for export:
#   * add an `evaluation` column holding str() of each engine score and drop
#     the original `eval` column,
#   * rename `uci` to `move`,
#   * discard rows whose move is the empty string,
#   * renumber the rows; the previous row labels are kept as an `index` column.
df = (
    df.assign(evaluation=lambda frame: frame['eval'].map(str))
    .drop(columns=['eval'])
    .rename(columns={'uci': 'move'})
    .loc[lambda frame: frame['move'] != '']
    .reset_index()
)
# Write the result to a parquet file in the working directory.
df.to_parquet('chess_comments.parquet')