# Lichess Puzzles

### Ein Überblick über den Datensatz

In [1]:
import numpy
import pandas
import pickle

Das Lichess Puzzle Dataset kann man sich von https://database.lichess.org/#puzzles herunterladen.

In [2]:
# Column names assigned to the puzzle CSV, in file order.
column_names = (
    'PuzzleId', 'FEN', 'Moves', 'Rating', 'RatingDeviation',
    'Popularity', 'NbPlays', 'Themes', 'GameUrl',
)

# header=None makes pandas treat the first line as data, so the names above are
# used instead. NOTE(review): assumes the downloaded CSV has no header row of
# its own - confirm when updating the database dump.
lichess_puzzles = pandas.read_csv(
    "data/lichess_db_puzzle.csv",
    names=column_names,
    header=None,
)

In [33]:
# Preview the first rows of the loaded puzzle table.
lichess_puzzles.head()

Unnamed: 0,PuzzleId,FEN,Moves,Rating,RatingDeviation,Popularity,NbPlays,Themes,GameUrl
0,00008,r6k/pp2r2p/4Rp1Q/3p4/8/1N1P2R1/PqP2bPP/7K b - ...,f2g3 e6e7 b2b1 b3c1 b1c1 h6c1,1974,74,91,324,crushing hangingPiece long middlegame,https://lichess.org/787zsVup/black#48
1,0000D,5rk1/1p3ppp/pq3b2/8/8/1P1Q1N2/P4PPP/3R2K1 w - ...,d3d6 f8d8 d6d8 f6d8,1506,73,96,6490,advantage endgame short,https://lichess.org/F8M8OS71#53
2,0009B,r2qr1k1/b1p2ppp/pp4n1/P1P1p3/4P1n1/B2P2Pb/3NBP...,b6c5 e2g4 h3g4 d1g4,1111,75,85,482,advantage middlegame short,https://lichess.org/4MWQCxQ6/black#32
3,000aY,r4rk1/pp3ppp/2n1b3/q1pp2B1/8/P1Q2NP1/1PP1PP1P/...,g5e7 a5c3 b2c3 c6e7,1367,74,92,318,advantage master middlegame short,https://lichess.org/iihZGl6t#29
4,000h7,3q1rk1/1pp3pp/5p1P/4pPP1/rb1pP3/3P1N2/b1P1B3/2...,d8a8 g5g6 h7g6 h6g7,2349,86,83,223,advancedPawn crushing kingsideAttack middlegam...,https://lichess.org/FLmpZbTm/black#52


In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
lichess_puzzles.describe()

Unnamed: 0,Rating,RatingDeviation,Popularity,NbPlays
count,1871838.0,1871838.0,1871838.0,1871838.0
mean,1526.02,97.43574,81.02724,711.6815
std,466.6261,65.57248,27.96751,2413.816
min,511.0,50.0,-102.0,0.0
25%,1169.0,75.0,79.0,60.0
50%,1495.0,78.0,89.0,225.0
75%,1858.0,90.0,94.0,614.0
max,2886.0,500.0,101.0,309613.0


### Suche nach schwierigen Aufgaben

In [8]:
# This flag allows disabling the use of already computed data after
# changing filter parameters or updating the puzzle database.
# True: try to load the pickled hard-puzzle list first; False: always recompute.
use_cache = True

# Relative path to the pickle file that caches the hard-puzzle list
cache_file = "pickles/hard_puzzles.pickle"

In [22]:
def find_hard_puzzles(puzzles_df: pandas.DataFrame, min_nbPlays=100, min_rating=2500, max_deviation=80, deviation_scaling=0.1):
    '''Filter a puzzle dataset for often played, highly rated puzzles with a low rating deviation.

    A puzzle is kept when all of the following hold:
      * NbPlays >= min_nbPlays
      * Rating >= min_rating
      * RatingDeviation <= max_deviation + (Rating - min_rating) * deviation_scaling,
        i.e. the tolerated deviation grows linearly with the rating above min_rating.

    Returns a list with one row (pandas.Series) per matching puzzle, in dataset order.
    '''
    if len(puzzles_df) == 0:
        return []

    rating = puzzles_df["Rating"]
    allowed_deviation = max_deviation + (rating - min_rating) * deviation_scaling
    keep = (
        (puzzles_df["NbPlays"] >= min_nbPlays)
        & (rating >= min_rating)
        & (puzzles_df["RatingDeviation"] <= allowed_deviation)
    )
    # The filter runs vectorised over the whole frame; only the matching rows
    # are turned into Series objects, which is what callers sort and pickle.
    return [puzzle for _, puzzle in puzzles_df[keep].iterrows()]

In [12]:
# Load the hard puzzles from the pickle cache when allowed; if the cache is
# disabled, missing or unreadable, recompute them from the full dataset.
hard_puzzles = None
if use_cache:
    try:
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # cache files that were written by this notebook.
        with open(cache_file, "rb") as fp:  # Unpickling if already computed
            hard_puzzles = pickle.load(fp)
    except Exception as err:
        # Deliberately best-effort, but no bare `except:` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and answered with a long recompute.
        print("Could not load", cache_file, "-", repr(err), "- recomputing.")

if hard_puzzles is None:
    hard_puzzles = find_hard_puzzles(lichess_puzzles)

    # save the freshly computed hard puzzles for later
    with open(cache_file, "wb") as fp:
        pickle.dump(hard_puzzles, fp)

In [75]:
def print_puzzle_list(puzzles, show_move_number=False, show_game_url=True):
    '''Print a fixed-width table of puzzles, followed by the total number of puzzles.

    puzzles: sized iterable of rows (e.g. pandas.Series or dict) providing the keys
        "PuzzleId", "Rating", "RatingDeviation", "NbPlays" and "Popularity";
        "Moves" is read only if show_move_number is set, "GameUrl" only if
        show_game_url is set.
    show_move_number: add a "Moves" column with half the number of
        space-separated entries in "Moves" (rounded down).
    show_game_url: add a "GameURL" column.
    '''
    head = "ID    Rating  Deviation  NbPlays  Popularity"
    if show_move_number:
        head += "  Moves"
    if show_game_url:
        head += "  GameURL"
    print(head)
    for puzzle in puzzles:
        line = (
            puzzle["PuzzleId"] + "   "
            + str(puzzle["Rating"]).rjust(4)
            + "        " + str(puzzle["RatingDeviation"]).rjust(3)
            + str(puzzle["NbPlays"]).rjust(9)
            # Right-justify under the "Popularity" header so that values such as
            # 100 or -52 no longer shift the columns that follow.
            + str(puzzle["Popularity"]).rjust(12)
        )
        if show_move_number:
            line += str((puzzle["Moves"].count(' ') + 1) // 2).rjust(7)
        if show_game_url:
            line += "  " + puzzle["GameUrl"]
        print(line)
    print("Insgesamt:", len(puzzles))

In [45]:
# Print all hard puzzles, omitting the GameURL column.
print_puzzle_list(hard_puzzles, show_game_url=False)

ID    Rating  Deviation  NbPlays  Popularity
005yO   2783         98      182          87
00EUu   2583         86      364          91
00FGo   2791        108      558          90
00Jku   2687         90      124          88
012Fl   2546         77      169          93
01BG7   2565         80      132          93
01M07   2603         77      306          85
02TxK   2751        105      541          93
02YEp   2575         76     1522          94
02ti8   2734         89      988          93
033uv   2501         79      207          83
034qW   2672         86      707          95
03ASa   2689         95      128          85
03gc3   2763         93      102          80
03r9U   2693         95      199          85
03wAt   2534         77      365          93
040I3   2576         78      504          95
04GC9   2690         98      673          92
04L0r   2549         80      107          88
04S0T   2793        109      350          81
04TFa   2597         84      109          76
04rYU   27

7qWz0   2684         79      276          94
7qhqM   2747         99      721          92
7qsOH   2797        104      163          74
7r5NG   2517         78      187          96
7rObt   2706         94      144          75
7s4Fj   2506         78     1381          95
7sXmD   2798        106      218          84
7sZSi   2587         83      294          83
7sa3P   2671         92      383          85
7slTf   2657         87     1434          94
7tDBq   2755        103      116          94
7tNcB   2527         81     1530          92
7tSlm   2631         92     1813          91
7tgDU   2566         79      359          91
7tine   2604         87      188          91
7u4Iw   2659         83      290          83
7uOON   2648         83      398          86
7uxO2   2586         88      332          75
7v0qD   2537         76      530          93
7v4BY   2554         78      560          96
7vHk9   2732         95      759          86
7vbAo   2503         75      278          93
7vj72   25

EnRxM   2688         82      173          90
EnhvW   2717         90      102          69
Eo8rC   2714         96      970          93
EoDFI   2594         86      267          83
EouUm   2677         90      219          94
EpFXH   2781        105      173          77
EpGaa   2680         89      378          93
EpVY0   2664         82     1455          92
Epax2   2721         92      883          83
EpbVn   2584         81     1616          93
EqAw3   2651         90      128          17
EqCnd   2791        105      477          90
EqS7Y   2659         82      589          97
Er7jZ   2525         77     1305          93
ErDjG   2614         88      103          95
ErQlB   2668         87      377          85
ErWML   2657         91      379          84
ErWpS   2630         87      161          78
Es6V7   2531         81      366          92
EsABh   2785         99      281          77
EsFam   2601         83      123          78
EsKbC   2664         90      116          74
Esnv1   25

MhyzM   2705         97      312          85
MiGoE   2691         89      165          94
MiaQ2   2598         81      207          92
MiuSq   2696         91      110          76
MjB0Z   2562         83      325          85
MjLTL   2703         87      110          100
MjoEr   2565         83      169          95
MjtMv   2699         91      251          85
MkCT2   2559         85     1006          89
MkODZ   2759        100      207          98
MkZZM   2775         98      177          95
Mkkm8   2786         95      254          84
Mksk7   2799        106      141          91
Mkzny   2694         88      313          92
Ml9PJ   2673         89      258          83
MlFVr   2796        101      129          63
MlvLC   2518         75      886          95
MmGDa   2685         88      182          97
Mmbb3   2727         88      413          83
Mmub4   2770         91      132          93
MnGYt   2519         78     1769          93
Mnfqn   2649         87      172          88
MnqAM   2

Ss5ei   2581         79      165          96
SsARj   2504         77      120          82
SsKMJ   2542         84      200          96
SsUFg   2574         86      756          92
SsX6q   2631         87      175          94
Ssd48   2721         91      144          99
Sse68   2611         80      106          93
SsjVX   2591         82      131          77
Ssl0e   2749         89      102          61
SsnBb   2652         92      534          86
StEyQ   2726         96      406          82
StIzP   2679         84      183          94
SuA3S   2729        100      252          86
SvC0i   2623         80      164          80
SvC9e   2685         98      397          91
SwGti   2533         79      142          66
Swhm6   2583         84     1880          92
Sx0GK   2655         85      164          81
Sx61M   2655         86      131          67
SxFuN   2508         80      243          95
Sy7g6   2654         89      640          94
SyAPV   2779        106      280          83
SyEpF   25

ZpiTt   2660         88      365          88
ZqBFy   2717         98      139          95
ZqGCo   2660         87      168          95
ZqcSO   2634         81      108          68
ZqfHa   2512         80      598          92
Zqfaq   2571         86      172          82
ZqnCz   2733         90      118          98
ZrVhx   2570         85      535          86
Zrw6e   2563         82      227          86
Zrywh   2758         88      100          76
Zs1mq   2679         87     1148          94
Zs55w   2777        106      426          87
Zs9Y8   2784         99      157          87
ZskcU   2549         80     1609          92
ZsnvK   2558         81     1775          92
Zt7EO   2729         87     1843          83
ZtGmB   2647         83      406          96
ZtKzC   2593         84      391          92
ZtULi   2588         85      108          72
ZuDZh   2743         99      431          87
ZuM0x   2570         81      105          99
ZuUmf   2714         90      206          96
ZujT8   27

gSiRb   2564         80      217          83
gSiT2   2626         83     1550          95
gTArU   2681         92      160          82
gTJWj   2742         87      330          87
gTu6g   2788         91      239          98
gUXw5   2676         97      141          92
gUaZd   2522         82     1260          89
gUjku   2658         90      597          93
gV8BV   2615         84      160          86
gVAGf   2628         91      142          76
gVL7K   2706         89      327          93
gVXsj   2727        100      255          82
gVZaG   2545         77      946          95
gVaiN   2707         87      261          94
gVbMF   2503         80      222          84
gVk3y   2695         98      106          91
gVkiP   2550         79     1485          92
gVqYq   2690         92      148          92
gWKAh   2799         99      599          85
gWLhs   2510         76      801          91
gWQu5   2778        103      139          82
gWc9B   2541         80      481          85
gWcOZ   27

niRmH   2668         88      182          79
niShs   2506         78      200          90
niVtg   2509         79      273          95
niXJL   2664         84     1294          92
njG3w   2592         80      312          84
njyO1   2777        104      230          87
nkB5E   2613         90      450          90
nkE8j   2536         81      145          77
nkJxc   2510         80      616          94
nkaCd   2797        108      123          82
nkua6   2765        106      460          67
nlIqQ   2697         89     1077          93
nlSNp   2695         91      228          94
nlY0J   2561         82      500          92
nlbrv   2545         83      194          78
nluIw   2712         79      134          94
nm0ib   2603         87      721          93
nmQSW   2797        107      177          87
nmWV7   2535         82      354          85
nmXLU   2560         80      163          94
nmfSL   2541         84      529          92
nmhNU   2534         76      498          94
nnhNQ   25

vwCPs   2725         92      693          85
vwCg8   2585         84      162          98
vwViu   2626         83      130          96
vweIC   2567         78      655          94
vwkuL   2560         77      863          96
vwreJ   2582         77      260          93
vx7bd   2602         84      187          79
vx8zZ   2782         99      140          73
vxIiu   2549         80      171          79
vxzPm   2735         95      268          94
vy2dG   2748         94      376          93
vyY9j   2727         94      337          88
vz5Xa   2634         93      112          86
vzK27   2732         93      647          87
vzPsj   2539         80      250          97
vzSSa   2626         90      408          94
vzjQ4   2740         94      227          85
vzlQ7   2742        103      127          88
vzxTN   2771        101      245          81
w0YCX   2735         90      236          94
w1AIc   2751         95      288          84
w1D5u   2584         78      457          92
w1EJR   27

In [30]:
# Rebuild a DataFrame from the hard-puzzle rows so they can be filtered again.
hard_puzzles_df = pandas.DataFrame.from_records(hard_puzzles, columns=column_names)

# Apply stricter thresholds and order the result by rating, highest first.
superhard_puzzles = sorted(
    find_hard_puzzles(
        hard_puzzles_df,
        min_nbPlays=140,
        min_rating=2600,
        max_deviation=75,
        deviation_scaling=0.08,
    ),
    key=lambda puzzle: puzzle["Rating"],
    reverse=True,
)

In [46]:
# Print the superhard puzzles, including the GameURL column (the default).
print_puzzle_list(superhard_puzzles)

ID    Rating  Deviation  NbPlays  Popularity  GameURL
mW0J3   2878         92      348          89  https://lichess.org/Fz1ew5a9#97
F9PRJ   2799         90      172          83  https://lichess.org/Gqql5MbC/black#38
GX19F   2799         86      602          93  https://lichess.org/PgMsNJ4S/black#78
Wiw7f   2799         87      334          91  https://lichess.org/z3JKtf3o/black#76
1rQVB   2798         87      339          88  https://lichess.org/mhJp2mxn/black#58
54zQR   2798         90      427          95  https://lichess.org/oQ0eCQaX/black#32
ojaVO   2798         88      696          94  https://lichess.org/zmlMrzs0#83
GrsBq   2797         90      673          93  https://lichess.org/9glDsAzH#61
UajG5   2797         90      177          84  https://lichess.org/0RISYYGF#99
pzMts   2797         90      177          83  https://lichess.org/eZuXKyjA/black#64
v1MdQ   2797         89      157          95  https://lichess.org/zBt1MEio#79
G4dQn   2796         88      172          80  https:

### Sehr lange Aufgaben

Hier suche ich unter den schwierigsten Aufgaben anhand der Zuganzahl in der Lösung nach denen mit den meisten erzwungenen Zügen und nach den längsten Matts.

In [52]:
def find_long_puzzles(puzzles_df: pandas.DataFrame, only_mates=False, min_moves=8):
    '''Return the puzzles whose solution is long, i.e. needs many correct moves in a row.

    A puzzle qualifies when its "Moves" string contains at least
    2 * min_moves - 1 spaces. With only_mates set, the puzzle's "Themes"
    string must additionally contain the text "mate".

    Returns a list of the matching rows (pandas.Series) in dataset order.
    '''
    required_spaces = 2 * min_moves - 1

    def qualifies(row):
        # The theme check comes first and is skipped entirely unless only_mates is set.
        if only_mates and "mate" not in row["Themes"]:
            return False
        return row["Moves"].count(' ') >= required_spaces

    return [row for _, row in puzzles_df.iterrows() if qualifies(row)]

In [54]:
# Long hard puzzles, longest solution first. The move count is weighted by
# 10000 so that it dominates the rating, which then orders equally long puzzles.
long_hard_puzzles = sorted(
    find_long_puzzles(hard_puzzles_df),
    key=lambda puzzle: puzzle["Rating"] + 10000 * puzzle["Moves"].count(' '),
    reverse=True,
)
print_puzzle_list(long_hard_puzzles, show_move_number=True)

ID    Rating  Deviation  NbPlays  Popularity  Moves  GameURL
CkCGL   2744        101      186          82     13  https://lichess.org/aIlrTXV1/black#100
xQtzd   2772        101      164          80     12  https://lichess.org/e3Nrvy5r#51
rjjL0   2760         88      334          97     12  https://lichess.org/os7B4j2t#73
Wonmj   2728         95      431          87     12  https://lichess.org/fUOEXrJ6/black#90
sc6mG   2797        102      253          86     11  https://lichess.org/dV5WlvsG#45
7D0N4   2793        101      445          93     11  https://lichess.org/Yiyk2ydB/black#88
HrQoy   2748         95      444          92     11  https://lichess.org/UEpGnB3a#97
1P1ML   2712         98      824          90     11  https://lichess.org/HtM7uoph/black#98
1mdK7   2696         90      265          95     11  https://lichess.org/Dw2tZCoV/black#78
BvIfy   2685         96     1164          91     11  https://lichess.org/9lWeXyUq/black#70
JZHWt   2682         86      177          77     11 

In [56]:
# Long hard puzzles with a mate theme (at least 7 moves), longest solution
# first; the rating orders puzzles of equal length.
long_mates = sorted(
    find_long_puzzles(hard_puzzles_df, only_mates=True, min_moves=7),
    key=lambda puzzle: puzzle["Rating"] + 10000 * puzzle["Moves"].count(' '),
    reverse=True,
)
print_puzzle_list(long_mates, show_move_number=True)

ID    Rating  Deviation  NbPlays  Popularity  Moves  GameURL
kuxpS   2701         97     1296          94     10  https://lichess.org/2ZGZECbS#65
kxyJ7   2758         95     1624          95      8  https://lichess.org/vkyuwHBc#41
QGtzX   2679         94     1746          93      8  https://lichess.org/HydDdc2X/black#70
uxBsV   2678         96      208          89      8  https://lichess.org/MsPdISIs/black#38
Swhm6   2583         84     1880          92      8  https://lichess.org/xqa9Ospm/black#20
42guN   2761         93     1150          94      7  https://lichess.org/BYYWwm4m#47
eoA0f   2754        101      761          86      7  https://lichess.org/5SvlonmZ/black#58
Lk9ZY   2726         92     1058          93      7  https://lichess.org/sz6bmlph/black#40
5QUgd   2685         92     1831          97      7  https://lichess.org/jcPI9orx#65
88g2C   2684         96      205          86      7  https://lichess.org/CKUjthOO/black#36
73N3v   2682         93      649          86      7  

Eine Überprüfung auf dem gesamten Aufgaben-Set hat ergeben, dass dieses Matt in 10 tatsächlich auch das längste Matt in den Lichess-Aufgaben ist.

### Beliebte und unbeliebte Aufgaben

Der Median der Beliebtheitswerte liegt bei 89 (der Durchschnitt wird von einigen klaren Ausreißern auf 81 heruntergezogen), das 75. Perzentil bei 94. Daher wird bei unbeliebten Aufgaben vermutlich deutlich klarer sein, wieso sie als unbefriedigend markiert wurden, während die beliebtesten nur minimal herausstechen werden.

In [73]:
def find_popular_puzzles(puzzles_df: pandas.DataFrame, min_popularity=100, min_nbPlays=150):
    '''Filter a puzzle dataset for puzzles with a high popularity value.

    A puzzle is kept when NbPlays >= min_nbPlays and Popularity >= min_popularity.

    Returns a list with one row (pandas.Series) per matching puzzle, in dataset order.
    '''
    if len(puzzles_df) == 0:
        return []

    keep = (puzzles_df["NbPlays"] >= min_nbPlays) & (puzzles_df["Popularity"] >= min_popularity)
    # Vectorised filter; only the matching rows are materialised as Series.
    return [puzzle for _, puzzle in puzzles_df[keep].iterrows()]

def find_unpopular_puzzles(puzzles_df: pandas.DataFrame, max_popularity=-1, min_nbPlays=150):
    '''Filter a puzzle dataset for puzzles with a low popularity value.

    A puzzle is kept when NbPlays >= min_nbPlays and Popularity <= max_popularity.

    Returns a list with one row (pandas.Series) per matching puzzle, in dataset order.
    '''
    if len(puzzles_df) == 0:
        return []

    keep = (puzzles_df["NbPlays"] >= min_nbPlays) & (puzzles_df["Popularity"] <= max_popularity)
    # Vectorised filter; only the matching rows are materialised as Series.
    return [puzzle for _, puzzle in puzzles_df[keep].iterrows()]

In [71]:
# Hard puzzles passing the default popularity filter, highest rated first.
popular_hard_puzzles = sorted(
    find_popular_puzzles(hard_puzzles_df),
    key=lambda puzzle: puzzle["Rating"],
    reverse=True,
)
print_puzzle_list(popular_hard_puzzles)

ID    Rating  Deviation  NbPlays  Popularity  GameURL
W0VRm   2779         99      163          100  https://lichess.org/sMDukdml#67
ichQR   2773         89      202          100  https://lichess.org/zH1YStqu#81
IOcwT   2772         91      158          100  https://lichess.org/PpojrkSg#69
FFH83   2757         88      151          100  https://lichess.org/MOxQ4tIC/black#84
14vCC   2712         90      153          100  https://lichess.org/WFyW7hVZ#125
NtiYk   2673         82      151          100  https://lichess.org/Gk7iCzBH/black#84
lKqo1   2667         88      262          100  https://lichess.org/MoJYu1pr/black#104
oeRHl   2655         89      199          100  https://lichess.org/rkJHXKDC#109
puTR5   2630         82      217          100  https://lichess.org/m926Q3aN/black#46
EYcFC   2624         83      174          100  https://lichess.org/aTLW60YH#113
cmffN   2622         80      235          100  https://lichess.org/E62xt680/black#60
r0CBr   2593         82      164          1

In [69]:
# Hard puzzles with a popularity of at most 50, least popular first; puzzles
# with equal popularity are ordered by rating, highest first.
unpopular_hard_puzzles = find_unpopular_puzzles(hard_puzzles_df, max_popularity=50, min_nbPlays=100)
# A tuple key gives a true two-level ordering. The former arithmetic key
# (Popularity * 1000 - Rating) mis-orders puzzles as soon as two ratings
# differ by more than 1000.
unpopular_hard_puzzles.sort(key=lambda p: (p["Popularity"], -p["Rating"]))
print_puzzle_list(unpopular_hard_puzzles)

ID    Rating  Deviation  NbPlays  Popularity  GameURL
EqAw3   2651         90      128          17  https://lichess.org/EYnrqrLd#69
clPWW   2783         98      252          34  https://lichess.org/J7aakb8W/black#46
EYjDO   2710        101      275          46  https://lichess.org/lfawOvsn/black#102
tqSCl   2517         81      107          47  https://lichess.org/r1WofGZr#59
RwmwV   2501         79      360          48  https://lichess.org/keMktfdU#99
7O9mh   2769         97      235          49  https://lichess.org/t7sEDMs7#57
XAeCj   2535         81      216          49  https://lichess.org/351tqD7W/black#100
xMHjN   2798         93      121          50  https://lichess.org/ElMfgOkK/black#26
5sJlK   2666         85      110          50  https://lichess.org/JBOuWp1F#61
0ObIa   2640         93      106          50  https://lichess.org/MZa6tqBz/black#74
Insgesamt: 10


In [76]:
# Across the entire dataset: least popular first; puzzles with equal
# popularity are ordered by rating, highest first.
unpopular_puzzles = find_unpopular_puzzles(lichess_puzzles)
# A tuple key gives a true two-level ordering. The former arithmetic key
# (Popularity * 1000 - Rating) breaks here because ratings in the full dataset
# differ by more than 1000, so a more popular but much higher rated puzzle
# could be listed before a less popular one.
unpopular_puzzles.sort(key=lambda p: (p["Popularity"], -p["Rating"]))
print_puzzle_list(unpopular_puzzles)

ID    Rating  Deviation  NbPlays  Popularity  GameURL
8BkZm   1315         78      416          -52  https://lichess.org/RmcJsyAV/black#96
imMRZ   1400         80      153          -43  https://lichess.org/QcZb2rvs#47
CTLPO   1131         77      216          -43  https://lichess.org/hV6QIRdj/black#44
UQ80X   1240         79      162          -33  https://lichess.org/7hBf5FUo/black#56
gD7mJ    689         88      191          -30  https://lichess.org/P16dgGrN/black#76
IhAMK    598        104      157          -29  https://lichess.org/cWTribHK#27
HLEwA   1336         78      153          -26  https://lichess.org/cdnXFHsK#29
6kWcc   1295         78      155          -26  https://lichess.org/cgU3cZzd#55
lexqf   1238         80      174          -26  https://lichess.org/zkQvqKIW/black#88
zj66O   1429         79      152          -24  https://lichess.org/P4CQ7zWx/black#50
vkL76   1513         77      157          -22  https://lichess.org/5LcOFGMZ#75
xZFk4   1465         77      282         