In [1]:
'''
In this part we build our 2-param MIRT Model and train it on the participants data.
Afterwards we extract the subskills and use these to generate training puzzles for the participants.
'''

import numpy as np
import sklearn as sklearn
from sklearn import linear_model
import openpyxl
import csv
import math

np.set_printoptions(suppress=True, precision=10) #setting print options

themes = ['castling', 'sacrifice', 'mateIn3', 'capturingDefender', 'doubleCheck', 'Bishop', 'mateIn4', 'clearance', 'enPassant', 'hangingPiece', 'exposedKing', 'opening', 'deflection', 'intermezzo', 'fork', 'crushing', 'advancedPawn', 'King', 'endgame', 'Pawn', 'Rook', 'promotion', 'mateIn2', 'Queen', 'trappedPiece', 'pin', 'skewer', 'xRayAttack', 'discoveredAttack', 'middlegame', 'interference', 'advantage', 'defensiveMove', 'attraction', 'mateIn5', 'kingsideAttack', 'Knight', 'queensideAttack']
excluded_themes = []    # if I would have liked to make further changes to the skills, I could have added themes I wanted to exclude here.
                        # These themes would then not have been used to train the MIRT model. 
                        # The idea behind it was, that some participants might have reached the skill ceiling in some skills like 'enPassant', but this was neglected due to poor performances of the model.
themesWithoutExcludedThemes = [theme for theme in themes if theme not in excluded_themes]

completedPuzzles = []
y_Row = []

'''
The following was done for Participant1, to get the results for the other participants one can simply exchange the files.
Everything else is automatic.
'''

filenames = ['Participant1_1300/Participant1_InitialPuzzles1300_solved.xlsx'] #This is the file where the participant documented whether the puzzle was solved correctly or not.
csvFilenames = ['Wieland/InitialPuzzles1300.csv']
for i in range(len(filenames)):
    wb = openpyxl.load_workbook(filenames[i]) #Read the file
    sheet = wb.active
    data = []


    for row in sheet.iter_rows(values_only=True):
        data.append(list(row))

    y_Row_Pre = [] #the yRow will store all the 0's for wrongly solved puzzles and 1's for correctly solved puzzles. We don't write this into the yRow directly, as we check for some errors in the next part.
    for row in data[1:]: #Get the rows starting from row[1], so removing the header
        y_Row_Pre.append(row[1])

    initialPuzzles = []
    with open(csvFilenames[i], newline='') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in spamreader:
            initialPuzzles.append(row)
    initialPuzzles.remove(initialPuzzles[0]) #This removes the header

    for i in range(len(y_Row_Pre)):
        if(y_Row_Pre[i] == 0 or y_Row_Pre[i] == 1): #Here we check if the participant used correct documentation, if he didn't write a 0 or 1 as a result to a puzzle we neglect the puzzle
            completedPuzzles.append(initialPuzzles[i])
            y_Row.append(y_Row_Pre[i])

#This extremeCoutner adds easy and hard tasks manually to our X-Matrix, so for extremeCounter=2 it will add 1 easy task for each theme and 1 hard task for each theme
extremeCounter = 2 #extremeCounter/2 easy and hard tasks (0 and 1)


X_Matrix = np.zeros(shape=(len(y_Row) + extremeCounter * len(themesWithoutExcludedThemes),(len(themesWithoutExcludedThemes) + 1))) #Our X-Matrix used for training

for i in range(len(completedPuzzles)):
    filtered_themes = [theme.strip(" '") for theme in completedPuzzles[i][10].strip("[]").split(',') if theme.strip(" '") in themesWithoutExcludedThemes] #Here we filter the themes that were in the puzzle. This filtering is weird due to the format change after opening a csv on Windows.
    filtered_theme_count = len(filtered_themes) #We need this to scale our skills, if 3 skills are needed we need to add 1/3 to our matrix
    X_Matrix[i][len(themesWithoutExcludedThemes)] = int(completedPuzzles[i][3]) #add the rating of puzzle i to the last column in row i of our X-Matrix
    for j in range(len(themesWithoutExcludedThemes) ):
        if(themesWithoutExcludedThemes[j] in completedPuzzles[i][10]): #if a theme appears in our puzzle, we change the entry in the X-Matrix from 0 to 1/len(filtered_theme_count)
            X_Matrix[i][j] = float(1) / filtered_theme_count

#The mean of the tasks completed by the participant, used for future adjustments
taskMean = np.sum([float(puzzle[3]) for puzzle in completedPuzzles]) / len(completedPuzzles)

#The previousPlayerRating takes into account if the participant did well on the inital puzzles. It adds 5 for every correct task and substracts 5 for every incorrect task.
previousPlayerRating = taskMean + np.sum([result * 10 - 5 for result in y_Row]) #weighting the taskDifficulty doesn't make a difference, such as (difficulty/mean)*3 + 2 for correct answers


for j in range(extremeCounter):
    for i in range(len(themesWithoutExcludedThemes)):
        if(j % 2 == 0):
            X_Matrix[len(completedPuzzles)+ j*(len(themesWithoutExcludedThemes)) + i][len(themesWithoutExcludedThemes)] = 0.5 * taskMean
            #Adding a correctly solved puzzle with only task i for every task with a rating of 0.5*task Mean.
            #Done with the assumption that a player always solves a task correctly if it is half his rating.
            #This prevents our MIRT Model to return weights of negative values.
            y_Row.append(1)
        else:
            X_Matrix[len(completedPuzzles)+ j*(len(themesWithoutExcludedThemes)) + i][len(themesWithoutExcludedThemes)] = 2 * taskMean
            #Adding a wrongly solved puzzle with only task i for every task with a rating of 2*task Mean.
            #Done with the assumption that a player never solves a task correctly if it is double his rating.
            #This prevents our MIRT Model to return weights of very high values (i.e. 30000).
            y_Row.append(0)
        X_Matrix[len(completedPuzzles)+ j*(len(themesWithoutExcludedThemes)) + i][i] = 1

#for i in range(len(X_Matrix)):
#    print(X_Matrix[i], "\t\t", y_Row[i])

'''
Here we start our training.
We use the SciKit Learn LogisticRegression model, add an L2 penalty, set the inverse regularization strength to 1000 and the fit intercept to false, and tell the model to stop after 100000 iterations if it has not converged until then.
'''

logReg = linear_model.LogisticRegression(penalty='l2',C=1e+3, fit_intercept=False, max_iter=100000)
logReg.fit(X_Matrix, y_Row)
weights = logReg.coef_[0] #Getting the trained weights

abilities = []
#a is negative and we say a > 0, so take absolute value, leaving us with the same results if we take the negative difficulty...
#So an ability of -1900 should be good enough to solve a puzzle with a rating of 1950
#This was done before the adjustment of the hard and easy tasks, so a is always positive in our case.
a = -(weights[len(themesWithoutExcludedThemes)])
for i in range(len(themesWithoutExcludedThemes)):
    abilities.append(weights[i]/a)
mean = np.mean(abilities)
std = np.std(abilities)

'''
Printing the intermediate results
'''

print("-"*25, " RESULTS FOR 1E+3 ", "-" * 25)
print("Mean of Abilities:", mean)
print("Std of Abilities:", std)
print("Slope Parameter:", a)
for i in range(len(abilities)):
    print("Theme: {:<25} ability: {:<20} timesOccured: {:<10} timesCorrect: {:<10}".format(themesWithoutExcludedThemes[i], round(abilities[i], 4), np.count_nonzero(X_Matrix[:,i]), np.sum(np.where(X_Matrix[:,i] != 0, y_Row, 0))))



-------------------------  RESULTS FOR 1E+3  -------------------------
Mean of Abilities: 1271.286655476975
Std of Abilities: 359.32513820094283
Slope Parameter: 0.010911800894899942
Theme: castling                  ability: 1071.3412            timesOccured: 6          timesCorrect: 2         
Theme: sacrifice                 ability: 1110.4177            timesOccured: 16         timesCorrect: 7         
Theme: mateIn3                   ability: 1185.0685            timesOccured: 9          timesCorrect: 4         
Theme: capturingDefender         ability: 816.6839             timesOccured: 6          timesCorrect: 2         
Theme: doubleCheck               ability: 1166.3872            timesOccured: 6          timesCorrect: 3         
Theme: Bishop                    ability: 1525.9309            timesOccured: 23         timesCorrect: 14        
Theme: mateIn4                   ability: 1610.7608            timesOccured: 6          timesCorrect: 3         
Theme: clearance          

In [2]:
'''
Here we sort the Skills by rating to get a better understanding of the skills.
'''

sortedIndices = np.argsort(abilities)

sortedAbilities = np.array(abilities)[sortedIndices]
sortedThemes = np.array(themesWithoutExcludedThemes)[sortedIndices]

#print("-"*20, " SORTED ABILITIES ", "-" * 20)

for i in range(len(abilities)):
    print("Theme: {:<25} Ability: {:<20} timesOccured: {:<10} timesCorrect: {:<10}".format(sortedThemes[i], round(sortedAbilities[i], 4), np.count_nonzero(X_Matrix[:,themesWithoutExcludedThemes.index(sortedThemes[i])]) - extremeCounter, np.sum(np.where(X_Matrix[:,themesWithoutExcludedThemes.index(sortedThemes[i])] != 0, y_Row, 0))-int(extremeCounter/2)))

Theme: queensideAttack           Ability: 783.867              timesOccured: 4          timesCorrect: 1         
Theme: Rook                      Ability: 792.7321             timesOccured: 41         timesCorrect: 20        
Theme: promotion                 Ability: 807.2117             timesOccured: 4          timesCorrect: 0         
Theme: capturingDefender         Ability: 816.6839             timesOccured: 4          timesCorrect: 1         
Theme: interference              Ability: 817.5935             timesOccured: 4          timesCorrect: 1         
Theme: advancedPawn              Ability: 839.2941             timesOccured: 6          timesCorrect: 1         
Theme: mateIn5                   Ability: 880.2348             timesOccured: 4          timesCorrect: 1         
Theme: King                      Ability: 943.2009             timesOccured: 17         timesCorrect: 9         
Theme: fork                      Ability: 1004.4516            timesOccured: 7          timesCor

In [3]:
'''
Now we want to get a more reasonable approach, as the mean rating predicted by our LogReg model is probably too high.
Because the player has a mean rating of about 2169 and I suspect the sub-skills to vary in a region of 25% of the mean, let's rescale the data appropriately.
'''

currentMin = sortedAbilities[0]
currentMax = sortedAbilities[-1]
currentRange = currentMax - currentMin

playerMean = 0

abilityRange = 0.25

if (a > 0):
    predictedMean = mean
    playerMean = (mean + previousPlayerRating) / 2
else:
    predictedMean = taskMean - mean
    playerMean = ((previousPlayerRating - mean) + previousPlayerRating)/2

predictedMin = (1-abilityRange) * playerMean
predictedMax = (1/(1-abilityRange)) * playerMean
predictedRange = predictedMax - predictedMin

'''
This scales the subskills of the players and checks in the end if the range is 0.5*Rating between the worst and the best subskill.
'''

for adjustmentFactor in range(10):
    normalizedAbilities = []
    for i in range(len(sortedAbilities)):
        #normalizedAbility = ((sortedAbilities[i] - currentMin)/currentRange) * predictedRange + predictedMin       !This is the first version, where it gets scaled to a range around the predicted Mean
        normalizedAbility = (sortedAbilities[i] + adjustmentFactor * playerMean)/(adjustmentFactor + 1)             #!This is the second version, where we move the abilities closer to the predicted Mean
                                                                                                                    #to get rid of extreme values again (x*playerMean/(x+1)) is the amount we choose
        normalizedAbilities.append(normalizedAbility)
    if((normalizedAbilities[-1] - normalizedAbilities[0]) < 2 * abilityRange * playerMean):                         #check if new adjusted Abilities are in a range of 2* abilityRange around the PlayerMean
        break

'''
Printing the normalized Abilities
'''

print("-"*30, " NORMALIZED ABILITIES ", "-" * 30)

print("Player Rating on Initial Puzzles - mean +/-5 for every right/wrong answer", previousPlayerRating)
print("Player Mean based on initial Puzzles and LogReg:", predictedMean)
print("Predicted PlayerRating (less extreme - using both predicted Mean and averagePuzzles):", playerMean)
print("Mean of Normalized Abilities:", np.mean(normalizedAbilities))
print("Std of Normalized Abilities:", np.std(normalizedAbilities))

for i in range(len(abilities)):
    print("Theme: {:<25} Ability: {:<20} timesOccured: {:<10} timesCorrect: {:<10}".format(sortedThemes[i], int(normalizedAbilities[i]), np.count_nonzero(X_Matrix[:,themesWithoutExcludedThemes.index(sortedThemes[i])])-extremeCounter, np.sum(np.where(X_Matrix[:,themesWithoutExcludedThemes.index(sortedThemes[i])] != 0, y_Row, 0))-int(extremeCounter/2)))

------------------------------  NORMALIZED ABILITIES  ------------------------------
Player Rating on Initial Puzzles - mean +/-5 for every right/wrong answer 1348.641304347826
Player Mean based on initial Puzzles and LogReg: 1271.286655476975
Predicted PlayerRating (less extreme - using both predicted Mean and averagePuzzles): 1309.9639799124006
Mean of Normalized Abilities: 1297.0715384339255
Std of Normalized Abilities: 119.77504606698093
Theme: queensideAttack           Ability: 1134                 timesOccured: 4          timesCorrect: 1         
Theme: Rook                      Ability: 1137                 timesOccured: 41         timesCorrect: 20        
Theme: promotion                 Ability: 1142                 timesOccured: 4          timesCorrect: 0         
Theme: capturingDefender         Ability: 1145                 timesOccured: 4          timesCorrect: 1         
Theme: interference              Ability: 1145                 timesOccured: 4          timesCorrect: 

In [4]:
'''








CONTINUE HERE
















GET PUZZLE AMOUNT AND STORE EVERYTHING IN A DICTIONARY
'''

theme_Ability_TaskAmount_Dict = {}

minAbility = min(normalizedAbilities)
maxAbility = max(normalizedAbilities)

for j in range(1, 150):
    multiplicationFactor = j
    minTaskAmount = 1

    exponent = 3

    totalRecommendedTasks = 0

    for i in range(len(normalizedAbilities)):
        recommendedPuzzleAmounts = round(((normalizedAbilities[i] - maxAbility) ** exponent) / ((minAbility - maxAbility) ** exponent) * multiplicationFactor + minTaskAmount)
        totalRecommendedTasks += recommendedPuzzleAmounts
        theme_Ability_TaskAmount_Dict[sortedThemes[i]] = [normalizedAbilities[i], recommendedPuzzleAmounts]
    if(totalRecommendedTasks > 150):
        break
    
print(theme_Ability_TaskAmount_Dict)
print(totalRecommendedTasks)

{'queensideAttack': [1134.5983090393465, 8], 'Rook': [1137.5533452518382, 8], 'promotion': [1142.3798807288154, 8], 'capturingDefender': [1145.5372816760112, 8], 'interference': [1145.840478283102, 8], 'advancedPawn': [1153.0740214126572, 7], 'mateIn5': [1166.7209063490789, 7], 'King': [1187.709617888992, 6], 'fork': [1208.1265104508686, 5], 'trappedPiece': [1222.7149490744034, 5], 'Queen': [1226.0301776155645, 5], 'castling': [1230.4230392619409, 5], 'opening': [1231.1793229401246, 5], 'deflection': [1235.2113011838317, 5], 'enPassant': [1235.8725795541948, 5], 'sacrifice': [1243.448544340419, 4], 'Pawn': [1244.25584489817, 4], 'exposedKing': [1244.6755953712156, 4], 'defensiveMove': [1257.41159614939, 4], 'doubleCheck': [1262.1050620049564, 4], 'mateIn3': [1268.3321504030757, 4], 'pin': [1277.3477153626816, 4], 'skewer': [1282.5560443870702, 4], 'attraction': [1339.2495360415735, 3], 'Knight': [1380.117328583384, 2], 'Bishop': [1381.9529446483605, 2], 'advantage': [1382.1689412077747

In [5]:
'''
LOAD IMPORTANT PUZZLES
'''

import csv
importantPuzzles = []
with open('ImportantPuzzles.csv', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',', quotechar='"', escapechar='\\')
    for row in spamreader:
        themes = [theme.strip(" '") for theme in row[10].strip("[]").split(',') if theme.strip(" '") in themesWithoutExcludedThemes]
        row.append(themes)
        importantPuzzles.append(row)

#row[0] = PuzzleID
#row[1] = FEN
#row[2] = Moves
#row[3] = Rating
#row[4] = RatingDeviation
#row[5] = Popularity
#row[6] = NbPlayes
#row[7] = Themes
#row[8] = URL
#row[9] = OpeningTags
#row[10] = GoodRowThemes
#row[11] = CountGoodRowThemes
#row[12] = GoodRowThemes but Formatted into List
header = importantPuzzles[0]
header.append("GoodRowThemesFormatted")
importantPuzzles.remove(header)


In [6]:
'''
ITERATE AND FIND PUZZLES THAT COULD BE CHOSEN
'''

lenSortedThemes = len(sortedThemes)

allPossiblePuzzlesForAllSubskills = [[] for _ in range(2 * lenSortedThemes)] #Creates a list of lists where I will append possible Puzzles for each subskill

completedIDs = []
for i in range(len(completedPuzzles)):
    completedIDs.append(completedPuzzles[i][0])



for puzzle in importantPuzzles:
    if(puzzle[0] not in completedIDs):
        for i in range(lenSortedThemes):
            if(sortedThemes[i] in puzzle[12]):
                dictEntryForTheme = theme_Ability_TaskAmount_Dict.get(sortedThemes[i])
                #stillNeededAmount = dictEntryForTheme[1]
                neededRating = int(dictEntryForTheme[0])
                if((int(puzzle[3]) >= neededRating and int(puzzle[3]) <= neededRating + 100)):
                    allPossiblePuzzlesForAllSubskills[i].append(puzzle)
                allPossiblePuzzlesForAllSubskills[lenSortedThemes + i].append(puzzle)

In [7]:
'''
GENERATE NEEDED AMOUNT OF PUZZLES FOR EACH SUBSKILL
'''

testPuzzles = []
for i in range(len(sortedThemes)):
    dictEntryForTheme = theme_Ability_TaskAmount_Dict.get(sortedThemes[i])
    neededAmount = dictEntryForTheme[1]
    #randomPuzzles = np.random.choice(range(0, len(allPossiblePuzzlesForAllSubskills[i])), size=neededAmount, replace=False)        !This would give random Puzzles
    #for j in randomPuzzles:
    #    testPuzzle = allPossiblePuzzlesForAllSubskills[i][j]
    #    if(testPuzzle not in testPuzzles):
    #        testPuzzles.append(allPossiblePuzzlesForAllSubskills[i][j])
    sortedGoodPuzzles = sorted(allPossiblePuzzlesForAllSubskills[i], key=lambda puzzle: len(puzzle[12]))                               #!This sorts the puzzles based on least amount of skills needed
                                                                                                                                       # -> Should lead to "isolated" training in this specific skill
    h = 0 
    if(len(sortedGoodPuzzles) >= neededAmount):                                                                                                                          
        for j in range(neededAmount):
            added = False
            while(not added):
                testPuzzle = sortedGoodPuzzles[h]
                if (testPuzzle not in testPuzzles):
                    testPuzzles.append(testPuzzle)
                    h += 1
                    added = True
                else:
                    h += 1
    else:
        sortedAllPuzzles = sorted(allPossiblePuzzlesForAllSubskills[len(sortedThemes) + i], key=lambda puzzle: (int(puzzle[3]) + 50 * len(puzzle[12])))         #Puzzles with many other skills get sorted worse
        for j in range(len(sortedGoodPuzzles)):
            added = False
            while(not added):
                testPuzzle = sortedGoodPuzzles[h]
                if (testPuzzle not in testPuzzles):
                    testPuzzles.append(testPuzzle)
                    h += 1
                    added = True
                else:
                    h += 1
        for k in range(neededAmount - len(sortedGoodPuzzles)):
            added = False
            while(not added):
                testPuzzle = sortedAllPuzzles[h]
                if (testPuzzle not in testPuzzles):
                    testPuzzles.append(testPuzzle)
                    h += 1
                    added = True
                else:
                    h += 1
        
print(len(testPuzzles))

154


In [12]:
'''
SHUFFLE TEST PUZZLES AND WRITE 2 FILES
1 WITH IDs FOR TEST CANDIDATES (XLSX)
1 WITH THE WHOLE PUZZLE INFORMATION (CSV)
'''

import pandas as pd
import os

shuffledIDs = np.random.choice(range(0, len(testPuzzles)), size=len(testPuzzles), replace=False)        #!This gives randomIDs for shuffling
shuffledTestPuzzles = [testPuzzles[i] for i in shuffledIDs]

candidateName = filenames[0].split('/')[0]

if not os.path.exists(candidateName):
    os.makedirs(candidateName)

xlsxFilename = candidateName + '/' + candidateName + "_TestPuzzles.xlsx"
csvFilename = candidateName + '/' + candidateName + "_TestPuzzles.csv"

testIDs = []
for puzzle in shuffledTestPuzzles:
    testIDs.append (["=HYPERLINK(\"https://lichess.org/training/" + puzzle[0] + "\")", ""])

#Get New CSV File with all initial Puzzles
xlsxHeader = ["PuzzleLink","Richtig(1) oder Falsch(0)"]
testIDs.insert(0, xlsxHeader)

shuffledTestPuzzles.insert(0, header)

with open(csvFilename, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(shuffledTestPuzzles)

df = pd.DataFrame(testIDs[1:], columns=testIDs[0])
df.to_excel(xlsxFilename, index=False)

print(len(shuffledTestPuzzles))


155
