In [1]:
#import numpy as np
import math
import os.path
import numpy as np
import pandas as pd
from io import StringIO

%pylab inline
pylab.rcParams['figure.figsize'] = (10, 8)

Populating the interactive namespace from numpy and matplotlib


In [3]:
all_sudokus = []
sudoku_size = 6 # 3 for 9x9 Sudoku, 6 for 36x36 Sudoku
# load file that contains sudokus as string of numbers
name = "datasets/minmax_rowcolumn_"+str(sudoku_size)+"by"+str(sudoku_size)+".txt"
with open(name) as f:
    for line in f:
        numbers = line.split("\n")[0]
        all_sudokus.append(numbers)

unique_sudokus = set(all_sudokus) # turn into set to remove duplicates
print("There were", len(all_sudokus), "sudokus.")
print("There are", len(unique_sudokus), "unique sudokus left.")

There were 1884 sudokus.
There are 1884 unique sudokus left.


In [4]:
# calculate the amount of givens of each number (variety),
# and the total amount of givens
# NOTE: only the amount is used for our research
def calc_amount_variety(sudoku, sudoku_size=3):
    diff_numbers = sudoku_size**2+1
    variety = np.zeros(diff_numbers) # array of amount of zeros, ones, twos, etc.
    #if sudoku_size > 3:
    #    sudoku.split(" ")[:-1]
    for number in sudoku[0]:
        print(number)
        number = int(number)
        if number < diff_numbers:
            variety[number] += 1
        else:
            print("NUMBER TOO HIGH")
    amount = sum(variety)-variety[0]
    return amount, variety 

# calculate the minimum and maximum variety (i.e. the minimum/maximum given numbers of all classes)
# NOTE: these minima and maxima are not used in our research
def calc_min_max_variety(variety, sudoku_size=3):
    min_var = int(math.pow(sudoku_size, 4))
    max_var = 0
    for i in range(1, len(variety)):
        if variety[i] > max_var:
            max_var = variety[i]    
        if variety[i] < min_var:
            min_var = variety[i]    
    return min_var, max_var

In [5]:
# transform a sudoku as string into a 9x9 array of ints
def transform3by3(sudoko_str):
    numbers = []
    for num in sudoko_str:
        numbers.append(int(num))
    return np.array(numbers).reshape(9,9)

# transform a sudoku as string into a 36x36 array of ints
def transform6by6(sudoku_str):
    sudoku = sudoku_str.split(" ")
    sudoku = sudoku[:-1]
    sudoku = list(map(int, sudoku))
    return np.array(sudoku).reshape(36,36)

# calculate the total minimum and maximum number of givens for all rows/columns
def getMinAndMaxPerRowAndColumn(sudoku):
    min_row_column = 99999
    max_row_column = 0
    
    for row in sudoku:
        givens = np.count_nonzero(row)
        if givens < min_row_column:
            min_row_column = givens
        if givens > max_row_column:
            max_row_column = givens

    for row in sudoku.T:
        givens = np.count_nonzero(row)
        if givens < min_row_column:
            min_row_column = givens
        if givens > max_row_column:
            max_row_column = givens
            
    return min_row_column, max_row_column

In [8]:
# READ        
import pandas as pd
r_name = "results/sudokus_"+str(sudoku_size)+"x"+str(sudoku_size)+"_stats.csv"
sudoku_data = pd.read_csv(r_name, sep =",")
sudoku_data = sudoku_data.values

# WRITE
outdir = ''
w_name = "TEST_minmax_rowcolumn_"+str(sudoku_size)+"by"+str(sudoku_size)+".txt"
with open(os.path.join(outdir, w_name), "w") as myfile:  
    for data in sudoku_data:
        if sudoku_size == 3:
            min_row_col, max_row_col = getMinAndMaxPerRowAndColumn(transform3by3(data[0]))
        elif sudoku_size == 6:
            min_row_col, max_row_col = getMinAndMaxPerRowAndColumn(transform6by6(data[0]))
        new_str = (str(data[0])+", "+str(data[1])+", "+str(data[2])+", "+str(data[3])+", "+str(data[4])+", "+
                   str(data[5])+", "+str(data[6])+", "+str(data[7])+", "+str(data[8])+", "+str(data[9])+", "+
                   str(data[10])+", "+str(min_row_col)+", "+str(max_row_col)+"\n")
        myfile.write(new_str)
    myfile.close()
    print('Finished!')

Finished!


In [6]:
# write the sudoku's and their characteristics to a file
# the desired format is:
# numbers_of_sudoku, amount_of_givens, min_givens_of_number, max_givens_of_number

outdir = ''
name = "TEST_sudokus_with_givens_"+str(sudoku_size)+"by"+str(sudoku_size)+".txt"
with open(os.path.join(outdir, name), "w") as myfile:
    for sudoku in unique_sudokus:
        sudoku_list = []
        if sudoku_size > 3:
            sudoku_list.append(sudoku.split(" ")[:-1])
        else:
            for num in sudoku:
                sudoku_list.append(num)
        amount, variety = calc_amount_variety(sudoku_list, sudoku_size)
        #min_var, max_var = calc_min_max_variety(variety, sudoku_size)
        myfile.write(sudoku)
        info = ", "+str(int(amount))+"\n"  #", "+str(int(min_var))+", "+str(int(max_var))+"\n"
        myfile.write(info)
    myfile.close()
    print('Finished!')

27
0
33
0
32
0
1
0
9
30
18
0
0
0
0
0
0
20
17
4
0
0
0
16
28
2
0
3
8
29
0
5
26
0
35
0
0
0
21
0
18
19
0
32
0
0
20
31
0
0
0
26
3
0
7
24
0
36
6
22
0
0
0
33
0
0
25
0
28
15
8
4
0
0
0
0
0
0
0
11
29
24
0
36
0
31
0
0
2
17
8
1
32
0
20
34
27
26
15
0
13
0
0
23
14
0
0
18
14
0
5
0
0
0
7
22
0
4
21
6
0
24
0
0
23
27
0
12
0
13
33
30
10
0
0
0
0
0
0
2
31
0
3
0
0
1
0
0
24
0
5
0
0
34
14
8
0
0
0
15
0
33
0
0
0
0
0
2
4
36
9
0
12
19
16
0
27
0
30
0
0
2
23
0
16
25
0
19
0
12
0
26
0
0
0
0
34
0
10
27
3
0
15
0
0
0
5
0
0
24
29
22
20
13
0
17
9
3
31
10
0
0
0
0
12
16
34
13
11
4
1
30
0
0
18
35
0
25
26
0
0
19
0
14
0
0
23
28
8
27
20
0
0
23
17
21
0
0
19
7
20
0
0
0
0
14
0
0
24
5
0
0
0
9
13
10
12
0
18
4
0
0
0
32
0
35
0
11
12
33
24
0
19
14
21
0
18
0
0
0
23
25
13
16
0
0
0
22
0
0
8
4
35
20
0
26
0
0
17
7
0
0
0
15
11
0
0
0
0
0
17
10
8
0
0
30
32
0
3
0
0
0
19
0
0
34
28
33
0
0
16
9
24
0
0
0
13
0
0
5
0
18
13
0
25
5
31
35
0
28
22
4
0
8
20
0
7
0
6
0
16
24
17
11
29
0
30
0
34
0
0
26
0
0
0
0
0
0
26
0
0
0
24
25
0
6
5
0
22
2
15
28
29
35
0
23
0

ValueError: invalid literal for int() with base 10: ','

'\n# set order from least amount of givens to most amount of givens\nmin_amount = 999\nmax_amount = 0\nfor unique_sud in unique_sudokus:\n    amount, variety = calc_amount_variety(unique_sud)\n    if amount < min_amount:\n        min_amount = amount\n    if amount > max_amount:\n        max_amount = amount\n\nsorted_list_of_unique_sudokus = []\nfor i_amount in range(min_amount, max_amount):\n    for unique_sud in unique_sudokus:\n        amount, variety = calc_amount_variety(unique_sud)\n        if amount == i_amount:\n            sorted_list_of_unique_sudokus.append()\n'