In [1]:
import os
import torch
import copy
import tensorflow as tf
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
# from Downloads import LSTMSudokuClassifier as LSTMC
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from random import randint

In [2]:
class LSTMClassifier(nn.Module):

    def __init__(self, input_dim, n_features, hidden_dim, n_label, batch_size, n_layers, use_gpu):
        super(LSTMClassifier, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.n_features = n_features
        self.n_layers = n_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, bidirectional=True)
        self.hidden2label = nn.Sequential(torch.nn.Linear(2*hidden_dim, n_label),
                            torch.nn.Softmax(dim = -1))
        self.hidden = self.init_hidden()

    def init_hidden(self):
        if self.use_gpu:
            h0 = Variable(torch.zeros(2*n_layers, self.batch_size, self.hidden_dim).cuda())
            c0 = Variable(torch.zeros(2*n_layers, self.batch_size, self.hidden_dim).cuda())
        else:
            h0 = Variable(torch.zeros(n_layers, self.batch_size, self.hidden_dim))
            c0 = Variable(torch.zeros(n_layers, self.batch_size, self.hidden_dim))
        return (h0, c0)

    def forward(self, inputs):
        x = inputs.view(self.n_features, self.batch_size, self.input_dim) 
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        last_output = lstm_out[-1]
        print(last_output.shape)
        y  = self.hidden2label(lstm_out[-1])
        return y

In [4]:
class InputGrid():
    
    def __init__(self, Grid, batch_size):
        self.Grid = Grid
        self.gridLength = len(Grid[0])
        self.batchSize = batch_size
        
    def getlims(self, i):
        if 0 <= i <= 2:
            rowlims = [0,3]
        elif 3 <= i <= 5:
            rowlims = [3,6]
        elif 6 <= i <= 8:
                rowlims = [6,9]
        return rowlims

    def getSquare(self, squareRow, squareCol, k):
        box=[]
        for i in range(squareRow[0],squareRow[1]):
            for j in range(squareCol[0],squareCol[1]):
                box.append(self.Grid[k,i,j])
        return box
    
    # takes 1D returns 2D
    def one_hot(vec):
        for val in vec:
            
    
    def getInput(self):
        Rows = []
        Columns = []
        Squares = []
        for k in range(self.batchSize):
            rows = []
            column = []
            square = []
            for i in range(self.gridLength):
                for j in range(self.gridLength):
                    g_row = self.Grid[k,i,:]
                 
        return inputs, Indices, Update

In [5]:
use_plot = True
use_save = True
# if use_save:
#     import pickle
#     from datetime import datetime

DATA_DIR = 'data'
TRAIN_FILE = 'sudoku_dev.csv'
TEST_FILE = 'sudoku_test.txt'
TRAIN_LABEL = 'train_label.txt'
TEST_LABEL = 'test_label.txt'

In [6]:
class SudokuDataset(Dataset):
    def __init__(self, fpath):
        
        print(fpath)
        lines = open(fpath, 'r').read().splitlines()[1:]
        nsamples = len(lines)

        X = np.zeros((nsamples, 9*9), np.float32)  
        Y = np.zeros((nsamples, 9*9), np.int32) 

        for i, line in enumerate(lines):
            quiz, solution = line.split(",")
            for j, (q, s) in enumerate(zip(quiz, solution)):
                X[i, j], Y[i, j] = q, s

        X = np.reshape(X, (-1, 9, 9))
        Y = np.reshape(Y, (-1, 9, 9))
        self.X = X
        self.Y = Y

    def __getitem__(self, index):
        quiz = self.X[index]
        sol = self.Y[index]
        return quiz, sol
    
    def __len__(self):
        return len(self.X)

In [7]:
def countZeros(batch_grid):
    batch, row, col = batch_grid.shape
    num_zeros = 0
    
    for k in range(batch):
        for i in range(row):
            for j in range(col):
                if (batch_grid[k,i,j] == 0):
                    num_zeros += 1
                    
    return num_zeros

In [9]:
if __name__=='__main__':
    
    ## parameter setting
    epochs = 100
    batch_size = 64
    use_gpu = torch.cuda.is_available()
    learning_rate = 1e-3

    input_dim = 9
    hidden_dim = 50
    n_features = 3
    n_label = 10
    n_layers = 3
    
    train_path = os.path.join(DATA_DIR, TRAIN_FILE)
    test_path = os.path.join(DATA_DIR, TEST_FILE)


     ### ********************create model**************************
    model = LSTMClassifier(input_dim, n_features, hidden_dim, n_label, batch_size, n_layers, use_gpu)
    if use_gpu:
        model = model.cuda()
    
    training_set = SudokuDataset(train_path)
    train_loader = DataLoader(training_set,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0
                          )

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_function = nn.CrossEntropyLoss()
    train_loss_ = []
    test_loss_ = []
    train_acc_ = []
    test_acc_ = []
    
### training procedure
    for iter, traindata in enumerate(train_loader):
#         print("new data batch")
        train_inputs_raw, train_labels_raw = traindata

        train_data_numpy = Variable(train_inputs_raw).data.numpy()
        train_label_numpy = Variable(train_labels_raw).data.numpy()
#         print(train_data_numpy.shape, train_label_numpy.shape)
        num_zeros = countZeros(train_inputs_raw)
        print(num_zeros)
        cur_batch_size = train_data_numpy.shape[0]
    
        for cell in range(input_dim*input_dim): # max num zeros
#             print("working on cell:", cell)
            cur_batch_size = train_data_numpy.shape[0]

             = InputGrid(train_data_numpy, cur_batch_size)
            train_inputs, indices, updates = input_grid.getInput()

            train_labels = []
            for ind,index in enumerate(indices):
                k,i,j = index
                train_labels.append(int(train_label_numpy[k,i,j]))
            train_labels = Variable(torch.LongTensor(train_labels))
            
            total_acc = 0.0
            total_loss = 0.0
            total = 0.0

            for epoch in range(epochs):
#                 optimizer = adjust_learning_rate(optimizer, epoch)

#                 ## training epoch
#                 total_acc = 0.0
#                 total_loss = 0.0
#                 total = 0.0

                if use_gpu:
                    train_inputs, train_labels = train_inputs.cuda(), train_labels.cuda()
                else: train_inputs = train_inputs

                model.zero_grad()
                model.batch_size = len(train_labels)
#                 model.hidden = model.init_hidden()
                output = model(train_inputs)

                loss = loss_function(output, train_labels)
                loss.backward(retain_graph=True)
                optimizer.step()

                # calc training acc
                _, predicted = torch.max(output.data, 1)
#                 print('TrainPred =  ',predicted, 'While TrainLabel = ' ,train_labels.data)
                total_acc = (predicted == train_labels.data).sum()/len(train_labels)
                total += len(train_labels)
                total_loss += loss.data[0]
                
#                 print("loss:", loss.data[0], "acc", total_acc)
                
                
#                 train_loss_.append(total_loss / total)
#                 train_acc_.append(total_acc / total)

                print('[Epoch: %3d/%3d] Training Loss: %.3f, Training Acc: %.3f' 
                      % (epoch, epochs, loss.data[0], total_acc))
            
            # update with new prediction
            print("updating table...")
#             print("b", train_data_numpy)
            for iter,index in enumerate(indices):
                if (updates[iter]):
                    k,i,j = index
                    train_data_numpy[k,i,j] = train_labels[iter]
#             print("a", train_data_numpy)
        
        
        #****************************Will need to be outside training forloop************
        #First for loop over all Problems/Solutions here 
        
        #Insert first grid here - Our testloader will be [3*1*9 row,col,box inputs],corresponding solution entries
        #Consider switching to for n(0s), 
            #run zero selecting algorithm to extract the row column, box values
            #return index and use that to grab corresponding label
            #train inputs,labels = those values = testloader
            

#     param = {}
#     param['lr'] = learning_rate
#     param['batch_size'] = batch_size
#     param['input_dim'] = input_dim
#     param['hidden_dim'] = hidden_dim
#     param['n_features'] = n_features

#     result = {}
#     result['train_loss'] = train_loss_
# #     result['test_loss'] = test_loss_
#     result['train_acc'] = train_acc_
# #     result['test_acc'] = test_acc_
#     result['param'] = param

#     if use_plot:
#         import PlotFigure as PF
#         PF.PlotFigure(result, use_save)
#     if use_save:
#         filename = 'log/LSTM_classifier_' + datetime.now().strftime("%d-%h-%m-%s") + '.pkl'
#         result['filename'] = filename

#         fp = open(filename, 'wb')
#         pickle.dump(result, fp)
#         fp.close()
#         print('File %s is saved.' % filename)      
            

data\sudoku_dev.csv
3027
torch.Size([64, 100])
[Epoch:   0/100] Training Loss: 2.302, Training Acc: 0.094
torch.Size([64, 100])
[Epoch:   1/100] Training Loss: 2.302, Training Acc: 0.094
torch.Size([64, 100])
[Epoch:   2/100] Training Loss: 2.301, Training Acc: 0.188
torch.Size([64, 100])
[Epoch:   3/100] Training Loss: 2.299, Training Acc: 0.203
torch.Size([64, 100])
[Epoch:   4/100] Training Loss: 2.298, Training Acc: 0.172
torch.Size([64, 100])
[Epoch:   5/100] Training Loss: 2.296, Training Acc: 0.188
torch.Size([64, 100])
[Epoch:   6/100] Training Loss: 2.294, Training Acc: 0.156
torch.Size([64, 100])
[Epoch:   7/100] Training Loss: 2.292, Training Acc: 0.188
torch.Size([64, 100])
[Epoch:   8/100] Training Loss: 2.290, Training Acc: 0.203
torch.Size([64, 100])
[Epoch:   9/100] Training Loss: 2.287, Training Acc: 0.188
torch.Size([64, 100])
[Epoch:  10/100] Training Loss: 2.284, Training Acc: 0.188
torch.Size([64, 100])
[Epoch:  11/100] Training Loss: 2.280, Training Acc: 0.203
tor

[Epoch:   1/100] Training Loss: 2.317, Training Acc: 0.125
torch.Size([64, 100])
[Epoch:   2/100] Training Loss: 2.317, Training Acc: 0.109
torch.Size([64, 100])
[Epoch:   3/100] Training Loss: 2.316, Training Acc: 0.109
torch.Size([64, 100])
[Epoch:   4/100] Training Loss: 2.313, Training Acc: 0.109
torch.Size([64, 100])
[Epoch:   5/100] Training Loss: 2.303, Training Acc: 0.109
torch.Size([64, 100])
[Epoch:   6/100] Training Loss: 2.291, Training Acc: 0.125
torch.Size([64, 100])
[Epoch:   7/100] Training Loss: 2.283, Training Acc: 0.141
torch.Size([64, 100])
[Epoch:   8/100] Training Loss: 2.283, Training Acc: 0.141
torch.Size([64, 100])
[Epoch:   9/100] Training Loss: 2.287, Training Acc: 0.125
torch.Size([64, 100])
[Epoch:  10/100] Training Loss: 2.290, Training Acc: 0.172
torch.Size([64, 100])
[Epoch:  11/100] Training Loss: 2.296, Training Acc: 0.172
torch.Size([64, 100])
[Epoch:  12/100] Training Loss: 2.298, Training Acc: 0.172
torch.Size([64, 100])
[Epoch:  13/100] Training Lo

KeyboardInterrupt: 

In [None]:
## parameter setting
epochs = 50
batch_size = 1
use_gpu = torch.cuda.is_available()
learning_rate = 0.01

def adjust_learning_rate(optimizer, epoch):
    lr = learning_rate * (0.1 ** (epoch // 10))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer