In [67]:
import os
import torch
import copy
import tensorflow as tf
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
# from Downloads import LSTMSudokuClassifier as LSTMC
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from random import randint

In [68]:
class LSTMClassifier(nn.Module):

    def __init__(self, input_dim, n_features, hidden_dim, n_label, batch_size, n_layers, use_gpu):
        super(LSTMClassifier, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.n_features = n_features
        self.n_layers = n_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, bidirectional=True)
        self.hidden2label = nn.Sequential(torch.nn.Linear(2*hidden_dim, n_label),
                            torch.nn.Softmax(dim = -1))
        self.hidden = self.init_hidden()

    def init_hidden(self):
        if self.use_gpu:
            h0 = Variable(torch.zeros(2*n_layers, self.batch_size, self.hidden_dim).cuda())
            c0 = Variable(torch.zeros(2*n_layers, self.batch_size, self.hidden_dim).cuda())
        else:
            h0 = Variable(torch.zeros(2*n_layers, self.batch_size, self.hidden_dim))
            c0 = Variable(torch.zeros(2*n_layers, self.batch_size, self.hidden_dim))
        return (h0, c0)

    def forward(self, inputs):
        x = inputs.view(self.n_features, self.batch_size, self.input_dim) 
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        last_output = lstm_out[-1]
        print(last_output.shape)
        y  = self.hidden2label(lstm_out[-1])
        return y

In [69]:
class InputGrid():
    
    def __init__(self, Grid):
        self.Grid = Grid
        self.gridLength = len(Grid[0])
        
    def getlims(self, i):
        if 0 <= i <= 2:
            rowlims = [0,3]
        elif 3 <= i <= 5:
            rowlims = [3,6]
        elif 6 <= i <= 8:
                rowlims = [6,9]
        return rowlims

    def get_square(self, i):
        grid_square = None
        if i < 3:
            if i%3 == 0:
                grid_square = self.Grid[0:3,0:3]
            elif i%3 == 1:
                grid_square = self.Grid[0:3,3:6]
            elif i%3 == 2:
                grid_square = self.Grid[0:3,6:9]
        elif i < 6:
            if i%3 == 0:
                grid_square = self.Grid[3:6,0:3]
            elif i%3 == 1:
                grid_square = self.Grid[3:6,3:6]
            elif i%3 == 2:
                grid_square = self.Grid[3:6,6:9]
        elif i < 9:
            if i%3 == 0:
                grid_square = self.Grid[6:9,0:3]
            elif i%3 == 1:
                grid_square = self.Grid[6:9,3:6]
            elif i%3 == 2:
                grid_square = self.Grid[6:9,6:9]
        return grid_square.flatten()
                
    
    # takes 1D returns 2D
    def one_hot(self, vec):
        one_hot_matrix = []
        for val in vec:
            hot_vec = [0 for _ in range(9)]
            if val > 0:
                hot_vec[int(val)-1] = 1
            else:
                hot_vec = [1.0/9 for _ in range(9)]
            one_hot_matrix.append(hot_vec)
        real_one_hot = np.transpose(np.array(one_hot_matrix))
        return real_one_hot
    
    #takes 2D returns 3D
    def getInput(self):
        print(self.Grid)
        Rows = np.zeros((9,9,9))
        Columns = np.zeros((9,9,9))
        Squares = np.zeros((9,9,9))
        for i in range(self.gridLength):
            hot_row = self.one_hot(self.Grid[i,:])
            hot_column = self.one_hot(self.Grid[:,i])
            hot_square = self.one_hot(self.get_square(i))
            
            Rows[i] = hot_row
            Columns[i] = hot_column
            Squares[i] = hot_square
        
        row_tensor = Variable(torch.FloatTensor(Rows))
        col_tensor = Variable(torch.FloatTensor(Columns))
        square_tensor = Variable(torch.FloatTensor(Squares))
        print(row_tensor)
        print(col_tensor)
        print(square_tensor)
                 
        return row_tensor, col_tensor, square_tensor

In [70]:
# use_plot = True
# use_save = True
# if use_save:
#     import pickle
#     from datetime import datetime

DATA_DIR = 'data'
TRAIN_FILE = 'sudoku_dev.csv'
TEST_FILE = 'sudoku_test.txt'
TRAIN_LABEL = 'train_label.txt'
TEST_LABEL = 'test_label.txt'

In [71]:
class SudokuDataset(Dataset):
    def __init__(self, fpath):
        
        print(fpath)
        lines = open(fpath, 'r').read().splitlines()[1:]
        nsamples = len(lines)

        X = np.zeros((nsamples, 9*9), np.float32)  
        Y = np.zeros((nsamples, 9*9), np.int32) 

        for i, line in enumerate(lines):
            quiz, solution = line.split(",")
            for j, (q, s) in enumerate(zip(quiz, solution)):
                X[i, j], Y[i, j] = q, s

        X = np.reshape(X, (-1, 9, 9))
        Y = np.reshape(Y, (-1, 9, 9))
        self.X = X
        self.Y = Y

    def __getitem__(self, index):
        quiz = self.X[index]
        sol = self.Y[index]
        return quiz, sol
    
    def __len__(self):
        return len(self.X)

In [72]:
if __name__=='__main__':
    
    ## parameter setting
    epochs = 100
    batch_size = 9
    use_gpu = torch.cuda.is_available()
    learning_rate = 1e-3

    input_dim = 9
    hidden_dim = 50
    n_features = 3
    n_label = 10
    n_layers = 3
    
    train_path = os.path.join(DATA_DIR, TRAIN_FILE)
    test_path = os.path.join(DATA_DIR, TEST_FILE)


     ### ********************create model**************************
    model = LSTMClassifier(input_dim, n_features, hidden_dim, n_label, batch_size, n_layers, use_gpu)
    if use_gpu:
        model = model.cuda()
    
    training_set = SudokuDataset(train_path)
    train_loader = DataLoader(training_set,
                          batch_size=1,
                          shuffle=True,
                          num_workers=8
                          )

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_function = nn.CrossEntropyLoss()
    train_loss_ = []
    test_loss_ = []
    train_acc_ = []
    test_acc_ = []
    
### training procedure
    for epoch in range(epochs):
        # optimizer = adjust_learning_rate(optimizer, epoch)

        for iter, traindata in enumerate(train_loader):
            train_inputs_raw, train_labels_raw = traindata

            train_data_numpy = Variable(train_inputs_raw).data.numpy()
            train_label_numpy = Variable(train_labels_raw).data.numpy()
#             print(train_data_numpy[0].shape, train_data_numpy.shape)
    
            input_grid = InputGrid(train_data_numpy[0])
            row_tensor, col_tensor, sqaure_tensor = input_grid.getInput()

            train_labels = train_label_numpy
            
#             for ind,index in enumerate(indices):
#                 k,i,j = index
#                 train_labels.append(int(train_label_numpy[k,i,j]))
#             train_labels = Variable(torch.LongTensor(train_labels))
            
            total_acc = 0.0
            total_loss = 0.0
            total = 0.0

            if use_gpu:
                train_inputs, train_labels = train_inputs.cuda(), train_labels.cuda()
            else: train_inputs = train_inputs

            model.zero_grad()
            model.batch_size = len(train_labels)
#                 model.hidden = model.init_hidden()
            output = model([row_tensor, col_tensor, sqaure_tensor])

            loss = loss_function(output, train_labels)
            loss.backward(retain_graph=True)
            optimizer.step()

            # calc training acc
            _, predicted = torch.max(output.data, 1)
#                 print('TrainPred =  ',predicted, 'While TrainLabel = ' ,train_labels.data)
            total_acc = (predicted == train_labels.data).sum()/len(train_labels)
            total += len(train_labels)
            total_loss += loss.data[0]

#                 print("loss:", loss.data[0], "acc", total_acc)


#                 train_loss_.append(total_loss / total)
#                 train_acc_.append(total_acc / total)

            print('[Epoch: %3d/%3d] Training Loss: %.3f, Training Acc: %.3f' 
                  % (epoch, epochs, loss.data[0], total_acc))
        

data/sudoku_dev.csv
[[0. 7. 3. 0. 5. 6. 9. 4. 0.]
 [9. 0. 0. 4. 3. 0. 0. 0. 0.]
 [0. 6. 0. 1. 0. 7. 0. 2. 0.]
 [0. 0. 5. 9. 8. 0. 0. 1. 0.]
 [0. 0. 8. 0. 4. 0. 6. 5. 0.]
 [0. 9. 0. 2. 0. 0. 0. 0. 0.]
 [0. 1. 6. 0. 0. 0. 4. 0. 3.]
 [0. 3. 2. 0. 0. 8. 0. 9. 5.]
 [0. 5. 0. 0. 0. 0. 7. 0. 0.]]
Variable containing:
(0 ,.,.) = 
  0.1111  0.0000  0.0000  0.1111  0.0000  0.0000  0.0000  0.0000  0.1111
  0.1111  0.0000  0.0000  0.1111  0.0000  0.0000  0.0000  0.0000  0.1111
  0.1111  0.0000  1.0000  0.1111  0.0000  0.0000  0.0000  0.0000  0.1111
  0.1111  0.0000  0.0000  0.1111  0.0000  0.0000  0.0000  1.0000  0.1111
  0.1111  0.0000  0.0000  0.1111  1.0000  0.0000  0.0000  0.0000  0.1111
  0.1111  0.0000  0.0000  0.1111  0.0000  1.0000  0.0000  0.0000  0.1111
  0.1111  1.0000  0.0000  0.1111  0.0000  0.0000  0.0000  0.0000  0.1111
  0.1111  0.0000  0.0000  0.1111  0.0000  0.0000  0.0000  0.0000  0.1111
  0.1111  0.0000  0.0000  0.1111  0.0000  0.0000  1.0000  0.0000  0.1111

(1 ,.,.) = 
  0.00

NameError: name 'train_inputs' is not defined

In [None]:
## parameter setting
epochs = 50
batch_size = 1
use_gpu = torch.cuda.is_available()
learning_rate = 0.01

def adjust_learning_rate(optimizer, epoch):
    lr = learning_rate * (0.1 ** (epoch // 10))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer