In [None]:
import os
import torch
import copy
import numpy as np
from torch.utils.data import DataLoader
# from Downloads import LSTMSudokuClassifier as LSTMC
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

In [None]:
class LSTMClassifier(nn.Module):
    """Stacked LSTM followed by a linear layer and a softmax over the labels.

    The sequence is classified from the LSTM output at its last time step.

    Args:
        input_dim: number of features per time step.
        n_timeSteps: number of time steps in every input sequence.
        hidden_dim: size of the LSTM hidden state.
        label_size: number of output classes.
        batch_size: number of sequences per forward pass. Callers may
            overwrite ``self.batch_size`` and then call ``init_hidden()``
            to process a batch of a different size.
        nLayers: number of stacked LSTM layers.
        use_gpu: when true, the initial hidden state is allocated on CUDA.
    """

    def __init__(self, input_dim, n_timeSteps, hidden_dim, label_size, batch_size,nLayers, use_gpu):
        super(LSTMClassifier, self).__init__()
        # Everything forward()/init_hidden() need is kept on the instance so
        # the model does not depend on notebook-level globals.
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.nLayers = nLayers
        self.use_gpu = use_gpu
        self.n_timeSteps = n_timeSteps

        self.lstm = nn.LSTM(input_dim, hidden_dim, nLayers)
        # NOTE(review): the head ends in Softmax, so forward() returns
        # probabilities. nn.CrossEntropyLoss expects unnormalised scores;
        # pair this output with NLLLoss on its log, or drop the Softmax.
        self.hidden2label = nn.Sequential(torch.nn.Linear(hidden_dim,label_size),
                            torch.nn.Softmax(dim = -1))
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh zero ``(h0, c0)`` pair sized for the current batch.

        Each tensor has shape ``(nLayers, batch_size, hidden_dim)`` and lives
        on CUDA when ``use_gpu`` is set, otherwise on the CPU.
        """
        device = torch.device('cuda' if self.use_gpu else 'cpu')
        shape = (self.nLayers, self.batch_size, self.hidden_dim)
        h0 = torch.zeros(shape, device=device)
        c0 = torch.zeros(shape, device=device)
        return (h0, c0)

    def forward(self, inputs):
        """Classify a batch of sequences.

        Args:
            inputs: tensor that can be viewed as
                ``(n_timeSteps, batch_size, input_dim)``.

        Returns:
            Tensor of shape ``(batch_size, label_size)`` holding the softmax
            output computed from the last time step.
        """
        x = inputs.view(self.n_timeSteps, self.batch_size, self.input_dim)
        # The recurrent state is stored back on the module; callers reset it
        # with init_hidden() before the next independent batch.
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        y  = self.hidden2label(lstm_out[-1])
        return y

class InputGrid():
    """Pick one cell per puzzle and return its row, column and box as model input.

    Args:
        Grid: array indexed as ``Grid[k, i, j]`` (puzzle, row, column) in
            which empty cells hold 0. It must support NumPy-style slicing.
        gridLength: number of rows / columns of one puzzle.
        batch_size: number of puzzles to process. When omitted, the length of
            the first axis of ``Grid`` is used.
    """

    BOX_SIZE = 3  # side length of one box; getlims() covers indices 0..8

    def __init__(self, Grid, gridLength, batch_size=None):
        self.Grid =  Grid
        self.gridLength = gridLength
        self.batchSize = len(Grid) if batch_size is None else batch_size

    def getlims(self, i):
        """Return ``[start, stop)`` limits of the 3-wide band that contains index ``i``.

        Raises:
            ValueError: if ``i`` is not in ``0..8``.
        """
        if not 0 <= i < self.BOX_SIZE ** 2:
            raise ValueError('index %r is outside the range 0..8' % (i,))
        start = (i // self.BOX_SIZE) * self.BOX_SIZE
        return [start, start + self.BOX_SIZE]

    def getSquare(self, squareRow, squareCol, k):
        """Return the cells of puzzle ``k`` inside the given row/column limits.

        The result is a flat list in row-major order.
        """
        box = self.Grid[k, squareRow[0]:squareRow[1], squareCol[0]:squareCol[1]]
        return list(box.reshape(-1))

    @staticmethod
    def getBestInput(row, col, square):
        """Count the zeros in ``row``, ``col`` and ``square``, minus 3.

        getInput() only calls this for a cell that is itself 0; that cell
        appears once in each of the three views, hence the subtraction.
        """
        nZeros = np.sum([(row == 0).sum(),(col == 0).sum(),(square ==0).sum()])-3
        return nZeros

    def _context(self, k, i, j):
        """Return ``[row, col, square]`` for cell ``(i, j)`` of puzzle ``k``."""
        row = self.Grid[k, i, :]
        col = self.Grid[k, :, j]
        square = np.array(self.getSquare(self.getlims(i), self.getlims(j), k))
        return [row, col, square]

    def _selectCell(self, k):
        """Choose the input cell for puzzle ``k``; returns ``(Final, Index, update)``."""
        Final, Index, update = [], [], 0
        # Only cells with fewer than gridLength other zeros around them qualify.
        minZeros = self.gridLength
        for i in range(self.gridLength):
            for j in range(self.gridLength):
                if self.Grid[k, i, j] != 0:
                    continue
                row, col, square = self._context(k, i, j)
                nZeros = self.getBestInput(row, col, square)
                if nZeros < minZeros:  # strict: the first best cell wins ties
                    Final, Index, update = [row, col, square], [k, i, j], 1
                    minZeros = nZeros

        if len(Final) == 0:
            # No qualifying empty cell: fall back to a uniformly random cell
            # and flag it as "do not update".
            i = int(np.random.randint(0, self.gridLength))
            j = int(np.random.randint(0, self.gridLength))
            Final, Index, update = self._context(k, i, j), [k, i, j], 0
        return Final, Index, update

    def getInput(self):
        """Build one LSTM input per puzzle in the batch.

        For every puzzle the empty cell with the fewest other zeros in its
        row, column and box is selected.

        Returns:
            inputs: float tensor of shape ``(3, batchSize, features)`` whose
                first axis is (row, column, box).
            Indices: list of ``[k, i, j]`` giving the chosen cell per puzzle.
            Update: list of flags, 1 when the cell was chosen by the zero
                count and 0 when it came from the random fallback.
        """
        Batch = []
        Indices = []
        Update = []
        for k in range(self.batchSize):
            Final, Index, update = self._selectCell(k)
            Batch.append(Final)
            Indices.append(Index)
            Update.append(update)

        # Batch is puzzle-major (batch, 3, features); the LSTM wants the
        # time axis first, so swap the axes instead of reinterpreting memory.
        stacked = np.stack([np.stack(final) for final in Batch]).astype(np.float32)
        inputs = torch.from_numpy(stacked).permute(1, 0, 2).contiguous()

        return inputs, Indices, Update
    

    

In [None]:
# Imported unconditionally: the save step at the end of the notebook needs
# both names whenever use_save is true at that point, even if the flag was
# flipped after this cell ran.
import pickle
from datetime import datetime

# Output switches read by the final reporting step.
use_plot = True   # draw the loss/accuracy curves
use_save = True   # pickle the result dictionary to disk

# Dataset locations (currently referenced only by commented-out loading code).
DATA_DIR = 'data'
TRAIN_DIR = 'train_txt'
TEST_DIR = 'test_txt'
TRAIN_FILE = 'train_txt.txt'
TEST_FILE = 'test_txt.txt'
TRAIN_LABEL = 'train_label.txt'
TEST_LABEL = 'test_label.txt'

In [None]:
## Training hyper-parameters
learning_rate = 0.01                  # starting step size handed to the optimizer
epochs = 50                           # iterations of the outer training loop
batch_size = 1                        # sequences per forward pass
use_gpu = torch.cuda.is_available()   # use CUDA whenever a device is visible

def adjust_learning_rate(optimizer, epoch, base_lr=None, decay=0.1, step=10):
    """Apply a step-decay schedule to every parameter group of ``optimizer``.

    The rate becomes ``base_lr * decay ** (epoch // step)``.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts with an
            ``'lr'`` entry.
        epoch: zero-based epoch index.
        base_lr: rate used at epoch 0. Defaults to the notebook-level
            ``learning_rate``.
        decay: multiplicative factor applied once per ``step`` epochs.
        step: number of epochs between two decays.

    Returns:
        The same ``optimizer``, modified in place.
    """
    if base_lr is None:
        base_lr = learning_rate
    lr = base_lr * (decay ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer

In [None]:
if __name__=='__main__':
    ### parameter setting
    input_dim = 9       # features per time step
    hidden_dim = 50
    sentence_len = 3    # time steps per sample: row, column, box
    nlabel = 9
    nLayers = 3

    # TODO: real puzzle loading (train/test DataLoader) is not wired up yet;
    # the synthetic sample built below stands in for it.

    ### ********************create model**************************
    model = LSTMClassifier(input_dim, sentence_len, hidden_dim,
                           nlabel, batch_size, nLayers, use_gpu)
    if use_gpu:
        model = model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # LSTMClassifier already ends in a Softmax, so the network returns
    # probabilities. CrossEntropyLoss would apply a second (log-)softmax on
    # top of them; NLLLoss on the log-probabilities is the matching loss.
    loss_function = nn.NLLLoss()
    train_loss_ = []
    test_loss_ = []
    train_acc_ = []
    test_acc_ = []

    #********************Fake Train and Test loader*************************************
    n_boxes = 2
    inputs = torch.cat([torch.FloatTensor(np.random.permutation(range(1, 10)))
                        for _ in range(sentence_len)]).view(sentence_len, 1, -1)
    test = torch.cat([torch.FloatTensor(np.random.permutation(range(1, 10)))
                      for _ in range(sentence_len)]).view(sentence_len, 1, -1)
    # Class indices must lie in 0..nlabel-1.
    # NOTE(review): real solution digits 1..9 would need shifting to 0..8.
    target = torch.LongTensor(n_boxes).random_(1, 9)
    train_loader = target[0],inputs #real soln value, row,col,box at that index
    test_loader = target[1],test

    # Grid / gridLength are not defined in any cell visible here.
    # NOTE(review): confirm which cell is meant to provide them. Until then
    # the helper is created only when they exist, so a fresh kernel still
    # runs the synthetic experiment below.
    try:
        inputGrid = InputGrid(Grid, gridLength, batch_size)
    except NameError:
        inputGrid = None

    #*************************************************************************

    for epoch in range(epochs):
        optimizer = adjust_learning_rate(optimizer, epoch)

        ## training epoch
        total_acc = 0.0
        total_loss = 0.0
        total = 0.0

        # TODO: loop over all puzzles/solutions here. For each puzzle, run the
        # zero-selecting algorithm to extract the row, column and box values,
        # use the returned index to grab the matching label, and train on
        # that (inputs, label) pair.

        # Slice (not index) so the label keeps shape (1,), as the loss and
        # len() require.
        train_inputs, train_labels = inputs, target[0:1]
        if use_gpu:
            train_inputs, train_labels = train_inputs.cuda(), train_labels.cuda()

        model.zero_grad()
        model.batch_size = len(train_labels) #should be one
        model.hidden = model.init_hidden()
        output = model(train_inputs)

        #could consider adding up the losses and doing backprop after each puzzle
        loss = loss_function(torch.log(output.clamp(min=1e-12)), train_labels)

        #Could consider pushing both of these outside the loop and running after each puzzle
        loss.backward()
        optimizer.step()

        # TODO: update the grid here with the predicted value.

        # calc training acc
        predicted = output.argmax(dim=1)
        print('TrainPred =  ',predicted, 'While TrainLabel = ' ,train_labels)
        total_acc += (predicted == train_labels).sum().item()
        total += len(train_labels)
        total_loss += loss.item()

        train_loss_.append(total_loss / total)
        train_acc_.append(total_acc / total)

        # TODO: the evaluation below will need the same per-puzzle loop as
        # training once real test puzzles are available.

        ## testing epoch
        total_acc = 0.0
        total_loss = 0.0
        total = 0.0

        test_inputs, test_labels = test, target[1:2]
        if use_gpu:
            test_inputs, test_labels = test_inputs.cuda(), test_labels.cuda()

        model.batch_size = len(test_labels)
        model.hidden = model.init_hidden()
        # Evaluation needs no gradients. The test tensor already has the
        # (time, batch, feature) layout used for training, so it is passed
        # through unchanged.
        with torch.no_grad():
            output = model(test_inputs)
            loss = loss_function(torch.log(output.clamp(min=1e-12)), test_labels)

        # calc testing acc
        predicted = output.argmax(dim=1)
        print('TestPred = ',predicted, 'While TestLabel = ' ,test_labels)
        total_acc += (predicted == test_labels).sum().item()
        total += len(test_labels)
        total_loss += loss.item()

        test_loss_.append(total_loss / total)
        test_acc_.append(total_acc / total)

        print('[Epoch: %3d/%3d] Training Loss: %.3f, Testing Loss: %.3f, Training Acc: %.3f, Testing Acc: %.3f'
              % (epoch, epochs, train_loss_[epoch], test_loss_[epoch], train_acc_[epoch], test_acc_[epoch]))

    param = {}
    param['lr'] = learning_rate
    param['batch size'] = batch_size
    param['input dim'] = input_dim
    param['hidden dim'] = hidden_dim
    param['nTimeSteps'] = sentence_len

    result = {}
    result['train loss'] = train_loss_
    result['test loss'] = test_loss_
    result['train acc'] = train_acc_
    result['test acc'] = test_acc_
    result['param'] = param

    if use_plot:
        import PlotFigure as PF
        PF.PlotFigure(result, use_save)
    if use_save:
        # Same day-month-month-epochseconds stamp as before, built without
        # the platform-specific %h and %s strftime directives.
        now = datetime.now()
        stamp = now.strftime("%d-%b-%m-") + str(int(now.timestamp()))
        filename = 'log/LSTM_classifier_' + stamp + '.pkl'
        result['filename'] = filename

        # Create the output directory on first use instead of failing in open().
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'wb') as fp:
            pickle.dump(result, fp)
        print('File %s is saved.' % filename)
            