In [26]:
import dlib
import numpy as np
import ocr_preprocess as ocrp

In [27]:
class windowStructSVM:
    def __init__(self, samples, labels, window, num_classes, object_dimension, C = 1, ICM_iters = 3):
        self.samples = samples
        self.num_samples = len(samples)

        self.labels = labels
        self.window = window
        self.num_classes = num_classes
        self.object_dimension = object_dimension
        
        self.num_dimensions = num_classes*object_dimension*window + window - 1
        
        self.C = C
        self.ICM_iters = ICM_iters #hyperparameter for number of times to run ICM sampling
    
    #Make_psi performs neighbor checking
    def make_psi(self, x, y):
        psi = dlib.vector()
        psi.resize(self.num_dimensions)
        for i in range(self.window):
            idx_obj = i*(self.num_classes*self.object_dimension) + self.object_dimension*y[i]
            idx_obj = int(idx_obj)
            for j in range(self.object_dimension):
                psi[idx_obj+j] = x[i][j]
        for i in range(self.window - 1):
            idx_class = self.window*self.num_classes*self.object_dimension + i
            idx_class = int(idx_class)
            psi[idx_class] = 1 if y[i] != y[i+1] else 0
        return(psi)
    
    # Using Iterated Conditional Mode (ICM) for separation oracle
    def separation_oracle(self, idx, current_solution):
        samp = self.samples[idx]
        max_scoring_label = np.random.choice(self.num_classes, self.window) #random initialization for best label
        max_error = 0 #max value of target function
        for ICM_iter in range(self.ICM_iters):
            for entry in range(self.window):
                for proposed_label in range(self.num_classes):
                    tmp_label = np.copy(max_scoring_label)
                    tmp_label[entry] = proposed_label
                    psi_y = self.make_psi(samp, tmp_label)
                    score_y = dlib.dot(current_solution, psi_y)
                    label_dist = sum([1 if u != v else 0 for u, v in zip(self.labels[idx], tmp_label)]) #Hamming distance
                    if max_error < score_y + label_dist:
                        max_error = score_y + label_dist
                        max_scoring_label[entry] = proposed_label
        label_dist = sum([1 if u != v else 0 for u, v in zip(self.labels[idx], max_scoring_label)]) #Hamming distance
        psi = self.make_psi(samp, max_scoring_label)
        return(label_dist, psi)
    
    def get_truth_joint_feature_vector(self, idx):
        return self.make_psi(self.samples[idx], self.labels[idx])
    
    #Do ICM at test time because of complexity
    def predict_label(self, weights, sample, ICM_iters = 3):
        max_scoring_label = np.random.choice(self.num_classes, self.window) #random initialization for best label
        max_score = 0
        for ICM_iter in range(ICM_iters):
            for entry in range(self.window):
                for proposed_label in range(self.num_classes):
                    tmp_label = np.copy(max_scoring_label)
                    tmp_label[entry] = proposed_label
                    psi_y = self.make_psi(sample, tmp_label)
                    score_y = dlib.dot(weights, psi_y)
                    if max_score < score_y:
                        max_score = score_y
                        max_scoring_label[entry] = proposed_label
        return(max_scoring_label)
    
    def predict_labels(self, weights, samples, ICM_iters = 3):
        return(np.array([self.predict_label(weights, sample, ICM_iters = ICM_iters) for sample in samples]))

In [28]:
window = 2

train_features, train_labels = ocrp.loadWindows(0, 4000, window)
test_features, test_labels = ocrp.loadWindows(4000, 5000, window)

In [29]:
svm = windowStructSVM(train_features, train_labels, window = window, num_classes = 27, object_dimension = 128, ICM_iters = 3)
problem = svm
weights = dlib.solve_structural_svm_problem(problem)

In [30]:
train_predictions = svm.predict_labels(weights, train_features, ICM_iters=10)

In [31]:
test_predictions = svm.predict_labels(weights, test_features, ICM_iters=10)

In [32]:
train_acc = np.mean([[(ti == pi) for ti,pi in zip(t,p)] for t,p in zip(train_labels, train_predictions)])
print("Window: %s" %(window))
print("Train acc: %f" %(train_acc))

Window: 2
Train acc: 0.667519


In [33]:
train_acc = np.mean([(t==p).all() for t,p in zip(train_labels, train_predictions)])
print("Window: %s" %(window))
print("Train acc: %f" %(train_acc))

Window: 2
Train acc: 0.032680


In [34]:
test_acc = np.mean([[(ti == pi) for ti,pi in zip(t,p)] for t,p in zip(test_labels, test_predictions)])
print("Window: %s" %(window))
print("Test acc: %f" %(test_acc))

Window: 2
Test acc: 0.373388


In [35]:
test_acc = np.mean([(t==p).all() for t,p in zip(test_labels, test_predictions)])
print("Window: %s" %(window))
print("Test acc: %f" %(test_acc))

Window: 2
Test acc: 0.000000


In [36]:
window = 3

train_features, train_labels = ocrp.loadWindows(0, 4000, window)
test_features, test_labels = ocrp.loadWindows(4000, 5000, window)

In [37]:
svm = windowStructSVM(train_features, train_labels, window = window, num_classes = 27, object_dimension = 128, ICM_iters = 3)
problem = svm
weights = dlib.solve_structural_svm_problem(problem)

In [38]:
train_predictions = svm.predict_labels(weights, train_features, ICM_iters=10)

In [39]:
test_predictions = svm.predict_labels(weights, test_features, ICM_iters=10)

In [40]:
train_acc = np.mean([[(ti == pi) for ti,pi in zip(t,p)] for t,p in zip(train_labels, train_predictions)])
print("Window: %s" %(window))
print("Train acc: %f" %(train_acc))

Window: 3
Train acc: 0.674567


In [41]:
train_acc = np.mean([(t==p).all() for t,p in zip(train_labels, train_predictions)])
print("Window: %s" %(window))
print("Train acc: %f" %(train_acc))

Window: 3
Train acc: 0.000000


In [42]:
test_acc = np.mean([[(ti == pi) for ti,pi in zip(t,p)] for t,p in zip(test_labels, test_predictions)])
print("Window: %s" %(window))
print("Test acc: %f" %(test_acc))

Window: 3
Test acc: 0.368173


In [45]:
test_acc = np.mean([(t==p).all() for t,p in zip(test_labels, test_predictions)])
print("Window: %s" %(window))
print("Test acc: %f" %(test_acc))

Window: 3
Test acc: 0.000000


In [47]:
test_predictions[0]

array([ 2,  0, 21])

In [46]:
test_labels[0]

array([[ 11.],
       [ 24.],
       [ 26.]])