In [12]:
import dlib
import numpy as np
import ocr_preprocess as ocrp

In [13]:
class windowStructSVM:
    def __init__(self, samples, labels, window, num_classes, object_dimension, C = 1, ICM_iters = 3):
        self.samples = samples
        self.num_samples = len(samples)

        self.labels = labels
        self.window = window
        self.num_classes = num_classes
        self.object_dimension = object_dimension
        
        self.num_dimensions = num_classes*window + num_classes*object_dimension*window
        
        self.C = C
        self.ICM_iters = ICM_iters #hyperparameter for number of times to run ICM sampling
    
    #Make_psi performs one-hot encoding for the labels
    def make_psi(self, x, y):
        psi = dlib.vector()
        psi.resize(self.num_dimensions)
        for i in range(self.window):
            idx_obj = i*(self.window*self.object_dimension + self.num_classes) + y[i]*self.object_dimension
            idx_obj = int(idx_obj)
            idx_class = i*(self.window*self.object_dimension + self.num_classes) + self.window*self.object_dimension + y[i]
            idx_class = int(idx_class)
            for j in range(self.object_dimension):
                psi[idx_obj+j] = x[i][j]
            psi[idx_class] = 1
        return(psi)
    
#     #Make_psi performs neighbor checking
#     def make_psi(self, x, y):
#         psi = dlib.vector()
#         psi.resize(self.num_dimensions)
#         for i in range(self.window):
#             idx_obj = i*(self.window*self.object_dimension + self.num_classes) + y[i]*self.object_dimension
#             idx_obj = int(idx_obj)
#             idx_class = i*(self.window*self.object_dimension + self.num_classes) + self.window*self.object_dimension + y[i]
#             idx_class = int(idx_class)
#             for j in range(self.object_dimension):
#                 psi[idx_obj+j] = x[i][j]
#             psi[idx_class] = 1
#         return(psi)
    
    # Using Iterated Conditional Mode (ICM) for separation oracle
    def separation_oracle(self, idx, current_solution):
        samp = self.samples[idx]
        max_scoring_label = np.random.choice(self.num_classes, self.window) #random initialization for best label
        max_error = 0 #max value of target function
        for ICM_iter in range(self.ICM_iters):
            for entry in range(self.window):
                for proposed_label in range(self.num_classes):
                    tmp_label = np.copy(max_scoring_label)
                    tmp_label[entry] = proposed_label
                    psi_y = self.make_psi(samp, tmp_label)
                    score_y = dlib.dot(current_solution, psi_y)
                    label_dist = sum([1 if u != v else 0 for u, v in zip(self.labels[idx], tmp_label)]) #Hamming distance
                    if max_error < score_y + label_dist:
                        max_error = score_y + label_dist
                        max_scoring_label[entry] = proposed_label
        label_dist = sum([1 if u != v else 0 for u, v in zip(self.labels[idx], max_scoring_label)]) #Hamming distance
        psi = self.make_psi(samp, max_scoring_label)
        return(label_dist, psi)
    
    def get_truth_joint_feature_vector(self, idx):
        return self.make_psi(self.samples[idx], self.labels[idx])
    
    #Do ICM at test time because of complexity
    def predict_label(self, weights, sample, ICM_iters = 3):
        max_scoring_label = np.random.choice(self.num_classes, self.window) #random initialization for best label
        max_score = 0
        for ICM_iter in range(ICM_iters):
            for entry in range(self.window):
                for proposed_label in range(self.num_classes):
                    tmp_label = np.copy(max_scoring_label)
                    tmp_label[entry] = proposed_label
                    psi_y = self.make_psi(sample, tmp_label)
                    score_y = dlib.dot(weights, psi_y)
                    if max_score < score_y:
                        max_score = score_y
                        max_scoring_label[entry] = proposed_label
        return(max_scoring_label)
    
    def predict_labels(self, weights, samples, ICM_iters = 3):
        return(np.array([self.predict_label(weights, sample, ICM_iters = ICM_iters) for sample in samples]))

In [14]:
window = 3

ocr = ocrp.read_OCR('letter.data')
train_chops = ocrp.chop_idxs(ocr, start = 0, stop = 4000, window=window)
train_features = ocrp.chops_to_features(ocr, train_chops)
train_labels = ocrp.chops_to_labels(ocr, train_chops)

test_chops = ocrp.chop_idxs(ocr, start = 4000, stop = 5000, window=window)
test_features = ocrp.chops_to_features(ocr, test_chops)
test_labels = ocrp.chops_to_labels(ocr, test_chops)

In [15]:
train_features.shape

(2203, 2, 128)

In [16]:
test_features.shape

(500, 2, 128)

In [17]:
svm = windowStructSVM(train_features, train_labels, window = 2, num_classes = 27, object_dimension = 128, ICM_iters = 3)

In [18]:
problem = svm
weights = dlib.solve_structural_svm_problem(problem)

In [24]:
train_predictions = svm.predict_labels(weights, train_features, ICM_iters=10)

In [20]:
test_predictions = svm.predict_labels(weights, test_features, ICM_iters=3)

In [27]:
np.mean([[(ti == pi) for ti,pi in zip(t,p)] for t,p in zip(train_labels, train_predictions)])

0.6629596005447117

In [29]:
np.mean([[(ti == pi) for ti,pi in zip(t,p)] for t,p in zip(test_labels, test_predictions)])

0.224

In [30]:
1/26

0.038461538461538464