In [1]:
# Mount Google Drive at /content/drive (Colab-only API); the MNIST files are
# read from /content/drive/MyDrive/MNIST by the loading cell further down.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
from mnist import MNIST
from sklearn.linear_model import LogisticRegression

# MNIST loader pointed at the Drive folder, with its `gz` flag switched on
mnist_loader = MNIST("/content/drive/MyDrive/MNIST")
mnist_loader.gz = True


def _as_unit_pixels(images):
    """Return the images as a float32 array with every value divided by 255."""
    return np.array(images, dtype='float32') / 255


def _as_labels(labels):
    """Return the labels as an int16 array."""
    return np.array(labels, dtype='int16')


# Read both splits first, then convert each one in place of the raw data
X_train, y_train = mnist_loader.load_training()
X_test, y_test = mnist_loader.load_testing()
X_train, y_train = _as_unit_pixels(X_train), _as_labels(y_train)
X_test, y_test = _as_unit_pixels(X_test), _as_labels(y_test)

In [3]:
def compute_black(X, print_iterval):
    """Build the integral image of zero-valued ("black") pixels for each image.

    For every image, entry ``[i, j]`` of its table is the number of pixels
    equal to 0 among rows ``0..i`` and columns ``0..j`` (both inclusive) of
    the image viewed as a 28x28 grid.

    Parameters
    ----------
    X : numpy.ndarray
        One image per entry along the first axis; each image must hold
        28 * 28 = 784 values (it is reshaped to 28x28).
    print_iterval : any
        Not used. Kept only so existing calls keep working.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(X.shape[0], 28, 28)``. An empty ``X``
        yields an empty array of shape ``(0, 28, 28)``.
    """
    # Indicator grid of black pixels, one 28x28 plane per image.
    is_black = np.asarray(X).reshape((X.shape[0], 28, 28)) == 0

    # A running sum down the rows followed by a running sum across the columns
    # yields the 2-D prefix sums, i.e. the same table the cell-by-cell
    # recurrence b[i, j] = b[i-1, j] + b[i, j-1] - b[i-1, j-1] + black(i, j)
    # would produce, without any Python-level loop.
    return is_black.cumsum(axis=1, dtype='int').cumsum(axis=2, dtype='int')

In [4]:
k = 100
np.random.seed(42)
rect = []


def _pt(first, second):
    """Pack two coordinates into the 2-element array form stored in `rect`."""
    return np.array([first, second])


# Rejection sampling: keep drawing until k rectangles whose area lies in
# [130, 170] have been collected.
while len(rect) < k:

    # Draw order is anchor point, then breadth, then length (kept fixed so the
    # seeded sequence is reproduced exactly).
    anchor = np.random.randint(low=5, high=23, size=2)
    p0, q0 = anchor
    breadth = np.random.randint(low=5, high=28 - q0)
    length = np.random.randint(low=5, high=28 - p0)
    if not (130 <= breadth * length <= 170):
        continue

    # Far edge and mid-line along each of the two coordinates
    p1, q1 = p0 + length, q0 + breadth
    p_mid, q_mid = p0 + length // 2, q0 + breadth // 2

    rect.append([
        anchor,             # tl
        _pt(p1, q0),        # tr
        _pt(p0, q1),        # bl
        _pt(p1, q1),        # br
        _pt(p_mid, q0),     # vt
        _pt(p_mid, q1),     # vb
        _pt(p0, q_mid),     # hl
        _pt(p1, q_mid),     # hr
    ])

In [5]:
def HAAR(X, b, print_interval, rects=None):
    """Compute two rectangle-difference features per rectangle for every image.

    For each rectangle, four region sums are read from the image's cumulative
    table with the usual four-corner lookup, and two differences are stored:

    * ``v_score = top - bottom`` -- the rectangle split at the ``hl``/``hr``
      mid-line (halfway along the second coordinate);
    * ``h_score = left - right`` -- the rectangle split at the ``vt``/``vb``
      mid-line (halfway along the first coordinate).

    Parameters
    ----------
    X : numpy.ndarray
        Only ``X.shape[0]`` (the number of images) is read.
    b : array-like
        Cumulative tables, one 2-D table per image; ``b[i]`` belongs to
        image ``i`` and is indexed as ``table[p[0], p[1]]``.
    print_interval : any
        Not used. Kept only so existing calls keep working.
    rects : sequence, optional
        Rectangles, each given as eight 2-element points in the order
        ``tl, tr, bl, br, vt, vb, hl, hr``. Defaults to the module-level
        ``rect`` list.

    Returns
    -------
    numpy.ndarray
        int16 array of shape ``(X.shape[0], 2 * len(rects))``; column ``2*j``
        holds ``v_score`` and column ``2*j + 1`` holds ``h_score`` of
        rectangle ``j``.
    """
    if rects is None:
        rects = rect

    n_images = X.shape[0]
    # Width follows the rectangle count, so every column is always written
    # (a fixed width would leave np.empty garbage or overflow if it differed).
    _X = np.empty((n_images, 2 * len(rects)), dtype='int16')
    if n_images == 0:
        return _X

    tables = np.asarray(b)[:n_images]

    def at(p):
        # Table value at point p, for all images at once.
        return tables[:, p[0], p[1]]

    for j, (tl, tr, bl, br, vt, vb, hl, hr) in enumerate(rects):
        top = at(hr) - at(hl) - at(tr) + at(tl)
        bottom = at(br) - at(bl) - at(hr) + at(hl)

        left = at(vb) - at(bl) - at(vt) + at(tl)
        right = at(br) - at(vb) - at(tr) + at(vt)

        _X[:, 2 * j] = top - bottom
        _X[:, 2 * j + 1] = left - right

    return _X

In [6]:
# Per-image cumulative zero-pixel tables for the training images. The second
# argument is bound to compute_black's `print_iterval`, which the function
# body never reads.
X_train_black = compute_black(X_train, 10000)

In [7]:
# Rectangle-difference features for the training set. HAAR reads only
# X_train.shape[0] from its first argument; the values come from X_train_black.
haar_train = HAAR(X_train, X_train_black, 10000)

In [8]:
# Per-image cumulative zero-pixel tables for the test images (the second
# argument is not used by compute_black).
X_test_black = compute_black(X_test, 1000)

In [9]:
# Rectangle-difference features for the test set, built from the same
# rectangle list as the training features.
haar_test = HAAR(X_test, X_test_black, 1000)

In [10]:
# L2-penalised logistic regression fitted with the lbfgs solver; the solver is
# allowed up to 1000 iterations. n_jobs=-1 is scikit-learn's "use all
# processors" setting.
LR = LogisticRegression(penalty = 'l2', solver = 'lbfgs', max_iter = 1000, n_jobs = -1)

In [11]:
# Fit the classifier on the training features and their digit labels.
LR.fit(haar_train, y_train)

In [12]:
# Share of training samples whose predicted label equals the true label
y_train_pred = LR.predict(haar_train)
train_hits = y_train_pred == y_train
print('Training accuracy:', np.mean(train_hits))

Training accuracy: 0.9178166666666666


In [13]:
# Share of test samples whose predicted label equals the true label
y_test_pred = LR.predict(haar_test)
test_hits = y_test_pred == y_test
print('Testing accuracy:', np.mean(test_hits))

Testing accuracy: 0.9165
