# loading

In [1]:
import os
import struct
import random
import cv2
#import cupy as cp
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

def load_mnist(path, kind='train'):
    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,'%s-labels-idx1-ubyte'% kind)
    images_path = os.path.join(path,'%s-images-idx3-ubyte'% kind)

    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II',lbpath.read(8))
        labels = np.fromfile(lbpath,dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack(">IIII",imgpath.read(16))
        images = np.fromfile(imgpath,np.uint8).reshape(len(labels), 784)

    return images, labels

#### Loading the data

X_train, y_train = load_mnist('./data', kind='newtrain')
X_test, y_test = load_mnist('./data', kind='new1k')

In [2]:
def load_mnist(path, kind='train'):
    """Load MNIST data from `path`"""
    images_path = os.path.join(path,
                               '%s-patterns-idx3-ubyte'
                               % kind)

    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack(">IIII",imgpath.read(16))
        images = np.fromfile(imgpath,dtype=np.uint8).reshape(60000, 784)

    return images

X_testall = load_mnist('./data', kind='testall')

In [3]:
print('train_sample_number:\t:%d, column_number:%d' %(X_train.shape[0], X_train.shape[1]))
print('test_sample_number:\t:%d, column_number:%d' %(X_test.shape[0], X_test.shape[1]))
print('testall_sample_number\t:%d, column_number:%d' %(X_testall.shape[0], X_testall.shape[1]))

train_sample_number:	:80000, column_number:784
test_sample_number:	:10000, column_number:784
testall_sample_number	:60000, column_number:784


# deskew

In [4]:
from scipy.ndimage import interpolation

def moments(image):
    c0,c1 = np.mgrid[:image.shape[0],:image.shape[1]] # A trick in numPy to create a mesh grid
    totalImage = np.sum(image) #sum of pixels
    m0 = np.sum(c0*image)/totalImage #mu_x
    m1 = np.sum(c1*image)/totalImage #mu_y
    m00 = np.sum((c0-m0)**2*image)/totalImage #var(x)
    m11 = np.sum((c1-m1)**2*image)/totalImage #var(y)
    m01 = np.sum((c0-m0)*(c1-m1)*image)/totalImage #covariance(x,y)
    mu_vector = np.array([m0,m1]) # Notice that these are \mu_x, \mu_y respectively
    covariance_matrix = np.array([[m00,m01],[m01,m11]]) # Do you see a similarity between the covariance matrix
    return mu_vector, covariance_matrix

In [5]:
def deskew(image):
    c,v = moments(image)
    alpha = v[0,1]/v[0,0]
    affine = np.array([[1,0],[alpha,1]])
    ocenter = np.array(image.shape)/2.0
    offset = c-np.dot(affine,ocenter)
    return interpolation.affine_transform(image,affine,offset=offset)

In [6]:
def deskewAll(X):
    currents = []
    for i in range(len(X)):
        currents.append(deskew(X[i].reshape(28,28)).flatten())
    return np.array(currents)

In [7]:
#X_train_deskewed = deskewAll(X_train)
X_test_deskewed = deskewAll(X_test)

https://github.com/fsix/mnist/blob/master/notebooks/Deskewing.ipynb

# hog

In [17]:
from skimage.feature import hog

def calc_hog_features(X, image_shape=(28, 28), pixels_per_cell=(8, 8)):
    fd_list = []
    for row in X:
        img = row.reshape(image_shape)
        fd = hog(img, orientations=8, pixels_per_cell=pixels_per_cell, cells_per_block=(1, 1))
        fd_list.append(fd)
    
    return np.array(fd_list)

In [19]:
#hoged_X_train = calc_hog_features(X_train, pixels_per_cell=(8, 8))
hoged_X_test = calc_hog_features(X_test_deskewed, pixels_per_cell=(8, 8))

https://www.kaggle.com/njanakiev/mnist-hog

# preprocessing

In [8]:
X_train=X_train/255
X_test=X_test/255
X_testall=X_testall/255

# classifer

In [9]:
class myClassifier(object):    
    """
    ovr
    """
    def __init__(self, C=1000, eta=0.01, batch_size=60, epochs=200, epsilon=1e-8, 
                 shuffle=True, params=None, w=0, b=0):
        self.C = C
        self.eta = eta
        self.batch_size = batch_size
        self.epochs = epochs
        self.epsilon = epsilon
        self.class_num = 0
        self.shuffle = shuffle
        self.update_count = 0
        self.w = 0
        self.b = 0
        
    def fit(self, X, y, params=None, w=0, b=0, testscore = None, eval_score=None):
        X_num, X_fea = np.shape(X)
        #X_num, X_fea = cp.shape(X)
        self.class_num=len(np.unique(y))
        #self.class_num=len(cp.unique(y))
        
        if params is None:
            print('fit params=None')
            self.params = {
                'w': np.random.randn(X_fea, self.class_num), #(10, 784) 정규분포난수
                #'w': c.random.randn(X_fea, self.class_num),
                'b': np.random.randn(1, self.class_num),
                #'b': cp.random.randn(1, self.class_num),
                'w_': np.random.randn(X_fea, self.class_num),
                #'w_': cp.random.randn(X_fea, self.class_num),
                'b_': np.random.randn(1, self.class_num),
                #'b_': cp.random.randn(1, self.class_num),
                'tmpw': 0,
                'tmpb': 0
            }
        cnt=1
        if eval_score is None:
            self.score_val = 0
            
        for Xi in range(self.epochs):
            s_data, s_labels = self.shuffling(X, y)
            encoded_y=self.encoding(s_labels)
            avg_loss = 0
            batch_count = int(X_num / self.batch_size)
            for t in range(int(batch_count)):
                batch_X, batch_y, bs=self.batching(s_data, encoded_y, t)
                batch_X = np.reshape(batch_X, (bs, X_fea))
                #batch_X = cp.reshape(batch_X, (bs, X_fea))
                batch_y = np.reshape(batch_y, (bs, self.class_num))
                #batch_y = cp.reshape(batch_y, (bs, self.class_num))
                z = self.net_input(batch_X)
                loss = self.hinge_loss(batch_y, z)
                self.update_w_b(batch_X, batch_y, z, bs, cnt)
                cnt+=1
                avg_loss += loss
                self.update_count += 1
                
            self.params['tmpw'] = (cnt * (cnt/(cnt+1)) * 
                                   self.params['w_'] + (1/(cnt+1))*self.params['w'])
            self.params['tmpb'] = (cnt * (cnt/(cnt+1)) * 
                                   self.params['b_'] + (1/(cnt+1))*self.params['b'])
            prev_score = self.score_val
            pres_score = self.score(X, y)
            print("epochs: ", Xi)
            print("prev_score: ", prev_score)
            print("pres_score: ", pres_score,"\n")
            if prev_score < pres_score:
                self.score_val = pres_score
            if self.det_weight(X, y, 1) < self.det_weight(X, y): # temp_w, temp_b
                self.params['w_'] = self.params['tmpw']
                self.params['b_'] = self.params['tmpb']
            avg_loss /= batch_count
        return self
    
    def det_weight(self, X, y, aver=0):
        if aver:
            w1 = self.params['w_']
            b1 = self.params['b_']
        else:
            w1 = self.params['tmpw']
            b1 = self.params['tmpb']
        temp = np.dot(X, w1) + b1
        #temp = cp.dot(X, w1) + b1
        
        pred = np.argmax(temp, axis=1)
        #pred = cp.argmax(temp, axis=1)
        sco = np.mean(pred == y)
        #sco = cp.mean(pred == y)
        return sco
    
    def update_w_b(self, batch_X, batch_y, z, bs, cnt):
        n = np.shape(batch_X)[1]  # num of features
        #n = cp.shape(batch_X)[1] 
        delta_w = np.zeros(self.params['w'].shape)
        #delta_w = cp.zeros(self.params['w'].shape)
        delta_b = np.zeros(self.params['b'].shape)
        #delta_b = cp.zeros(self.params['b'].shape)
        z = np.reshape(z, (bs, self.class_num))
        #z = cp.reshape(z, (bs, self.class_num))
        temp = 1 - np.multiply(batch_y, z)
        #temp = 1 - cp.multiply(batch_y, z)
        temp[temp <= 0] = 0
        temp[temp > 0] = 1
        y_temp = np.multiply(batch_y, temp.reshape(bs, self.class_num))
        #y_temp = cp.multiply(batch_y, temp.reshape(bs, self.class_num))
        delta_w = -(1 / bs) * np.matmul(batch_X.T, y_temp) + (1 / self.C) * self.params['w']
        #delta_w = -(1 / bs) * cp.matmul(batch_X.T, y_temp) + (1 / self.C) * self.params['w']
        delta_b = -(1 / bs) * np.sum(y_temp, axis=0)
        #delta_b = -(1 / bs) * cp.sum(y_temp, axis=0)
        self.params['w'] = self.params['w'] - (self.eta / (1 + self.epsilon * cnt)) * delta_w
        self.params['b'] = self.params['b'] - (self.eta / (1 + self.epsilon * cnt)) * delta_b
        
        return self.params
    
    def hinge_loss(self, y, z):
        loss = 1 - np.multiply(y, z)
        #loss = 1 - cp.multiply(y, z)
        loss[loss < 0] = 0
        loss = np.mean(loss)
        #loss = cp.mean(loss)
        return loss
    
    def net_input(self, X):  # net_input() = forward_prop(), generate z
        z = np.matmul(X, self.params['w']) + self.params['b']
        #z = cp.matmul(X, self.params['w']) + self.params['b']
        return z

    def encoding(self, y):
        y_array=np.array(y).reshape(-1,1)

        enc=OneHotEncoder()
        enc.fit(y_array)
        y_enc=enc.transform(y_array).toarray()
        
        encoded_y=-1*np.ones((np.shape(y_test)[0],10))
        
        y_enc=y_enc * 2 + encoded_y
        
        """
        encoded_y=-1*np.ones((np.shape(y)[0],self.class_num))
        #encoded_y=-1*cp.ones((cp.shape(y)[0],self.class_num))
        for i in range(np.shape(y)[0]):
            encoded_y[i,y[i]] = 1
        """
        return y_enc
                
    def shuffling(self, X, y):
        temp_s=list(zip(X,y))
        random.shuffle(temp_s)
        X,y=zip(*temp_s)
        return X,y
    
    def batching(self, X, y, t):                         
        batch_X = X[t * self.batch_size : min(len(X), (t+1) * self.batch_size)]
        batch_y = y[t * self.batch_size : min(len(X), (t+1) * self.batch_size)]
        last_size = min(len(X), (t+1) * self.batch_size) - t * self.batch_size
        
        return batch_X, batch_y,last_size
    
    def predict(self, X):
        m = np.shape(X)[0]
        #m = cp.shape(X)[0]
        class_score = self.net_input(X)  # return z
        pred = np.argmax(class_score, axis=1)
        #pred = cp.argmax(class_score, axis=1)

        return pred
    
    def score(self, X, y):
        pred = self.predict(X)
        score = np.mean(pred == y)
        #score = cp.mean(pred == y)
        
        return score
    
    def get_params(self, deep=True):
        return {'C':self.C, 'batch_size':self.batch_size, 'epochs':self.epochs,
               'eta': self.eta, 'w':self.params['w_'], 'b':self.params['b_']}
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
    
    def test(self, X, w, b):
        #z = np.matmul(X, np.array(w)) + np.array(b)
        z = cp.matmul(X, cp.array(w)) + cp.array(b)
        #p = np.argmax(z, axis=1)
        p = cp.argmax(z, axis=1)
        return p

In [10]:
mine=myClassifier()
#mine.fit(X_train, y_train)
mine.fit(X_test_deskewed,y_test)

fit params=None
epochs:  0
prev_score:  0
pres_score:  0.875 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  1
prev_score:  0.875
pres_score:  0.9287 

epochs:  2
prev_score:  0.9287
pres_score:  0.9266 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  3
prev_score:  0.9287
pres_score:  0.8908 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  4
prev_score:  0.9287
pres_score:  0.8789 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  5
prev_score:  0.9287
pres_score:  0.9301 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  6
prev_score:  0.9301
pres_score:  0.9269 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  7
prev_score:  0.9301
pres_score:  0.9375 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  8
prev_score:  0.9375
pres_score:  0.8691 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  9
prev_score:  0.9375
pres_score:  0.9278 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  10
prev_score:  0.9375
pres_score:  0.9152 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  11
prev_score:  0.9375
pres_score:  0.9438 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  12
prev_score:  0.9438
pres_score:  0.9257 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  13
prev_score:  0.9438
pres_score:  0.9054 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  14
prev_score:  0.9438
pres_score:  0.9439 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  15
prev_score:  0.9439
pres_score:  0.8521 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  16
prev_score:  0.9439
pres_score:  0.9439 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  17
prev_score:  0.9439
pres_score:  0.9404 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  18
prev_score:  0.9439
pres_score:  0.905 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  19
prev_score:  0.9439
pres_score:  0.9401 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  20
prev_score:  0.9439
pres_score:  0.9431 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  21
prev_score:  0.9439
pres_score:  0.9473 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  22
prev_score:  0.9473
pres_score:  0.9512 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  23
prev_score:  0.9512
pres_score:  0.9051 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  24
prev_score:  0.9512
pres_score:  0.9156 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  25
prev_score:  0.9512
pres_score:  0.951 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  26
prev_score:  0.9512
pres_score:  0.9516 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


epochs:  27
prev_score:  0.9516
pres_score:  0.9531 



In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


KeyboardInterrupt: 

In [7]:
mine.get_params()

{'C': 1000,
 'batch_size': 60,
 'epochs': 200,
 'eta': 0.01,
 'w': array([[ 1.0854312 ,  1.60474952,  1.29690399, ...,  0.1711392 ,
          0.93949008,  1.25477187],
        [-0.51138963, -0.40311992,  0.94269138, ...,  0.14857752,
          0.5367502 ,  0.86845323],
        [ 0.6607393 ,  0.15685686,  1.41669104, ..., -0.02112389,
         -0.46241083, -0.72440239],
        ...,
        [-1.00923905,  0.33886948,  0.4260792 , ..., -0.29764807,
         -0.22932422, -0.34106951],
        [ 1.97990312,  1.62949041,  2.11518993, ..., -0.069971  ,
         -0.61228839,  0.55258546],
        [-0.46404495, -1.02294704, -1.02440946, ...,  1.44857499,
         -0.57983122,  0.19809106]]),
 'b': array([[ 0.78105322, -0.61673477,  1.04131276,  0.77108051, -0.22672777,
         -2.02352649,  0.71710939, -1.70611866,  0.24443303,  0.33781992]])}

In [8]:
w=mine.get_params()['w']
b=mine.get_params()['b']

In [9]:
p=mine.test(X_testall,w,b)

file=open('/home/ryu/AI/MyClassifier/prediction.txt','w')
for i in range(len(p)):
    file.write('%s\n' %p[i])
file.close()