# loading

In [1]:
import os
import struct
import random
import numpy as np
import matplotlib.pyplot as plt

def load_mnist(path, kind='train'):
    """Load IDX-formatted images and their labels from `path`.

    Reads ``<kind>-labels-idx1-ubyte`` and ``<kind>-images-idx3-ubyte`` from
    the directory `path`.

    Parameters
    ----------
    path : str
        Directory containing the two IDX files.
    kind : str
        File-name prefix selecting the data set (default ``'train'``).

    Returns
    -------
    images : numpy.ndarray of uint8, shape (n_samples, n_pixels)
        One flattened image per row.
    labels : numpy.ndarray of uint8, shape (n_samples,)

    Raises
    ------
    ValueError
        If the image payload cannot be split evenly into one row per label.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # 8-byte big-endian header (magic number, item count); the rest of
        # the file is one byte per label.
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # 16-byte big-endian header (magic, image count, rows, cols); the
        # rest of the file is one byte per pixel.
        magic, num, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    # Derive the row width from the payload instead of hard-coding 784, so
    # images of any size load. With no labels there is nothing to divide by,
    # so fall back to the width announced in the header.
    n_samples = len(labels)
    n_pixels = pixels.size // n_samples if n_samples else rows * cols
    images = pixels.reshape(n_samples, n_pixels)

    return images, labels

#### Loading the data

# Reads ./data/newtrain-labels-idx1-ubyte and ./data/newtrain-images-idx3-ubyte.
X_train, y_train = load_mnist('./data', kind='newtrain')
# Reads ./data/new1k-labels-idx1-ubyte and ./data/new1k-images-idx3-ubyte.
X_test, y_test = load_mnist('./data', kind='new1k')

In [2]:
def load_mnist(path, kind='train'):
    """Load unlabeled IDX-formatted images from `path`.

    Reads ``<kind>-patterns-idx3-ubyte`` from the directory `path`. This
    definition reuses the name ``load_mnist`` and therefore replaces the
    images-and-labels loader defined earlier in the notebook.

    Parameters
    ----------
    path : str
        Directory containing the IDX file.
    kind : str
        File-name prefix selecting the data set (default ``'train'``).

    Returns
    -------
    images : numpy.ndarray of uint8, shape (n_samples, rows * cols)
        One flattened image per row.

    Raises
    ------
    ValueError
        If the payload size is not a multiple of ``rows * cols``.
    """
    images_path = os.path.join(path, '%s-patterns-idx3-ubyte' % kind)

    with open(images_path, 'rb') as imgpath:
        # 16-byte big-endian header: magic, image count, rows, cols.
        magic, num, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    # Take the row width from the header and let numpy infer the row count,
    # instead of hard-coding a 60000 x 784 layout.
    images = pixels.reshape(-1, rows * cols)

    return images

# Reads ./data/testall-patterns-idx3-ubyte (images only, no labels file).
X_testall = load_mnist('./data', kind='testall')

In [3]:
# Report (sample count, column count) for each loaded array. Every array is
# 2-D, so its shape tuple fills both %d placeholders directly.
print('train_sample_number:\t:%d, column_number:%d' % X_train.shape)
print('test_sample_number:\t:%d, column_number:%d' % X_test.shape)
print('testall_sample_number\t:%d, column_number:%d' % X_testall.shape)

train_sample_number:	:80000, column_number:784
test_sample_number:	:10000, column_number:784
testall_sample_number	:60000, column_number:784


# preprocessing

In [4]:
# Scale the uint8 pixel values (0-255) to floats in [0, 1].
# NOTE(review): each line rebinds its own input, so re-running this cell
# divides by 255 a second time; run it exactly once after loading.
X_train=X_train/255
X_test=X_test/255
X_testall=X_testall/255

# classifier

In [5]:
class myClassifier(object):
    """One-vs-rest ("ovr") linear classifier trained by mini-batch SGD on a hinge loss.

    Labels are encoded as +1 for the true class and -1 for every other class;
    prediction is the argmax of the linear scores ``X @ w + b``.

    State lives in the ``self.params`` dict:
      'w', 'b'       -- weights changed by every SGD step (used by predict/score)
      'w_', 'b_'     -- averaged weights, replaced only when a new candidate
                        scores better (these are what get_params() returns)
      'tmpw', 'tmpb' -- candidate averaged weights computed after each epoch

    Other attributes set by fit(): ``class_num``, ``update_count`` (total SGD
    steps), ``score_val`` (best training accuracy seen) and ``loss_history``
    (mean hinge loss of each epoch).
    """

    def __init__(self, C=1000, eta=0.01, batch_size=60, epochs=200, epsilon=1e-8,
                 shuffle=True, params=None, w=0, b=0):
        """Store the hyper-parameters.

        C          -- regularisation constant; the weight gradient adds ``w / C``
        eta        -- base learning rate
        batch_size -- samples per SGD step
        epochs     -- passes over the training data
        epsilon    -- step decay: the rate used is ``eta / (1 + epsilon * step)``
        shuffle    -- reshuffle the training data at the start of every epoch
        params     -- optional parameter dict (see class docstring) to start with
        w, b       -- stored as-is; returned by get_params() before fitting
        """
        self.C = C
        self.eta = eta
        self.batch_size = batch_size
        self.epochs = epochs
        self.epsilon = epsilon
        self.class_num = 0
        self.shuffle = shuffle
        self.update_count = 0
        self.w = w
        self.b = b
        # Defined up front so get_params()/fit() never hit a missing attribute.
        self.params = dict(params) if isinstance(params, dict) else None
        self.score_val = 0
        self.loss_history = []

    def fit(self, X, y, params=None, w=0, b=0, testscore = None, eval_score=None):
        """Train on X (n_samples, n_features) with integer labels y.

        params     -- None draws fresh random weights; a dict (keys as in the
                      class docstring) is copied and used as the starting
                      point; any other value keeps the current ``self.params``
        eval_score -- None resets the best-score tracker to 0; any other value
                      keeps the tracker from a previous fit
        w, b, testscore -- accepted for backward compatibility; not used

        Labels are used directly as column indices, so they are expected to be
        the integers 0 .. n_classes-1.

        Returns self. Raises ValueError when X is empty or no parameters are
        available.
        """
        X_num, X_fea = np.shape(X)
        if X_num == 0:
            raise ValueError('X must contain at least one sample')
        self.class_num = len(np.unique(y))

        if params is None:
            print('fit params=None')
            self.params = {
                # standard-normal initial values, shape (n_features, n_classes)
                'w': np.random.randn(X_fea, self.class_num),
                'b': np.random.randn(1, self.class_num),
                'w_': np.random.randn(X_fea, self.class_num),
                'b_': np.random.randn(1, self.class_num),
                'tmpw': 0,
                'tmpb': 0
            }
        elif isinstance(params, dict):
            # Shallow copy: updates rebind entries, so the caller's dict and
            # arrays are left untouched.
            self.params = dict(params)
        if getattr(self, 'params', None) is None:
            raise ValueError('no parameters to continue from; call fit with '
                             'params=None or pass a parameter dict')

        cnt = 1
        if eval_score is None:
            self.score_val = 0
        else:
            self.score_val = getattr(self, 'score_val', 0)
        self.loss_history = []

        # Ceiling division so the trailing partial batch is trained on too
        # (batching() already sizes it correctly); this also guarantees at
        # least one batch when X_num < batch_size.
        batch_count = -(-X_num // self.batch_size)

        for Xi in range(self.epochs):
            if self.shuffle:
                s_data, s_labels = self.shuffling(X, y)
            else:
                s_data, s_labels = X, y
            encoded_y = self.encoding(s_labels)
            avg_loss = 0
            for t in range(batch_count):
                batch_X, batch_y, bs = self.batching(s_data, encoded_y, t)
                batch_X = np.reshape(batch_X, (bs, X_fea))
                batch_y = np.reshape(batch_y, (bs, self.class_num))
                z = self.net_input(batch_X)
                loss = self.hinge_loss(batch_y, z)
                self.update_w_b(batch_X, batch_y, z, bs, cnt)
                cnt += 1
                avg_loss += loss
                self.update_count += 1
            self.loss_history.append(avg_loss / batch_count)

            # Running-average step: a convex combination of the kept average
            # and the current weights. (The coefficients must sum to 1; an
            # extra factor of cnt here would scale w_ up on every acceptance.)
            # NOTE(review): w_ starts from random values and moves by only
            # 1/(cnt+1) per epoch, so it tracks w slowly -- confirm this is
            # the intended averaging schedule.
            keep = cnt / (cnt + 1)
            blend = 1 / (cnt + 1)
            self.params['tmpw'] = keep * self.params['w_'] + blend * self.params['w']
            self.params['tmpb'] = keep * self.params['b_'] + blend * self.params['b']

            prev_score = self.score_val
            pres_score = self.score(X, y)
            print("epochs: ", Xi)
            print("prev_score: ", prev_score)
            print("pres_score: ", pres_score,"\n")
            if prev_score < pres_score:
                self.score_val = pres_score
            # Keep the candidate average only if it classifies the training
            # data strictly better than the average kept so far.
            if self.det_weight(X, y, 1) < self.det_weight(X, y):
                self.params['w_'] = self.params['tmpw']
                self.params['b_'] = self.params['tmpb']
        return self

    def det_weight(self, X, y, aver=0):
        """Accuracy on (X, y) of the kept average (aver truthy) or the candidate."""
        if aver:
            w1 = self.params['w_']
            b1 = self.params['b_']
        else:
            w1 = self.params['tmpw']
            b1 = self.params['tmpb']
        pred = np.argmax(np.dot(X, w1) + b1, axis=1)
        return np.mean(pred == y)

    def update_w_b(self, batch_X, batch_y, z, bs, cnt):
        """Apply one SGD step to 'w' and 'b' and return the parameter dict.

        batch_y holds the +/-1 encoded targets, z the scores for the batch,
        bs the batch size and cnt the 1-based step counter used for decay.
        """
        z = np.reshape(z, (bs, self.class_num))
        # The hinge subgradient is non-zero only where the margin 1 - y*z > 0.
        active = (1 - np.multiply(batch_y, z) > 0).astype(float)
        y_temp = np.multiply(batch_y, active)
        delta_w = -(1 / bs) * np.matmul(batch_X.T, y_temp) + (1 / self.C) * self.params['w']
        delta_b = -(1 / bs) * np.sum(y_temp, axis=0)
        step = self.eta / (1 + self.epsilon * cnt)
        self.params['w'] = self.params['w'] - step * delta_w
        self.params['b'] = self.params['b'] - step * delta_b

        return self.params

    def hinge_loss(self, y, z):
        """Mean of max(0, 1 - y*z) over every sample/class entry."""
        loss = 1 - np.multiply(y, z)
        loss[loss < 0] = 0
        return np.mean(loss)

    def net_input(self, X):
        """Linear scores ``X @ w + b`` for the current (non-averaged) weights."""
        return np.matmul(X, self.params['w']) + self.params['b']

    def encoding(self, y):
        """Encode labels as a (n_samples, class_num) array: +1 at the label's column, -1 elsewhere."""
        labels = np.asarray(y).ravel()
        encoded_y = -np.ones((labels.shape[0], self.class_num))
        if labels.size:
            encoded_y[np.arange(labels.shape[0]), labels] = 1
        return encoded_y

    def shuffling(self, X, y):
        """Return X and y as tuples, shuffled with the same random permutation."""
        paired = list(zip(X, y))
        random.shuffle(paired)
        X, y = zip(*paired)
        return X, y

    def batching(self, X, y, t):
        """Return the t-th mini-batch of X and y plus its actual size."""
        start = t * self.batch_size
        stop = min(len(X), (t + 1) * self.batch_size)
        return X[start:stop], y[start:stop], stop - start

    def predict(self, X):
        """Predicted class index (argmax of the scores) for each row of X."""
        return np.argmax(self.net_input(X), axis=1)

    def score(self, X, y):
        """Fraction of rows of X whose prediction equals y."""
        return np.mean(self.predict(X) == y)

    def get_params(self, deep=True):
        """Return the hyper-parameters plus the averaged weights under 'w' and 'b'.

        Before any parameters exist, the constructor's ``w`` and ``b`` values
        are returned instead of raising.
        """
        if getattr(self, 'params', None) is None:
            w, b = self.w, self.b
        else:
            w, b = self.params['w_'], self.params['b_']
        return {'C':self.C, 'batch_size':self.batch_size, 'epochs':self.epochs,
               'eta': self.eta, 'w':w, 'b':b}

    def set_params(self, **parameters):
        """Set each keyword as an attribute of the same name and return self."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def test(self, X, w, b):
        """Predicted class index for each row of X using the supplied w and b."""
        z = np.matmul(X, np.array(w)) + np.array(b)
        return np.argmax(z, axis=1)

In [6]:
# Seed both random sources the classifier draws from (np.random for the
# initial weights, random for the per-epoch shuffle) so that
# Restart Kernel -> Run All reproduces the same training run.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

mine=myClassifier()
mine.fit(X_train, y_train)

fit params=None
epochs:  0
prev_score:  0
pres_score:  0.52655 

epochs:  1
prev_score:  0.52655
pres_score:  0.66385 

epochs:  2
prev_score:  0.66385
pres_score:  0.7223875 

epochs:  3
prev_score:  0.7223875
pres_score:  0.755325 

epochs:  4
prev_score:  0.755325
pres_score:  0.777475 

epochs:  5
prev_score:  0.777475
pres_score:  0.7923375 

epochs:  6
prev_score:  0.7923375
pres_score:  0.8035125 

epochs:  7
prev_score:  0.8035125
pres_score:  0.8132125 

epochs:  8
prev_score:  0.8132125
pres_score:  0.820725 

epochs:  9
prev_score:  0.820725
pres_score:  0.827175 

epochs:  10
prev_score:  0.827175
pres_score:  0.832875 

epochs:  11
prev_score:  0.832875
pres_score:  0.8373625 

epochs:  12
prev_score:  0.8373625
pres_score:  0.84145 

epochs:  13
prev_score:  0.84145
pres_score:  0.84495 

epochs:  14
prev_score:  0.84495
pres_score:  0.847325 

epochs:  15
prev_score:  0.847325
pres_score:  0.8506625 

epochs:  16
prev_score:  0.8506625
pres_score:  0.8532875 

epochs:  1

epochs:  140
prev_score:  0.9087
pres_score:  0.9087125 

epochs:  141
prev_score:  0.9087125
pres_score:  0.908375 

epochs:  142
prev_score:  0.9087125
pres_score:  0.9085 

epochs:  143
prev_score:  0.9087125
pres_score:  0.9085375 

epochs:  144
prev_score:  0.9087125
pres_score:  0.9088125 

epochs:  145
prev_score:  0.9088125
pres_score:  0.908125 

epochs:  146
prev_score:  0.9088125
pres_score:  0.908225 

epochs:  147
prev_score:  0.9088125
pres_score:  0.908525 

epochs:  148
prev_score:  0.9088125
pres_score:  0.9084125 

epochs:  149
prev_score:  0.9088125
pres_score:  0.9082625 

epochs:  150
prev_score:  0.9088125
pres_score:  0.9083 

epochs:  151
prev_score:  0.9088125
pres_score:  0.908575 

epochs:  152
prev_score:  0.9088125
pres_score:  0.908275 

epochs:  153
prev_score:  0.9088125
pres_score:  0.9087 

epochs:  154
prev_score:  0.9088125
pres_score:  0.9084625 

epochs:  155
prev_score:  0.9088125
pres_score:  0.9084875 

epochs:  156
prev_score:  0.9088125
pres_s

<__main__.myClassifier at 0x7f9b2237c2d0>

In [7]:
# Show the hyper-parameters together with the averaged weights
# ('w_'/'b_' inside the model, exposed here under the keys 'w' and 'b').
mine.get_params()

{'C': 1000,
 'batch_size': 60,
 'epochs': 200,
 'eta': 0.01,
 'w': array([[ 1.0854312 ,  1.60474952,  1.29690399, ...,  0.1711392 ,
          0.93949008,  1.25477187],
        [-0.51138963, -0.40311992,  0.94269138, ...,  0.14857752,
          0.5367502 ,  0.86845323],
        [ 0.6607393 ,  0.15685686,  1.41669104, ..., -0.02112389,
         -0.46241083, -0.72440239],
        ...,
        [-1.00923905,  0.33886948,  0.4260792 , ..., -0.29764807,
         -0.22932422, -0.34106951],
        [ 1.97990312,  1.62949041,  2.11518993, ..., -0.069971  ,
         -0.61228839,  0.55258546],
        [-0.46404495, -1.02294704, -1.02440946, ...,  1.44857499,
         -0.57983122,  0.19809106]]),
 'b': array([[ 0.78105322, -0.61673477,  1.04131276,  0.77108051, -0.22672777,
         -2.02352649,  0.71710939, -1.70611866,  0.24443303,  0.33781992]])}

In [8]:
# Pull the averaged weight matrix and bias row out of the fitted model.
w, b = (mine.get_params()[key] for key in ('w', 'b'))

In [9]:
# Class index (argmax of X @ w + b) for every row of X_testall, computed
# with the weights extracted above.
p=mine.test(X_testall,w,b)

In [10]:
import pandas as pd

# Write one predicted label per line. The context manager closes the file
# even if a write fails part-way through.
with open('/home/ryu/AI/MyClassifier/prediction.txt', 'w') as file:
    file.writelines('%s\n' % label for label in p)

# Persist the averaged weights and bias as CSV; to_csv stores the row index
# as the first column.
list_ = mine.get_params()

w__ = pd.DataFrame(list_['w'])
w__.to_csv("final_w.csv")

b__ = pd.DataFrame(list_['b'])
b__.to_csv("final_b.csv")

# Read the files back. index_col=0 restores the saved row index instead of
# loading it as an extra "Unnamed: 0" data column, so the frames have the
# same shape as w__ / b__.
# NOTE(review): the CSVs are written relative to the working directory but
# read from an absolute path; these are the same files only when the notebook
# runs from /home/ryu/AI/MyClassifier -- confirm.
list_2w = pd.read_csv('/home/ryu/AI/MyClassifier/final_w.csv', index_col=0)
list_2b = pd.read_csv('/home/ryu/AI/MyClassifier/final_b.csv', index_col=0)