# Google Drive Mount

In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import Packages

In [148]:
import os
import math
import time
import struct
import numpy as np
from datetime import datetime
from matplotlib import pyplot
from joblib import Parallel, delayed
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin

# Binary Classifier

In [170]:
class BinaryClassifier(BaseEstimator, ClassifierMixin):
    """Linear binary classifier trained with mini-batch gradient descent.

    The model is ``f(x) = w . x + b``.  Every update step follows the
    sub-gradient of the L2-regularised hinge loss
    ``mean(max(0, 1 - y * f(x))) + ||w||^2 / (2 * C)`` computed on one
    mini-batch, so the targets passed to ``fit`` must be encoded as -1 / +1.

    Parameters
    ----------
    batch_size : int, default=16
        Number of samples per mini-batch (the last batch may be smaller).
    max_iter : int, default=100
        Number of passes (epochs) over the training data.
    learning_rate : float, default=0.01
        Step size of each update; must lie in [0.0, 1.0].
    random_state : int, default=1
        Seed for weight initialisation and for the per-epoch shuffling.
    C : float, default=100
        Inverse regularisation strength; must be strictly positive.

    Attributes
    ----------
    w_ : ndarray of shape (n_features,)
        Learned weight vector (set by ``fit``).
    b_ : float
        Learned bias (set by ``fit``).
    """

    def __init__(self, batch_size=16, max_iter=100, learning_rate=0.01, random_state=1, C=100):
        self.batch_size = batch_size
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.random_state = random_state
        self.C = C
        self.rgen = np.random.RandomState(self.random_state)

    def fit(self, X, y):
        """Train the classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,), values in {-1, +1}

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``C`` is not positive, ``learning_rate`` is outside [0, 1],
            ``batch_size`` is smaller than 1, ``X`` is empty, or ``X`` and
            ``y`` have different lengths.
        """
        # C == 0 must be rejected too: the regularisation term divides by C.
        if self.C <= 0:
            raise ValueError("The C value of %r must be positive" % self.C)
        if (self.learning_rate < 0) or (self.learning_rate > 1):
            # Single message string (passing two arguments would make the
            # exception text render as a tuple).
            raise ValueError("The learning_rate value of %r is invalid. "
                             "Set the learning_rate value between 0.0 and 1.0."
                             % self.learning_rate)
        if self.batch_size < 1:
            raise ValueError("The batch_size value of %r must be at least 1" % self.batch_size)

        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty training set")
        if y.shape[0] != n_samples:
            raise ValueError("X and y have inconsistent lengths: %r != %r"
                             % (n_samples, y.shape[0]))

        # Re-seed on every fit so repeated fits (and set_params on
        # random_state) give reproducible results.
        self.rgen = np.random.RandomState(self.random_state)
        self.w_ = self.rgen.normal(loc=0.0, scale=0.01, size=X.shape[1])
        self.b_ = 0.

        # The last batch is simply shorter when n_samples is not a multiple
        # of batch_size; slicing past the end handles that automatically.
        n_batches = math.ceil(n_samples / self.batch_size)

        for _ in range(self.max_iter):
            X_epoch, y_epoch = self.shuffle(X, y)
            # Batches run sequentially: every step reads and writes the same
            # w_ / b_, so concurrent updates would race and make training
            # non-deterministic.
            for n_batch in range(n_batches):
                self.calculateGradientAndUpdate(X_epoch, y_epoch,
                                                batch_size=self.batch_size,
                                                n_batch=n_batch)

        return self

    def predict(self, X):
        """Return +1 where the decision value is non-negative, else -1.

        The decision boundary is ``f(x) = 0``; the value 1 is only the
        training margin, not the classification threshold.
        """
        return np.where(self.hypothesis(X) >= 0, 1, -1)

    def hypothesis(self, X):
        """Return the raw decision values ``X . w_ + b_``."""
        return np.dot(X, self.w_) + self.b_

    def shuffle(self, X, y):
        """Return ``X`` and ``y`` permuted with the same random order.

        Uses the estimator's own random generator so that ``random_state``
        controls the shuffling as well as the weight initialisation.
        """
        shuffle_index = self.rgen.permutation(X.shape[0])
        return X[shuffle_index], y[shuffle_index]

    def calculateGradientAndUpdate(self, X, y, batch_size, n_batch):
        """Apply one gradient step computed on mini-batch number ``n_batch``.

        The mini-batch is rows ``n_batch * batch_size`` up to (but excluding)
        ``(n_batch + 1) * batch_size`` of ``X`` / ``y``; a slice running past
        the end of the data yields a shorter batch.  Updates ``w_`` and
        ``b_`` in place and returns nothing.
        """
        begin = n_batch * batch_size
        X_mini = X[begin:begin + batch_size]
        y_mini = y[begin:begin + batch_size]

        n_mini = X_mini.shape[0]
        if n_mini == 0:
            return

        # Only samples on or inside the margin (y * f(x) <= 1) contribute to
        # the hinge-loss sub-gradient.
        mask = np.less_equal(np.multiply(y_mini, self.hypothesis(X_mini)), 1)
        masked_y = np.multiply(y_mini, mask)

        # Both gradients are evaluated before either parameter is modified,
        # and averaged over the actual number of samples in the batch.
        grad_w = -np.dot(masked_y, X_mini) / n_mini + self.w_ / self.C
        grad_b = -np.sum(masked_y) / n_mini

        self.w_ -= self.learning_rate * grad_w
        self.b_ -= self.learning_rate * grad_b

# Multiclass Classifier

In [150]:
class MulticlassClassifier(BaseEstimator, ClassifierMixin):
    """One-vs-rest multiclass classifier built from ``BinaryClassifier``.

    One binary classifier is trained per distinct label; prediction picks
    the label whose classifier returns the largest decision value.

    Parameters
    ----------
    batch_size, max_iter, learning_rate, random_state, C
        Forwarded unchanged to every underlying ``BinaryClassifier``.

    Attributes
    ----------
    labels : ndarray
        Sorted distinct labels seen in ``fit``.
    outputs_ : list of BinaryClassifier
        Fitted binary classifiers, in the same order as ``labels``.
    """

    def __init__(self, batch_size=16, max_iter=100, learning_rate=0.01, random_state=1, C=100):
        self.batch_size = batch_size
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.random_state = random_state
        self.C = C

    def fit(self, X, y):
        """Fit one "this label vs. the rest" classifier per distinct label.

        Returns
        -------
        self
        """
        y = np.asarray(y)
        self.labels = np.unique(y)
        self.outputs_ = []
        # Iterate over the label values themselves, not their positions, so
        # label sets other than 0..K-1 are handled correctly.
        for label in self.labels:
            y_binary = np.where(y == label, 1, -1)
            b_c = BinaryClassifier(batch_size=self.batch_size,
                                   max_iter=self.max_iter,
                                   learning_rate=self.learning_rate,
                                   random_state=self.random_state,
                                   C=self.C)
            b_c.fit(X, y_binary)
            self.outputs_.append(b_c)
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the label with the highest decision value."""
        scores = [clf.hypothesis(X) for clf in self.outputs_]
        return self.labels[np.argmax(scores, axis=0)]

# MNIST Read Function

In [151]:
def read(images, labels):
    """Load an IDX image file together with its IDX label file.

    Parameters
    ----------
    images : str
        Path to the image file: a 16-byte big-endian header (magic, count,
        rows, cols) followed by one unsigned byte per pixel.
    labels : str
        Path to the label file: an 8-byte big-endian header (magic, count)
        followed by one unsigned byte per label.

    Returns
    -------
    (ndarray, ndarray)
        ``uint8`` images of shape ``(n_labels, rows * cols)`` and ``uint8``
        labels of shape ``(n_labels,)``.

    Raises
    ------
    ValueError
        If the pixel payload cannot be reshaped to ``(n_labels, rows * cols)``,
        i.e. the two files do not describe the same number of samples.
    """
    with open(labels, 'rb') as lbpath:
        # Skip the header (magic number and item count); everything after it
        # is label data.
        struct.unpack('>II', lbpath.read(8))
        label_array = np.fromfile(lbpath, dtype=np.uint8)

    with open(images, 'rb') as imgpath:
        _magic, _num, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    # Take the image size from the header instead of assuming 28 x 28.
    image_array = pixels.reshape(len(label_array), rows * cols)
    return image_array, label_array

def read_no_label(images):
    """Load an IDX image file that has no accompanying label file.

    Parameters
    ----------
    images : str
        Path to the image file: a 16-byte big-endian header (magic, count,
        rows, cols) followed by one unsigned byte per pixel.

    Returns
    -------
    ndarray
        ``uint8`` array of shape ``(n_images, rows * cols)``, where
        ``n_images`` is derived from the amount of pixel data in the file.

    Raises
    ------
    ValueError
        If the pixel payload is not a whole number of ``rows * cols`` images.
    """
    with open(images, 'rb') as imgpath:
        _magic, _num, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        pixels = np.fromfile(imgpath, dtype=np.uint8)
    # Infer the image count from the payload and the image size from the
    # header rather than assuming exactly 60000 images of 784 pixels.
    return pixels.reshape(-1, rows * cols)

# Read MNIST & Split for Validation (80k)

In [163]:
                            # Adjust the paths below to match your environment!
# Load the labelled images from the `data` directory under the current
# working directory.
X, y = read(os.path.join(os.getcwd(), 'data', 'newtrain-images-idx3-ubyte'),
            os.path.join(os.getcwd(), 'data', 'newtrain-labels-idx1-ubyte'))
# X_test_no_label = read_no_label(os.path.join(os.getcwd(), 'data', 'testall-images-idx3-ubyte'))

# Hold out 30% of the labelled samples for evaluation; the fixed seed keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Preprocessing : StandardScaler

In [164]:
# Standardise every feature using statistics estimated on the training split
# only, then apply that same transformation to both splits.
scaler = StandardScaler(
    copy=True,
    with_mean=True,
    with_std=True,
)
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Preprocessing : PCA

In [165]:
# Project onto the first 50 principal components.  The scaled matrices are
# already 2-D (n_samples, n_features), so no reshape is needed here.
# random_state is fixed because PCA may select a randomized SVD solver, which
# would otherwise give slightly different components on every run.
pca = PCA(n_components=50, random_state=0)
X_train_scaled_pca = pca.fit_transform(X_train_scaled)
X_test_scaled_pca = pca.transform(X_test_scaled)

# Preprocessing : Polynomial Feature Extraction

In [166]:
# Expand the PCA features with all degree-2 terms (squares and pairwise
# products), without a constant bias column.
poly = PolynomialFeatures(
    degree=2,
    interaction_only=False,
    include_bias=False,
    order='F',
)
poly.fit(X_train_scaled_pca)
X_train_scaled_pca_poly = poly.transform(X_train_scaled_pca)
X_test_scaled_pca_poly = poly.transform(X_test_scaled_pca)

## Check how many features

In [167]:
# Show the feature-matrix shape after each stage: raw, PCA, PCA + polynomial.
print(
    X_train.shape,
    X_train_scaled_pca.shape,
    X_train_scaled_pca_poly.shape,
    sep="\n",
)

(56000, 784)
(56000, 784)


# Set the Hyperparameters

In [168]:
# max_iter and random_state are left at the MulticlassClassifier defaults.
MC = MulticlassClassifier(
    batch_size=256,
    learning_rate=0.01,
    C=1000,
)

# Train

In [None]:
print("Start Time : ", datetime.now())

# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration is not affected by system clock adjustments during a long run.
start = time.perf_counter()
MC.fit(X_train_scaled_pca_poly, y_train)
training_seconds = time.perf_counter() - start

print("End Time : ", datetime.now())
print("Training Time : ", training_seconds)

Start Time :  2019-12-02 22:09:47.053450


# Test

In [None]:
# Predict labels for the held-out split and compare them with the true labels.
y_pred = MC.predict(X_test_scaled_pca_poly)
score = accuracy_score(y_test, y_pred)

# Accuracy of the predictions on the held-out split.
print(score)