<a href="https://colab.research.google.com/github/MingSheng92/Image_Classification/blob/master/Logistic_regression_softmax.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Fetch the helper scripts once. The directory test skips the clone when the
# checkout is already present, so re-running this cell does not fail with
# "destination path 'Image_Classification' already exists".
![ -d Image_Classification ] || git clone https://github.com/MingSheng92/Image_Classification.git

Cloning into 'Image_Classification'...
remote: Enumerating objects: 62, done.[K
remote: Counting objects:   1% (1/62)[Kremote: Counting objects:   3% (2/62)[Kremote: Counting objects:   4% (3/62)[Kremote: Counting objects:   6% (4/62)[Kremote: Counting objects:   8% (5/62)[Kremote: Counting objects:   9% (6/62)[Kremote: Counting objects:  11% (7/62)[Kremote: Counting objects:  12% (8/62)[Kremote: Counting objects:  14% (9/62)[Kremote: Counting objects:  16% (10/62)[Kremote: Counting objects:  17% (11/62)[Kremote: Counting objects:  19% (12/62)[Kremote: Counting objects:  20% (13/62)[Kremote: Counting objects:  22% (14/62)[Kremote: Counting objects:  24% (15/62)[Kremote: Counting objects:  25% (16/62)[Kremote: Counting objects:  27% (17/62)[Kremote: Counting objects:  29% (18/62)[Kremote: Counting objects:  30% (19/62)[Kremote: Counting objects:  32% (20/62)[Kremote: Counting objects:  33% (21/62)[Kremote: Counting objects:  35% (22/62)[Kremo

In [2]:
%load /content/Image_Classification/scripts/preprocess.py
%load /content/Image_Classification/scripts/PCA.py
%load /content/Image_Classification/scripts/Bernoulli_NB.py
%load /content/Image_Classification/scripts/utility.py

from Image_Classification.scripts.preprocess import load_data, normalize, one_hot, flatten_image
from Image_Classification.scripts.Bernoulli_NB import NaiveBayes
from Image_Classification.scripts.PCA import PCA
from Image_Classification.scripts.utility import plot_predictions

import numpy as np
import keras
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
%matplotlib inline

Using TensorFlow backend.


In [3]:
# Corpus used for this run (swap in 'fashion_mnist' to use the other dataset).
dataset = 'mnist'

# Fetch the train/test split for the chosen corpus.
x_train, y_train, x_test, y_test = load_data(dataset, reshape=False)

# Normalize both image splits, then flatten them. The model defined below reads
# dataset.shape[1] as the feature count, so it needs a 2-D (samples, features) matrix.
x_train, x_test = normalize(x_train), normalize(x_test)
x_train, x_test = flatten_image(x_train), flatten_image(x_test)

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [0]:
def softmax(Z):
    """Row-wise softmax of a 2-D score matrix.

    Parameters
    ----------
    Z : array-like of shape (N, C)
        Raw class scores, one row per sample and one column per class.

    Returns
    -------
    numpy.ndarray of shape (N, C)
        Non-negative values; every row sums to 1.
    """
    Z = np.asarray(Z)
    # Subtracting each row's maximum does not change the result mathematically,
    # but it keeps np.exp from overflowing to inf (inf / inf would give nan rows).
    shifted = Z - Z.max(axis=1, keepdims=True)
    e_Z = np.exp(shifted)
    return e_Z / e_Z.sum(axis=1, keepdims=True)

def get_accuracy(y_pre,y):
    """Fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_pre : array-like
        Predicted labels (NumPy array, list or tuple).
    y : array-like
        True labels, same length as ``y_pre``.

    Returns
    -------
    float
        Number of matching entries divided by ``len(y_pre == y)``;
        ``nan`` when the inputs are empty.
    """
    # Coerce to arrays so plain Python sequences compare element-wise instead of
    # collapsing to a single bool (which has no .sum()).
    matches = np.asarray(y_pre) == np.asarray(y)
    if matches.size == 0:
        # Accuracy is undefined without samples; return nan without the 0/0 warning.
        return float('nan')
    return matches.sum() / len(matches)
    
class LogisticRegression(object):
    """Multinomial (softmax) logistic regression trained with mini-batch SGD.

    The model is a single weight matrix ``W`` of shape
    (num_inputs, num_classes); class scores are ``X.dot(W)``. Labels are used
    as column indices into the score matrix, so they must be the integers
    ``0 .. num_classes - 1``.
    """

    # initialize class value for later processing purpose
    def __init__(self, dataset, label):
        """Store the data set and draw random initial weights.

        Parameters
        ----------
        dataset : numpy.ndarray of shape (N, D)
            Feature matrix; ``D`` becomes ``num_inputs``.
        label : 1-D array of N integer labels
            ``num_classes`` is the number of distinct values it contains.
        """
        self.num_inputs  = dataset.shape[1]
        self.num_classes = len(set(label))
        self.X           = dataset
        self.y           = label
        self.W           = np.random.randn(self.num_inputs, self.num_classes)
        # NOTE(review): the bias vector is allocated but never read by any method
        # of this class; it is kept so the attribute stays available.
        self.b           = np.zeros([self.num_classes,1], dtype=float)

    @staticmethod
    def _mean_nll(scores, y):
        """Mean negative log-likelihood of labels ``y`` under softmax(``scores``).

        Uses the identity ``-log softmax(z)[k] = logsumexp(z) - z[k]`` with the
        row maximum factored out, so large scores cannot overflow ``exp`` and
        tiny probabilities cannot turn into ``log(0)``.
        """
        row_max = scores.max(axis=1, keepdims=True)
        log_norm = row_max[:, 0] + np.log(np.exp(scores - row_max).sum(axis=1))
        picked = scores[np.arange(scores.shape[0]), y]
        return np.mean(log_norm - picked)

    # calculate gradient
    def softmax_grad(self, X, y):
        """Gradient of the mean cross-entropy loss w.r.t. ``W`` on batch (X, y).

        Returns an array with the same shape as ``W``.
        """
        A = softmax(X.dot(self.W))            # shape of (N, C)
        A[np.arange(X.shape[0]), y] -= 1      # A - Y (Y one-hot), shape of (N, C)
        return X.T.dot(A)/X.shape[0]

    # cost or loss function
    def softmax_loss(self):
        """Mean cross-entropy loss on the data stored at construction time."""
        return self._mean_nll(self.X.dot(self.W), self.y)

    def eval(self, val_x, val_y):
        """Return ``(loss, accuracy)`` of the current weights on (val_x, val_y)."""
        scores = val_x.dot(self.W)
        val_loss = self._mean_nll(scores, val_y)
        # argmax of the raw scores selects the same class as argmax of the softmax
        val_acc = get_accuracy(np.argmax(scores, axis = 1), val_y)

        return val_loss, val_acc

    # train softmax logistic regression
    def train(self, train_x, train_y, val_x, val_y, lr = 0.01, n_epoches = 150, tol = 1e-5, batch_size = 10):
        """Run mini-batch SGD on (train_x, train_y), updating ``self.W`` in place.

        Parameters
        ----------
        train_x, train_y : training features and integer labels.
        val_x, val_y : data evaluated (and printed) on epoch 1 and every 10th epoch.
        lr : learning rate.
        n_epoches : maximum number of passes over the training data.
        tol : stop once ``norm(W - W_prev) / W.size`` over one epoch drops below this.
        batch_size : number of samples per gradient step.

        Returns
        -------
        list
            Loss before training followed by the loss after each epoch. The
            loss is measured with ``softmax_loss()``, i.e. on the data passed
            to the constructor, not on ``train_x``.
        """
        # keep a copy of the weights to measure the per-epoch change later
        W_old = self.W.copy()
        ep = 0
        # store history of loss
        loss_hist = [self.softmax_loss()]
        N = train_x.shape[0]
        nbatches = int(np.ceil(float(N)/batch_size))
        while ep < n_epoches:
            ep += 1
            mix_ids = np.random.permutation(N) # reshuffle the samples every epoch

            # run by batch
            for i in range(nbatches):
                # slicing clamps at the end of the array, so the last batch may be shorter
                batch_ids = mix_ids[batch_size*i:batch_size*(i+1)]
                X_batch, y_batch = train_x[batch_ids], train_y[batch_ids]
                self.W -= lr * self.softmax_grad(X_batch, y_batch)

            # evaluate current model
            if ep % 10 == 0 or ep == 1:
                val_loss, val_acc = self.eval(val_x, val_y)
                message = 'Epoch %d, val Loss %.4f, val Acc %.4f' % (ep, val_loss, val_acc)
                print(message)

            # append history
            loss_hist.append(self.softmax_loss())

            # stop the looping process if the weights barely moved this epoch
            if np.linalg.norm(self.W - W_old)/self.W.size < tol:
                print('reached tolerance level.')
                break

            # remember the current W for the next iteration's comparison
            W_old = self.W.copy()

        return loss_hist

    # predict function
    def predict(self, X):
        """Return the index of the highest-scoring class for every row of ``X``."""
        # Softmax is monotonic within a row, so argmax over the raw scores gives
        # the same class without the extra exponentials.
        return np.argmax(X.dot(self.W), axis = 1)

    # return probability of classes
    def predict_proba(self, X):
        """Return the (N, C) matrix of class probabilities for ``X``."""
        return softmax(X.dot(self.W))

    def cross_fold(self, lr=0.01, K=10, n_epoches=50, reset_weights=False):
        """Train on K consecutive (unshuffled) folds of the stored data set.

        Parameters
        ----------
        lr : learning rate forwarded to ``train``.
        K : number of folds.
        n_epoches : maximum epochs per fold.
        reset_weights : when True, ``W`` is re-drawn at random before each fold
            so every fold is validated on data its model has never seen. The
            default (False) keeps the original behaviour: weights carry over
            from fold to fold, so from the second fold on the validation fold
            has already been trained on and the printed validation metrics are
            optimistic.
        """
        # Imported here so the class does not depend on a later notebook cell
        # having been executed first.
        from sklearn.model_selection import KFold

        k_fold = KFold(n_splits=K, random_state=None, shuffle=False)

        # perform K-fold cv
        for fold_count, (train_idx, val_idx) in enumerate(k_fold.split(self.X), start=1):
            print("Fold :", fold_count)
            if reset_weights:
                self.W = np.random.randn(self.num_inputs, self.num_classes)
            self.train(self.X[train_idx], self.y[train_idx], self.X[val_idx], self.y[val_idx],
                       lr, n_epoches, tol = 1e-5, batch_size = 300)
            print("--------------------------------------------------------")

In [5]:
from sklearn.model_selection import KFold
from keras.utils import to_categorical

# Seed NumPy's global RNG so the random weight initialisation and the per-epoch
# shuffling inside the model are repeatable across "Restart & Run All".
np.random.seed(42)

lg = LogisticRegression(x_train, y_train)
lg.cross_fold()

Fold : 1
Epoch 1, val Loss 7.3253, val Acc 0.2280
Epoch 10, val Loss 2.2135, val Acc 0.6172
Epoch 20, val Loss 1.4847, val Acc 0.7202
Epoch 30, val Loss 1.2078, val Acc 0.7643
Epoch 40, val Loss 1.0562, val Acc 0.7907
Epoch 50, val Loss 0.9591, val Acc 0.8083
--------------------------------------------------------
Fold : 2
Epoch 1, val Loss 1.0150, val Acc 0.8002
Epoch 10, val Loss 0.9530, val Acc 0.8103
Epoch 20, val Loss 0.8986, val Acc 0.8185
Epoch 30, val Loss 0.8546, val Acc 0.8260
Epoch 40, val Loss 0.8184, val Acc 0.8327
reached tolerance level.
--------------------------------------------------------
Fold : 3
Epoch 1, val Loss 0.9016, val Acc 0.8128
reached tolerance level.
--------------------------------------------------------
Fold : 4
Epoch 1, val Loss 0.7709, val Acc 0.8440
reached tolerance level.
--------------------------------------------------------
Fold : 5
Epoch 1, val Loss 0.7748, val Acc 0.8380
reached tolerance level.
--------------------------------------------

In [0]:
# Predicted class index for every held-out test image.
y_pred = lg.predict(X=x_test)

In [19]:
# Share of test images classified correctly, printed as a percentage.
acc = get_accuracy(y_pre=y_pred, y=y_test)
print(100. * acc)

83.74000000000001


In [20]:
# Switch to the second corpus; every name below replaces the MNIST arrays.
dataset = 'fashion_mnist'

# Fetch the train/test split for the chosen corpus.
x_train, y_train, x_test, y_test = load_data(dataset, reshape=False)

# Normalize both image splits, then flatten them into the 2-D
# (samples, features) layout that LogisticRegression reads.
x_train, x_test = normalize(x_train), normalize(x_test)
x_train, x_test = flatten_image(x_train), flatten_image(x_test)

Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz


In [23]:
# Seed NumPy's global RNG so the random weight initialisation and the per-epoch
# shuffling inside the model are repeatable across "Restart & Run All".
np.random.seed(42)

lg = LogisticRegression(x_train, y_train)
lg.cross_fold()

Fold : 1
Epoch 1, val Loss 6.4991, val Acc 0.2393
Epoch 10, val Loss 2.4028, val Acc 0.5768
Epoch 20, val Loss 1.8572, val Acc 0.6397
Epoch 30, val Loss 1.6137, val Acc 0.6735
Epoch 40, val Loss 1.4697, val Acc 0.6955
Epoch 50, val Loss 1.3717, val Acc 0.7107
--------------------------------------------------------
Fold : 2
Epoch 1, val Loss 1.3843, val Acc 0.7097
Epoch 10, val Loss 1.3187, val Acc 0.7197
Epoch 20, val Loss 1.2610, val Acc 0.7315
Epoch 30, val Loss 1.2124, val Acc 0.7363
Epoch 40, val Loss 1.1718, val Acc 0.7432
reached tolerance level.
--------------------------------------------------------
Fold : 3
Epoch 1, val Loss 1.0822, val Acc 0.7425
reached tolerance level.
--------------------------------------------------------
Fold : 4
Epoch 1, val Loss 1.0723, val Acc 0.7530
reached tolerance level.
--------------------------------------------------------
Fold : 5
Epoch 1, val Loss 1.1297, val Acc 0.7408
reached tolerance level.
--------------------------------------------

In [24]:
# Score the Fashion-MNIST model on its held-out test images and print the
# percentage classified correctly.
y_pred = lg.predict(X=x_test)
acc = get_accuracy(y_pre=y_pred, y=y_test)
print(100. * acc)

73.61999999999999
