In [1]:
# load libs
import torch
import argparse
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
from data.datasets import MNIST
import torch.utils.data as data_utils
from sklearn.decomposition import PCA
import torch.nn.functional as F
from torch.autograd import Variable

In [2]:
# Report the PyTorch build the notebook is running against.
print(torch.__version__)

# Loader configuration: both loaders yield one sample per batch.
batch_size = 1
test_batch_size = 1
kwargs = {}


def _make_mnist_loader(train, samples_per_batch):
    """Build a shuffled DataLoader over one MNIST split, resized to 32x32.

    Args:
        train: True for the training split, False for the test split.
        samples_per_batch: batch size handed to the DataLoader.

    Returns:
        A ``DataLoader`` over the project-local ``MNIST`` dataset.
        NOTE(review): the meaning of ``process=False`` is defined by
        ``data.datasets.MNIST`` and is not visible in this notebook.
    """
    preprocess = transforms.Compose([
        transforms.Scale((32, 32)),
        transforms.ToTensor(),
    ])
    split = MNIST(root='./data', train=train, process=False, transform=preprocess)
    return data_utils.DataLoader(split, batch_size=samples_per_batch, shuffle=True, **kwargs)


train_loader = _make_mnist_loader(True, batch_size)
test_loader = _make_mnist_loader(False, test_batch_size)

0.2.0_3


In [3]:
def create_all_train_dataset(loader=None):
    """Materialise every batch of the training loader as NumPy arrays.

    Batches are concatenated along the batch axis, so any batch size works
    (the previous squeeze/expand_dims approach was only correct for
    ``batch_size=1``).  Labels are returned as a flat 1-D array rather than a
    list of shape-(1,) arrays, which is the form scikit-learn estimators
    expect for ``y``.

    Args:
        loader: optional iterable yielding ``(images, labels)`` tensor pairs.
            Defaults to the module-level ``train_loader``.

    Returns:
        tuple ``(images, labels)`` where ``images`` is the batch-axis
        concatenation of all image tensors (``(N, 1, 32, 32)`` for the
        loaders defined in this notebook) and ``labels`` is a 1-D array of
        length ``N``.

    Raises:
        ValueError: if the loader yields no batches (raised by
            ``np.concatenate``).
    """
    if loader is None:
        loader = train_loader

    image_batches = []
    label_batches = []
    for batch in loader:
        image_batches.append(batch[0].numpy())
        # reshape(-1) flattens (B,), (B, 1) or 0-d label tensors alike.
        label_batches.append(batch[1].numpy().reshape(-1))

    all_images = np.concatenate(image_batches, axis=0)
    train_label = np.concatenate(label_batches, axis=0)
    print ('Numpy train dataset shape is {}'.format(all_images.shape))
    return all_images, train_label

In [4]:
def create_all_test_dataset(loader=None):
    """Materialise every batch of the test loader as NumPy arrays.

    Batches are concatenated along the batch axis, so any batch size works
    (the previous squeeze/expand_dims approach was only correct for
    ``batch_size=1``).  Labels are returned as a flat 1-D array rather than a
    list of shape-(1,) arrays.

    Args:
        loader: optional iterable yielding ``(images, labels)`` tensor pairs.
            Defaults to the module-level ``test_loader``.

    Returns:
        tuple ``(images, labels)`` where ``images`` is the batch-axis
        concatenation of all image tensors (``(N, 1, 32, 32)`` for the
        loaders defined in this notebook) and ``labels`` is a 1-D array of
        length ``N``.

    Raises:
        ValueError: if the loader yields no batches (raised by
            ``np.concatenate``).
    """
    if loader is None:
        loader = test_loader

    image_batches = []
    label_batches = []
    for batch in loader:
        image_batches.append(batch[0].numpy())
        # reshape(-1) flattens (B,), (B, 1) or 0-d label tensors alike.
        label_batches.append(batch[1].numpy().reshape(-1))

    all_images = np.concatenate(image_batches, axis=0)
    test_label = np.concatenate(label_batches, axis=0)
    print ('Numpy test dataset shape is {}'.format(all_images.shape))
    return all_images, test_label

In [5]:
def PCA_and_augment(data_in, num_key_comp):
    """Fit a PCA on flattened samples and return the kernels plus negated copies.

    Args:
        data_in: array whose first axis indexes samples; all remaining axes
            are flattened into one feature vector per sample.
        num_key_comp: forwarded to ``PCA(n_components=...)``.

    Returns:
        Array of stacked row vectors: the fitted ``components_`` followed by
        the negation of every component except the last one.  With ``k``
        retained components this gives ``2*k - 1`` rows.

    NOTE(review): the original comment described the skipped row as the "DC
    component", but the code simply skips the last retained principal
    component -- confirm this is the intended one.  An energy-threshold
    selection (cumulative explained variance < 0.999) was previously tried
    here and is not active.
    """
    # One row per sample.
    flat = np.reshape(data_in, (data_in.shape[0], -1))
    print ('PCA_and_augment: {}'.format(flat.shape))

    # Subtract the per-feature mean before fitting.
    centred = flat - np.mean(flat, axis=0)
    print ('PCA_and_augment meanremove shape: {}'.format(centred.shape))

    pca = PCA(n_components=num_key_comp)
    pca.fit(centred)
    kernels = pca.components_

    # Sign augmentation: append -kernel for all but the last kernel.
    negated = [-kernel for kernel in kernels[:-1]]
    augmented = np.vstack((kernels, negated))
    print ('PCA_and_augment comps_complete shape: {}'.format(augmented.shape))
    return augmented

In [6]:
from itertools import product
def fit_pca_shape(datasets,depth):
    """Cut feature maps into non-overlapping 2x2 patches for PCA fitting.

    Args:
        datasets: 4-D array indexed as (sample, channel, row, col).
        depth: stage index; the sampled region has side ``32 / 2**depth``.
            NOTE(review): the 32 is hard-coded, so ``depth`` must match the
            actual spatial size of ``datasets``.

    Returns:
        Array of shape ``(n_patches * n_samples, n_channels, 2, 2)``.  Patches
        are ordered position-major: all samples for the first patch position,
        then all samples for the next, with positions scanned row by row.
    """
    side = 32 / np.power(2, depth)
    print ('fit_pca_shape: length: {}'.format(side))

    # Top-left corner offsets of each 2x2 window along one axis.
    starts = range(0, int(side), 2)
    print ('fit_pca_shape: idx1: {}'.format(starts))

    patches = []
    for top in starts:
        for left in starts:
            patches.append(datasets[:, :, top:top + 2, left:left + 2])
    patches = np.array(patches)
    print ('fit_pca_shape: data_lattice.shape: {}'.format(patches.shape))

    # Merge the (position, sample) axes into a single sample axis.
    merged_shape = (patches.shape[0] * patches.shape[1], patches.shape[2], 2, 2)
    stacked = np.reshape(patches, merged_shape)
    print ('fit_pca_shape: reshape: {}'.format(stacked.shape))
    return stacked

In [7]:
def ret_filt_patches(aug_anchors,input_channels):
    """Reshape flat anchor vectors into 2x2 convolution kernels.

    Args:
        aug_anchors: 2-D array with one anchor vector per row; the row length
            must be a multiple of 4.
        input_channels: accepted for interface compatibility; the channel
            count is derived from the row length instead.

    Returns:
        Array of shape ``(n_rows, row_length // 4, 2, 2)``, i.e.
        (output channels, input channels, 2, 2).
    """
    n_kernels = int(aug_anchors.shape[0])
    n_in = int(aug_anchors.shape[1] / 4)
    return np.reshape(aug_anchors, (n_kernels, n_in, 2, 2))

In [8]:
def conv_and_relu(filters,datasets,stride=2):
    """Convolve ``datasets`` with ``filters`` and apply ReLU.

    Args:
        filters: NumPy array of kernels, (out_channels, in_channels, kH, kW).
        datasets: NumPy array of inputs, (samples, in_channels, H, W).
        stride: convolution stride (default 2).

    Returns:
        tuple ``(activations, filt)``: the ReLU-rectified convolution output
        and the kernels, both as float32 ``Variable``s.
    """
    def as_float_variable(array):
        # NumPy -> torch Variable, cast to float32 for conv2d.
        return Variable(torch.from_numpy(array)).type(torch.FloatTensor)

    filt = as_float_variable(filters)
    activations = F.relu(F.conv2d(as_float_variable(datasets), filt, stride=stride))
    return activations, filt

In [9]:
def one_stage_saak_trans(datasets=None,depth=0,num_key_comp=5):
    """Run one Saak stage: learn 2x2 kernels by PCA, then convolve and rectify.

    Args:
        datasets: 4-D NumPy array (samples, channels, H, W).  Required despite
            the ``None`` default.
        depth: stage index, forwarded to ``fit_pca_shape``.
        num_key_comp: number of PCA components to retain.

    Returns:
        tuple ``(data, filt, relu_output, filters)``:
            data        -- stage output as a NumPy array,
            filt        -- kernels as a torch Variable,
            relu_output -- stage output as a torch Variable,
            filters     -- kernels as a NumPy array.

    Example shapes from the first stage of the recorded run
    (``num_key_comp=3``): input (60000, 1, 32, 32) -> patches
    (15360000, 1, 2, 2) -> anchors (5, 4) -> kernels (5, 1, 2, 2) ->
    output (60000, 5, 16, 16).
    """
    print ('one_stage_saak_trans: datasets.shape {}'.format(datasets.shape))
    n_input_channels = datasets.shape[1]

    # 2x2 patches gathered over all samples and positions.
    patches = fit_pca_shape(datasets, depth)

    # PCA kernels plus their sign-flipped copies.
    anchors = PCA_and_augment(patches, num_key_comp)
    print ('one_stage_saak_trans: comps_complete: {}'.format(anchors.shape))

    # Flat anchors -> (out_channels, in_channels, 2, 2) kernels.
    kernels = ret_filt_patches(anchors, n_input_channels)
    print ('one_stage_saak_trans: filters: {}'.format(kernels.shape))

    # Stride-2 convolution halves each spatial dimension.
    activations, kernel_var = conv_and_relu(kernels, datasets, stride=2)

    features = activations.data.numpy()
    print ('one_stage_saak_trans: output: {}'.format(features.shape))
    return features, kernel_var, activations, kernels


In [10]:
def five_stage_saak_trans():
    """Run the five-stage Saak pipeline on the train and test sets.

    Kernels are learned stage by stage on the training data only; the test
    data is then pushed through the same kernels with ``conv_and_relu``.

    Returns:
        tuple ``(data_train, data_test, train_label, test_label)``.  In the
        recorded run the feature arrays have shapes (60000, 15, 1, 1) and
        (10000, 15, 1, 1).
    """
    data_train, train_label = create_all_train_dataset()
    data_test, test_label = create_all_test_dataset()

    # PCA components retained at each stage; the list length sets the number of stages.
    num_key_comp = [3,4,7,6,8]

    # Fit: learn each stage's kernels on the previous stage's training output.
    stage_kernels = []
    for stage, n_components in enumerate(num_key_comp):
        print ('{} stage of saak transform_train: '.format(stage))
        data_train, _, _, kernels = one_stage_saak_trans(
            data_train, depth=stage, num_key_comp=n_components)
        stage_kernels.append(kernels)

    # Transform: apply the learned kernels to the test data in the same order.
    for stage, kernels in enumerate(stage_kernels):
        print ('{} stage of saak transform_test: '.format(stage))
        activations, _ = conv_and_relu(kernels, data_test, stride=2)
        data_test = activations.data.numpy()

    return data_train, data_test, train_label, test_label

In [11]:
# Run the whole pipeline once: load both MNIST splits, learn the Saak kernels
# on the training set, and produce features and labels for both splits.
saak_train,saak_test,train_label,test_label=five_stage_saak_trans()

Numpy train dataset shape is (60000, 1, 32, 32)
Numpy test dataset shape is (10000, 1, 32, 32)
0 stage of saak transform_train: 
one_stage_saak_trans: datasets.shape (60000, 1, 32, 32)
fit_pca_shape: length: 32.0
fit_pca_shape: idx1: range(0, 32, 2)
fit_pca_shape: data_lattice.shape: (256, 60000, 1, 2, 2)
fit_pca_shape: reshape: (15360000, 1, 2, 2)
PCA_and_augment: (15360000, 4)
PCA_and_augment meanremove shape: (15360000, 4)
PCA_and_augment comps_complete shape: (5, 4)
one_stage_saak_trans: comps_complete: (5, 4)
one_stage_saak_trans: filters: (5, 1, 2, 2)
one_stage_saak_trans: output: (60000, 5, 16, 16)
1 stage of saak transform_train: 
one_stage_saak_trans: datasets.shape (60000, 5, 16, 16)
fit_pca_shape: length: 16.0
fit_pca_shape: idx1: range(0, 16, 2)
fit_pca_shape: data_lattice.shape: (64, 60000, 5, 2, 2)
fit_pca_shape: reshape: (3840000, 5, 2, 2)
PCA_and_augment: (3840000, 20)
PCA_and_augment meanremove shape: (3840000, 20)
PCA_and_augment comps_complete shape: (7, 20)
one_stag

In [12]:
# Feature shapes as produced by the pipeline.
print(saak_train.shape)
print(saak_test.shape)

# Flatten each sample to a single feature vector for the classifier.  The
# sample count is read from the array itself, so the cell keeps working when
# the dataset is subsampled or swapped (a hard-coded 60000/10000 would either
# raise or silently mis-group rows).
saak_train = saak_train.reshape((saak_train.shape[0], -1))
saak_test = saak_test.reshape((saak_test.shape[0], -1))
print(saak_train.shape)
print(saak_test.shape)



(60000, 15, 1, 1)
(10000, 15, 1, 1)
(60000, 15)
(10000, 15)


In [13]:
#train_pca = PCA()
#train_pca.fit(saak_train)
#eng=np.cumsum(train_pca.explained_variance_ratio_)
#f_num = np.count_nonzero(eng < 0.90)
#print(f_num)
#saak_train=train_pca.transform(saak_train)[:,:f_num]
#saak_test=train_pca.transform(saak_test)[:,:f_num]
#print(saak_train.shape)
#print(saak_test.shape)

In [14]:
# def load_train_label():
#     f = open('./data/raw/train-labels-idx1-ubyte')
#     loaded = np.fromfile(file=f,dtype = np.uint8)
#     loaded = loaded[8:].reshape(60000).astype(np.uint8)
#     return loaded

# train_label = load_train_label()
# print(train_label.shape)
# #print(train_label[0])

In [15]:
# def load_test_label():
#     f = open('./data/raw/t10k-labels-idx1-ubyte')
#     loaded = np.fromfile(file=f,dtype = np.uint8)
#     loaded = loaded[8:].reshape(10000).astype(np.uint8)
#     return loaded
# test_label = load_test_label()
# print(test_label.shape)

In [16]:
from sklearn.svm import SVC

In [17]:
# RBF-kernel SVM with every hyper-parameter spelled out explicitly.
svm_train = SVC(
    # Model: RBF kernel (degree and coef0 are not used by this kernel).
    kernel='rbf',
    gamma='auto',
    C=1.0,
    degree=3,
    coef0=0.0,
    class_weight=None,
    decision_function_shape='ovr',
    probability=False,
    # Solver: hard cap of 2000 iterations.
    max_iter=2000,
    tol=0.001,
    shrinking=True,
    cache_size=200,
    # Misc.
    random_state=None,
    verbose=False,
)

In [18]:
# SVC.fit expects a 1-D target vector.  np.ravel flattens labels that arrive
# as a list/column of shape-(1,) arrays, avoiding the DataConversionWarning,
# and is a no-op for labels that are already 1-D.
svm_train.fit(saak_train, np.ravel(train_label))

  y = column_or_1d(y, warn=True)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=2000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [19]:
# Predict on the same features the classifier was fitted on; used below to
# report the training accuracy.
train_result = svm_train.predict(saak_train)

In [20]:
# Predict on the held-out test features; used below to report the test accuracy.
test_result = svm_train.predict(saak_test)

In [21]:
#accuray_train=np.count_nonzero(train_result==train_label)
#accuray_test=np.count_nonzero(test_result==test_label)
#print("train_accuray is: " + str(accuray_train/60000.0))
#print("test_accuray is: " + str(accuray_test/10000.0))


In [22]:
# Report the number of correct predictions and the accuracy for both splits.
# Sample counts come from the label arrays rather than hard-coded 60000/10000,
# and the comparison is vectorised.  Labels are flattened first because they
# may arrive as a list/column of shape-(1,) arrays; comparing that directly
# against the 1-D predictions would broadcast to an N x N matrix.
# The printed text (including its spelling) is kept unchanged so the output
# stays comparable with earlier runs.
for split_name, labels, predictions in (
        ("train", train_label, train_result),
        ("test", test_label, test_result)):
    labels = np.ravel(labels)
    predictions = np.ravel(predictions)
    assert labels.shape == predictions.shape, "label/prediction count mismatch"
    accuray = int(np.count_nonzero(labels == predictions))
    print("num of correct classification_" + split_name + ": " + str(accuray))
    print("accuray_" + split_name + ": " + str(accuray / float(labels.size)))

    
    
    

num of correct classification_train: 50981
accuray_train: 0.8496833333333333
num of correct classification_test: 8464
accuray_test: 0.8464
