In [1]:
import scipy.io as sio
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import timeit
import matplotlib as mpl

mpl.rcParams['figure.figsize'] = (10,15)
ds = sio.loadmat('face.mat')

In [2]:
X = ds['X']
l = ds['l'][0]
split = 0.8
W = 46
H = 56
IDs = 52
X = pd.DataFrame(X.transpose())
l = pd.DataFrame(l)

#bagging 
bag_ratio = 1
M_pca_ratio = 0.95
M_lda_ratio = 0.90
N = round(X.shape[0]*split*bag_ratio) #different for each bag now
print('    N:', N)

    N: 416


In [3]:
X_train, X_test, l_train, l_test = train_test_split(X, l, test_size=(1-split), stratify = l)
# X_bar = np.mean(X, axis=1)
X_train, X_test = X_train.values, X_test.values
l_train, l_test = l_train.values, np.transpose(l_test.values)
X_bar = np.transpose([np.mean(X_train, axis=0)])
X_bar = np.ndarray.flatten(X_bar)
print('    l_train.shape',l_train.shape)
print('    X_train.shape', X_train.shape)

    l_train.shape (416, 1)
    X_train.shape (416, 2576)


## Functions to apply 1.3_a to bagging

In [4]:
from random import randrange
#Take the current split data and bag randomly 
def make_bag(X_train, l_train):
    X_bag = []
    l_bag = []
    bag_size = round(len(X_train)*bag_ratio)
#     print(bag, bag_size)
    while len(X_bag) < bag_size:
        index = randrange(len(X_train))
        X_bag.append(X_train[index])
        l_bag.append(l_train[index])
    X_bar_bag = np.transpose([np.mean(X_bag, axis=0)])
    X_bar_bag= np.ndarray.flatten(X_bar_bag)
    return np.asarray(X_bag), np.asarray(l_bag), np.asarray(X_bar_bag)

In [5]:
def bag_train_space(X_bag, l_bag):

    train_subspaces = []
    
    for i in range(round(IDs*bag_ratio)):
        images = []
        for j in range(int(N)):
            if l_train[j][0] == i+1:
                images.append(X_train[j])
        train_subspaces.append(images)
        
    return np.asarray(train_subspaces)

In [6]:
def SWSB(train_subspaces):
    elements = np.asarray(train_subspaces[0])
    elements_mean = np.mean(elements, axis=0)
    elements = np.subtract(elements,elements_mean)
    S_W = np.matmul(elements.T,elements)
    mean_diff = np.atleast_2d(elements_mean)-X_bar
    S_B = np.matmul(mean_diff.T, mean_diff)
    for i in range(1,round(IDs*bag_ratio)):
        elements = np.asarray(train_subspaces[i])
        elements_mean = np.mean(elements, axis=0)
        elements = np.subtract(elements,elements_mean)
        S_W += np.matmul(elements.T,elements)
        mean_diff = np.atleast_2d(elements_mean)-X_bar
        S_B += np.matmul(mean_diff.T, mean_diff)
    
    return S_W, S_B
                   

In [7]:
#PCA
def PCA(X_train, X_bar):
    
    A = np.subtract(X_train,X_bar).T
#     print(np.matmul(A.T,A).shape)
    w_pca, v_pca = np.linalg.eigh((1/N)*np.matmul(A.T,A))
    inds = w_pca.argsort()[::-1]
    w_pca = w_pca[inds]
    v_pca = v_pca[:,inds]
    cum_w = np.cumsum(w_pca)/np.sum(w_pca);
    M_pca = np.argmax(cum_w>M_pca_ratio)
    w_pca = w_pca[:M_pca]
    v_pca = v_pca[:,:M_pca]
    v_pca = preprocessing.normalize(np.matmul(A,v_pca), axis=0)
    return w_pca, v_pca

In [8]:
#LDA
def LDA(S_W, S_B, v_pca):
    meh2 = np.dot(np.dot(v_pca.T,S_W),v_pca)
    meh1 = np.dot(np.dot(v_pca.T,S_B),v_pca)

    w_fld, v_fld = np.linalg.eigh(np.linalg.inv(meh2).dot(meh1))
    
    cum_w = np.cumsum(w_fld)/np.sum(w_fld);
    M_lda = np.argmax(cum_w>M_lda_ratio)
    
    inds = w_fld.argsort()[::-1]
    w_fld = w_fld[inds]
    v_fld = v_fld[:,inds]

    
    w_fld = w_fld[:M_lda]
    v_fld = v_fld[:,:M_lda]
    return w_fld, v_fld
    
    

In [9]:
#W_OPT
def w_opt(v_pca, v_fld): 
    v_opt = np.dot(v_pca,v_fld)
    v_opt = preprocessing.normalize(v_opt,axis=0)
    return v_opt

In [10]:
#Get error of each bag
def error(v_opt, X_train, X_test, l_train, l_test):
    Y_train = []
    for i in range(int(N)):
        Y_train.append(np.matmul(v_opt.T,X_train[i]))
        
    correct = 0
    l_pred = []
    
    for i in range(len(X_test)):
        y_test = np.matmul(v_opt.T, X_test[i])
        diff = np.subtract(Y_train, y_test)
        pred = np.diag(np.matmul(diff, diff.T))
        value = np.argmin(pred)
        l_pred.append(pred)
        if l_train[value] == l_test[0][i]:
            correct += 1
            
    return correct/len(l_test[0]), l_pred

#### Calling Functions 
***For loop***
- make bags
- make training subspaces
- make eigen subspaces & S_W & S_B 
- PCA 
- LDA 
- W_opt 
- Error calculation
- Committie machine


In [11]:
# TEST SCRIPT
X_bag_0, l_bag_0, X_bar_0 = make_bag(X_train, l_train)

print('    X bag shape', X_bag_0.shape,'l bag shape', l_bag_0.shape, 'X bar bag', X_bar_0.shape)

bag_0_train_space = bag_train_space(X_bag_0, l_bag_0)

print('    bag train space shape:', bag_0_train_space.shape)

S_W_0, S_B_0= SWSB(bag_0_train_space)

print('    S_W', S_W_0.shape, 'S_B', S_B_0.shape)

    X bag shape (416, 2576) l bag shape (416, 1) X bar bag (2576,)
    bag train space shape: (52, 8, 2576)
    S_W (2576, 2576) S_B (2576, 2576)


In [12]:
def N_bag_error(No_bags, X_train, l_train):
    bag_error = []
    bag_pred = []
    for i in range(No_bags):
        print('    Bag:', i+1)
        X_bag, l_bag, X_bar = make_bag(X_train, l_train)
        bag_space = bag_train_space(X_bag, l_bag)
        S_W, S_B = SWSB(bag_space)
        w_pca, v_pca = PCA(X_bag, X_bar)
        w_fld, v_fld = LDA(S_W, S_B, v_pca)
        v_opt = w_opt(v_pca, v_fld)
        err, l_pred = error(v_opt, X_bag, X_test, l_bag, l_test)
        print('    ', err)
        bag_error.append(err)
        bag_pred.append(l_pred)
        
        
    return bag_error, bag_pred      

In [13]:
# def commit_mac(bag_error, bag_pred):
#     for 
#     y_com = 

In [14]:
ten_bags_error, ten_bags_pred = N_bag_error(10, X_train, l_train)

    Bag: 1
     0.5480769230769231
    Bag: 2
     0.5096153846153846
    Bag: 3
     0.49038461538461536
    Bag: 4
     0.46153846153846156
    Bag: 5
     0.5192307692307693
    Bag: 6
     0.5096153846153846
    Bag: 7


MemoryError: 

In [None]:
print('    ', ten_bags_pred.shape)