### LDA before logistic regression for pre-processing

In [3]:
# Implementing LDA as pre-processing step before logistic regression
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

# Importing the dataset
data = pd.read_csv('Heart.csv')
# convert to numpy array
data = data.values
# remove the first column
data = data[:,1:]
for i in range(data.shape[0]):
    if data[i, 2] == 'asymptomatic':
        data[i, 2] = 0
    elif data[i, 2] == 'nonanginal':
        data[i, 2] = 1
    elif data[i, 2] == 'nontypical':
        data[i, 2] = 2
    elif data[i, 2] == 'typical':
        data[i, 2] = 3
    if data[i,13] == 'Yes':
        data[i,13] = 1
    elif data[i,13] == 'No':
        data[i,13] = 0
    if data[i,12] == 'fixed':
        data[i,12] = 0
    elif data[i,12] == 'reversable':
        data[i,12] = 1
    if data[i,12] == 'normal':
        data[i,12] = 2

In [4]:
# convert the data to float
data = data.astype(float)

In [5]:
# split the data into train and test
X_train, X_test, Y_train, Y_test = train_test_split(data[:,0:13], data[:,13], test_size=0.2, random_state=42)

In [6]:
# perform fisher linear discriminant on the data
# separate the data into two classes
mean_classes = np.zeros((2,13))
mean_classes[0,:] = np.mean(X_train[Y_train==0,:], axis=0)
mean_classes[1,:] = np.mean(X_train[Y_train==1,:], axis=0)
# compute the overall mean
mean_overall = np.mean(X_train, axis=0)

In [7]:
# compute the within class scatter matrix
S_W = np.zeros((13,13))
for i in range(2):
    S_i = np.zeros((13,13))
    for j in range(X_train[Y_train==i,:].shape[0]):
        x = X_train[Y_train==i,:][j,:].reshape(13,1)
        mean = mean_classes[i,:].reshape(13,1)
        S_i += (x-mean).dot((x-mean).T)
    S_W += S_i

In [8]:
# compute the between class scatter matrix
S_B = np.zeros((13,13))
for i in range(2):
    n_i = X_train[Y_train==i,:].shape[0]
    mean_i = mean_classes[i,:].reshape(13,1)
    mean_overall = mean_overall.reshape(13,1)
    S_B += n_i*(mean_i-mean_overall).dot((mean_i-mean_overall).T)

In [9]:
# compute the eigenvalues and eigenvectors of inv(S_W).dot(S_B)
eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
# sort the eigenvalues in descending order
indices = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[indices]
eig_vecs = eig_vecs[:,indices]
# select the eigenvectors corresponding to the largest eigenvalues
eigen_vector = eig_vecs[:,0:1]

In [10]:
# project the data onto the new subspace
X_train_fda = X_train.dot(eigen_vector)
X_test_fda = X_test.dot(eigen_vector)


In [11]:
# defining the logistic regression function
learning_rate = 0.01
no_of_iterations = 2000
# add a column of ones to the data
X_train_fda = np.hstack((np.ones((X_train_fda.shape[0],1)), X_train_fda))
# define a vector of weights of size dimension of data
weights = np.zeros((X_train_fda.shape[1],1))

In [12]:
# multiply the weights with the data
def combine(X, weights):
    return X.dot(weights)

In [13]:
# defining the sigmoid function
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [14]:
# implementing the gradient descent algorithm to find the required weights
prev_weights = np.zeros((X_train_fda.shape[1],1))
for i in range(no_of_iterations):
    z_value = combine(X_train_fda, weights)
    Y_predicted = sigmoid(z_value)
    # round the values to 0 or 1
    Y_predicted[Y_predicted>=0.5] = 1
    Y_predicted[Y_predicted<0.5] = 0
    # compute the error
    error = Y_predicted - Y_train.reshape(Y_train.shape[0],1)
    # compute the gradient
    gradient = X_train_fda.T.dot(error)
    # assign the current weights to the previous weights
    prev_weights = weights
    # update the weights
    weights = weights - learning_rate*gradient/X_train_fda.shape[0]
    # check if the weights have converged 
    if np.linalg.norm(weights-prev_weights) < 1e-4:
        break

In [15]:
# compute the test accuracy
X_test_fda = np.hstack((np.ones((X_test_fda.shape[0],1)), X_test_fda))
z_value = combine(X_test_fda, weights)
Y_predicted = sigmoid(z_value)
Y_predicted[Y_predicted>=0.5] = 1
Y_predicted[Y_predicted<0.5] = 0
count_0 = 0
count_1 = 0
for i in range(Y_test.shape[0]):
    if Y_test[i] == 0 and Y_predicted[i] == 0:
        count_0 += 1
    elif Y_test[i] == 1 and Y_predicted[i] == 1:
        count_1 += 1
print('Test accuracy: ', (count_0+count_1)/Y_test.shape[0]*100 ,'%')
test_accuracy_lda = (count_0+count_1)/Y_test.shape[0]*100

Test accuracy:  85.0 %


### PCA + LDA as pre-processing steps before logistic regression

In [16]:
# Implementing PCA before LDA and logistic regression
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

# Importing the dataset
data = pd.read_csv('Heart.csv')
# convert to numpy array
data = data.values
# remove the first column
data = data[:,1:]
for i in range(data.shape[0]):
    if data[i, 2] == 'asymptomatic':
        data[i, 2] = 0
    elif data[i, 2] == 'nonanginal':
        data[i, 2] = 1
    elif data[i, 2] == 'nontypical':
        data[i, 2] = 2
    elif data[i, 2] == 'typical':
        data[i, 2] = 3
    if data[i,13] == 'Yes':
        data[i,13] = 1
    elif data[i,13] == 'No':
        data[i,13] = 0
    if data[i,12] == 'fixed':
        data[i,12] = 0
    elif data[i,12] == 'reversable':
        data[i,12] = 1
    if data[i,12] == 'normal':
        data[i,12] = 2

In [17]:
# convert the data to float
data = data.astype(float)

In [18]:
# split the data into train and test
X_train, X_test, Y_train, Y_test = train_test_split(data[:,0:13], data[:,13], test_size=0.2, random_state=42)

In [19]:
from sklearn.decomposition import PCA
no_of_PCs = 9
pca = PCA(n_components=no_of_PCs)
X_train_pca = pca.fit_transform(X_train)
# transform the test data using the same PCA object
X_test_pca = pca.transform(X_test)
X_train = X_train_pca
X_test = X_test_pca

In [20]:
# perform fisher linear discriminant on the data
# separate the data into two classes
mean_classes = np.zeros((2,no_of_PCs))
mean_classes[0,:] = np.mean(X_train[Y_train==0,:], axis=0)
mean_classes[1,:] = np.mean(X_train[Y_train==1,:], axis=0)
# compute the overall mean
mean_overall = np.mean(X_train, axis=0)

In [21]:
# compute the within class scatter matrix
S_W = np.zeros((no_of_PCs,no_of_PCs))
for i in range(2):
    S_i = np.zeros((no_of_PCs,no_of_PCs))
    for j in range(X_train[Y_train==i,:].shape[0]):
        x = X_train[Y_train==i,:][j,:].reshape(no_of_PCs,1)
        mean = mean_classes[i,:].reshape(no_of_PCs,1)
        S_i += (x-mean).dot((x-mean).T)
    S_W += S_i

In [22]:
# compute the between class scatter matrix
S_B = np.zeros((no_of_PCs,no_of_PCs))
for i in range(2):
    n_i = X_train[Y_train==i,:].shape[0]
    mean_i = mean_classes[i,:].reshape(no_of_PCs,1)
    mean_overall = mean_overall.reshape(no_of_PCs,1)
    S_B += n_i*(mean_i-mean_overall).dot((mean_i-mean_overall).T)

In [23]:
# compute the eigenvalues and eigenvectors of inv(S_W).dot(S_B)
eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
# sort the eigenvalues in descending order
indices = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[indices]
eig_vecs = eig_vecs[:,indices]
# select the eigenvectors corresponding to the largest eigenvalues
eigen_vector = eig_vecs[:,0:1]

In [24]:
# project the data onto the new subspace
X_train_fda = X_train.dot(eigen_vector)
X_test_fda = X_test.dot(eigen_vector)

In [25]:
# defining the logistic regression function
learning_rate = 0.01
no_of_iterations = 2000
# add a column of ones to the data
X_train_fda = np.hstack((np.ones((X_train_fda.shape[0],1)), X_train_fda))
# define a vector of weights of size dimension of data
weights = np.zeros((X_train_fda.shape[1],1))

In [26]:
# multiply the weights with the data
def combine(X, weights):
    return X.dot(weights)

In [27]:
# defining the sigmoid function
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [28]:
# implementing the gradient descent algorithm to find the required weights
prev_weights = np.zeros((X_train_fda.shape[1],1))
for i in range(no_of_iterations):
    z_value = combine(X_train_fda, weights)
    Y_predicted = sigmoid(z_value)
    # round the values to 0 or 1
    Y_predicted[Y_predicted>=0.5] = 1
    Y_predicted[Y_predicted<0.5] = 0
    # compute the error
    error = Y_predicted - Y_train.reshape(Y_train.shape[0],1)
    # compute the gradient
    gradient = X_train_fda.T.dot(error)
    # assign the current weights to the previous weights
    prev_weights = weights
    # update the weights
    weights = weights - learning_rate*gradient/X_train_fda.shape[0]
    # check if the weights have converged 
    if np.linalg.norm(weights-prev_weights) < 1e-4:
        break

In [29]:
# compute the test accuracy
X_test_fda = np.hstack((np.ones((X_test_fda.shape[0],1)), X_test_fda))
z_value = combine(X_test_fda, weights)
Y_predicted = sigmoid(z_value)
Y_predicted[Y_predicted>=0.5] = 1
Y_predicted[Y_predicted<0.5] = 0
count_0 = 0
count_1 = 0
for i in range(Y_test.shape[0]):
    if Y_test[i] == 0 and Y_predicted[i] == 0:
        count_0 += 1
    elif Y_test[i] == 1 and Y_predicted[i] == 1:
        count_1 += 1
print('Test accuracy: ', (count_0+count_1)/Y_test.shape[0]*100 ,'%')
test_accuracy_pca = (count_0+count_1)/Y_test.shape[0]*100

Test accuracy:  91.66666666666666 %


In [30]:
print('Number of test samples: ', Y_test.shape[0])
print('Number of training samples: ', Y_train.shape[0])
print('Test accuracy with PCA + LDA: ', test_accuracy_pca, '%')
print('Test accuracy with LDA: ', test_accuracy_lda, '%')

Number of test samples:  60
Number of training samples:  237
Test accuracy with PCA + LDA:  91.66666666666666 %
Test accuracy with LDA:  85.0 %
