# Bayes Classifier

1. Test to see if features are approximately normal in each class

In [1]:
''' Needed libraries '''

import os, sys # For filepaths
import random # For drawing the random evaluation sample

import numpy as np # For matrix operations and numerical processing
import matplotlib.pyplot as plt # For plotting

In [2]:
''' Add the datasets and libraries to the system path '''

# The project root is taken to be the grandparent of the current working
# directory, so this cell depends on where the kernel was started.
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
home_directory = os.path.dirname(parent_directory)

# Folder with our own implementations and folder with the datasets
libraries_path = os.path.join(home_directory, 'Libraries')
datasets_path = os.path.join(home_directory, 'Datasets')

# Add them both to the system path, but only once: re-running this cell
# must not keep growing sys.path with duplicate entries.
for extra_path in (datasets_path, libraries_path):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [3]:
''' Importing our Bayes classifier '''

from Bayes import Bayes_classifier

In [4]:
''' Importing the exported LDA arrays from file '''

# Build the folder path from components instead of hard-coding a '/' separator
exported_data_path = os.path.join(home_directory, 'Exported_Data')

# Import top_components from file
top_components = np.load(os.path.join(exported_data_path, 'LDA_top2.npy'))

# Import projection_matrix from file
# NOTE(review): later cells index this array with a boolean mask over y and read
# its columns as features, so it is used as one row per observation (i.e. the
# projected data rather than projection weights) — confirm against the export step.
projection_matrix = np.load(os.path.join(exported_data_path, 'LDA_projection.npy'))

In [5]:
''' Loading in the dataset and splitting into observations and labels '''

# The last column is taken as the label, every column before it as a feature
dataset = np.load(os.path.join(datasets_path, 'fashion_train.npy'))
X, y = dataset[:, :-1], dataset[:, -1]

# Number of observations (displayed as the cell output)
len(X)


10000

In [None]:
 ''' PLotting the distribution of the LDA features for each class'''

# Disabled exploratory check: everything below is commented out and the cell
# carries no execution count, so it was not run in the saved notebook.
# If re-enabled, the loop would, for every distinct label in y, select the rows
# of projection_matrix with that label, draw a histogram of column 0 and of
# column 1 into the current figure, and then show it.
# NOTE(review): the banner string on the first line starts with a stray space;
# plain Python rejects a leading indent at top level, so drop it if this
# notebook is ever exported to a script.
# for i in np.unique(y):
#     f1=projection_matrix[y==i][:,0]
#     f2=projection_matrix[y==i][:,1]
#     plt.hist(f1)
#     plt.hist(f2)
#     plt.show()


Class priors

In [7]:
''' class priors '''
# Prior of a class = share of all observations that carry its label,
# listed in the sorted label order returned by np.unique
n = X.shape[0]
class_priors = [X[y == label].shape[0] / n for label in np.unique(y)]

print(class_priors)


[0.2033, 0.1947, 0.2001, 0.2005, 0.2014]


Means of each feature for each class 

In [8]:
''' means of each feature for each class '''
# we assume each feature in each class has a normal distribution
# Result: dict mapping a class label to the list of its per-feature means
means = {}
for label in np.unique(y):
    class_rows = projection_matrix[y == label]  # observations of this class only
    means[label] = [
        np.mean(class_rows[:, col])
        for col in range(projection_matrix.shape[1])
    ]
means

{0: [-1.2802700016019868, 0.6039621758063101],
 1: [11.165085908392799, -1.3346697113354848],
 2: [-1.792848734273685, -4.0010271878014345],
 3: [0.5517463994100597, 3.5860176785069986],
 4: [-1.8373911430659382, -1.4957990391986888]}

Variances of each feature for each class 

In [9]:
# Result: dict mapping a class label to the list of its per-feature variances.
# np.var is called with its defaults, i.e. the population variance (ddof=0).
n_features = projection_matrix.shape[1]
variances = {}
for label in np.unique(y):
    class_rows = projection_matrix[y == label]  # observations of this class only
    variances[label] = [np.var(class_rows[:, col]) for col in range(n_features)]
variances

{0: [1.6240272271400915, 1.4014820774384151],
 1: [8.070526988779214, 1.176553854948992],
 2: [1.0405445983810633, 2.3216706354737338],
 3: [2.369089465355057, 3.0492142495787906],
 4: [1.535525042922837, 3.4967538119843016]}

Density function of X for each class

# Bayes classifier

In [10]:
# Build the project's classifier from the raw observations X, the labels y and
# the loaded LDA array; all following cells query this single instance.
bc = Bayes_classifier(X, y, projection_matrix) # Initialize our Bayes classifier 

In [11]:
# Only the last expression of a cell is displayed, so the value shown is the
# notebook's own means[0] computed above; the return value of bc.means() is
# discarded here.
# NOTE(review): if the goal is to compare the classifier against the manual
# computation, the classifier's result should be the displayed value — confirm intent.
bc.means()
means[0]

[-1.2802700016019868, 0.6039621758063101]

In [12]:
# NOTE(review): the values displayed for this call equal the square roots of the
# variances computed with np.var earlier in the notebook (e.g. sqrt(1.6240) ≈ 1.2744
# for class 0, feature 0), so this method appears to return standard deviations
# despite its name — confirm against the Bayes module.
bc.variances()

{0: [1.2743732683715912, 1.1838420829816851],
 1: [2.840867295172236, 1.084690672472568],
 2: [1.020070879096675, 1.5237029354417264],
 3: [1.539184675519821, 1.7461999454755435],
 4: [1.239163041299585, 1.8699609118867435]}

In [13]:
# Class priors as reported by the classifier; the displayed list matches the
# one computed manually in the "class priors" cell above.
bc.class_priors()

[0.2033, 0.1947, 0.2001, 0.2005, 0.2014]

In [14]:
# Two-feature test point that the following cells classify.
testx = [15,6]
# NOTE(review): this call does not use testx; it evaluates pdf at the separate,
# hard-coded point [0.3, 0.3]. The displayed result has one entry per class
# label, each a list of two values (presumably one density per feature —
# confirm against the Bayes module).
bc.pdf([0.3,0.3])

{0: [0.1451135246137139, 0.3260624696246494],
 1: [9.357920123376283e-05, 0.11814829986985564],
 2: [0.04766845748945545, 0.0048730704426146855],
 3: [0.2557468902484938, 0.0388912422932719],
 4: [0.07273319529230014, 0.13452800069580953]}

In [15]:
# Displays one scalar per class label for testx (presumably the per-feature
# densities combined into a single class-conditional value — confirm against
# the Bayes module).
bc.total_pdf(testx)

{0: 1.1818124910629716e-41,
 1: 2.4457297068619226e-12,
 2: 6.395339239846326e-70,
 3: 1.6731908060847695e-21,
 4: 1.8057330823087553e-45}

In [16]:
# Displays a single scalar for testx. Its value is consistent with summing
# prior * total_pdf over all classes (0.1947 * 2.4457e-12 ≈ 4.76e-13 dominates),
# i.e. the normalising constant of the posterior — confirm against the Bayes module.
bc.sum(testx)

4.761835742614911e-13

In [17]:
# Displays one value per class label for testx; the values shown are consistent
# with prior * total_pdf divided by bc.sum(testx), so class 1 receives almost
# all of the probability mass for this point.
bc.posterior_prob(testx)

{0: 5.0455851990636824e-30,
 1: 0.9999999992954928,
 2: 2.6874244536425612e-58,
 3: 7.045072000651873e-10,
 4: 7.637278193415283e-34}

In [18]:
# NOTE(review): random is imported here but not used in this cell.
import random
# Displays the predicted label for testx; the shown result (1) is the class
# with the largest posterior value in the previous cell.
bc.prediction(testx)

1

In [19]:
''' Estimate the accuracy on a random sample of observations '''

EVAL_SAMPLE_SIZE = 100 # Number of observations to score
EVAL_SEED = 0 # Fixed seed so that re-running the notebook reproduces the score

# NOTE: the sampled rows come from the same projection_matrix / y that the
# classifier was built from, so this is a training-set accuracy and is likely
# optimistic compared to unseen data.

# A local generator keeps the draw reproducible without touching the global
# random state used elsewhere.
eval_rng = random.Random(EVAL_SEED)
n_observations = projection_matrix.shape[0]
# Clamp the sample size: random sampling raises if asked for more rows than exist
eval_indices = eval_rng.sample(range(n_observations), min(EVAL_SAMPLE_SIZE, n_observations))

count = len(eval_indices)
score = 0
for i in eval_indices:
    if bc.prediction(projection_matrix[i]) == y[i]:
        score += 1

# Fraction of sampled observations predicted correctly (nan if there is no data)
print(score/count if count else float('nan'))

0.74
