In [28]:
# Imports
import numpy as np
from lda import LDA
from bayes import bayes_nonparametric
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
# Read the train and test arrays from disk
trainDataNP = np.load("fashion_train.npy")
testDataNP = np.load("fashion_test.npy")

# The last column is used as the label; every column before it is a feature
X_train, y_train = trainDataNP[:, :-1], trainDataNP[:, -1]
X_test, y_test = testDataNP[:, :-1], testDataNP[:, -1]

# Standardise the features: the scaler is fitted on the training split only,
# and the same fitted scaler is then applied to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [42]:
# Run through LDA: fit the project-local LDA on the scaled training features
# (with their labels) and reuse the same fitted object to project the test features.
# NOTE(review): k = 2 presumably sets the number of discriminant components kept --
# the projections displayed below have two columns; confirm against lda.py.
lda = LDA(k = 2)
X_train_proj = lda.fit_transform(X_train_scaled, y_train)
X_test_proj = lda.transform(X_test_scaled)

In [4]:
# Inspect the LDA-projected training data (the output shows two columns per sample)
X_train_proj

array([[-1.23149779,  0.21399632],
       [ 2.49507967, -0.40452641],
       [-0.93425513,  0.1879579 ],
       ...,
       [-0.10597058,  1.34595721],
       [-0.68637265,  0.34283279],
       [ 3.01065924, -0.27911939]])

In [5]:
# Inspect the LDA-projected test data (the output shows two columns per sample)
X_test_proj

array([[ 0.82971112,  0.24316942],
       [-0.89387159,  0.42502107],
       [-0.07860835,  1.6058462 ],
       ...,
       [-0.29280525, -1.33862382],
       [-0.23661279,  0.65166192],
       [-0.6864828 , -0.25190508]])

In [36]:
# Import

import numpy as np

# Naive non-parametric Bayes classifier

class bayes_nonparametric1:
    '''
    Naive non-parametric Bayes classifier.

    Class-conditional densities are estimated one feature at a time with a
    box (uniform) kernel of half-width ``h``. The per-feature estimates are
    multiplied together (naive independence assumption) and combined with
    the empirical class priors. Prediction returns the class with the
    highest posterior probability.
    '''

    def __init__(self):
        pass

    def train(self, X_train, y_train, h):
        '''
        Train the classifier with the given training data.

        Params:
            X_train (numpy.ndarray): Training data features, shape (n_samples, n_features).
            y_train (numpy.ndarray): Training data labels, shape (n_samples,).
            h (float): Bandwidth parameter for kernel density estimation (must be > 0).

        Raises:
            ValueError: If h is not strictly positive, if X_train is not
                two-dimensional, or if X_train and y_train disagree on the
                number of samples.
        '''
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)

        # `not h > 0` (rather than `h <= 0`) also rejects NaN
        if not h > 0:
            raise ValueError('Bandwidth h must be strictly positive.')
        if X_train.ndim != 2:
            raise ValueError('X_train must be a 2-D array of shape (n_samples, n_features).')
        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError('X_train and y_train must contain the same number of samples.')

        self.X_train = X_train
        self.y_train = y_train
        self.h = h # Bandwidth
        self.classes = np.unique(y_train)

        # Split the training data per class once, so that density queries do
        # not have to re-mask the full training set for every input point
        self.class_data = {k: X_train[y_train == k] for k in self.classes}

        # Calculate class prior probabilities
        self.calculate_class_priors()

    def calculate_class_priors(self):
        '''Calculate class prior probabilities from the training data.'''
        n_samples = len(self.y_train)
        self.priors = {k: np.sum(self.y_train == k) / n_samples for k in self.classes}

    def fit_kde(self, x):
        '''
        Kernel density estimate for given input point x.

        Params:
            x (numpy.ndarray): A single data point with one entry per feature.

        Returns:
            dict: Maps each class to the product, over all features, of the
                univariate box-kernel density estimates at x.
        '''
        x = np.asarray(x)
        multivariate_estimates = {}

        # Iterate over each class
        for k in self.classes:

            class_data = self.class_data[k] # Data in X_train belonging to class k
            n_total = class_data.shape[0] # Number of observations in class k

            # Per feature, count the observations within bandwidth range from x
            # (x is broadcast against every row of class_data)
            n_observations = np.sum(np.abs(class_data - x) <= self.h, axis=0)

            # Univariate estimates: fraction of observations in the window,
            # divided by the window width 2h
            kernel_estimates = n_observations / (n_total * 2 * self.h)

            # Naive assumption: the multivariate estimate is the product of
            # the univariate ones
            multivariate_estimates[k] = np.prod(kernel_estimates)

        return multivariate_estimates

    def get_posterior_probabilities(self, x):
        '''
        Calculate posterior probabilities for each class for an input point x.

        If no training observation of any class lies within the bandwidth of
        x in every feature, all likelihoods are zero and the data carry no
        information about x. In that case the class priors are returned, so
        that the result is still a valid probability distribution.

        Params:
            x (numpy.ndarray): A single data point with one entry per feature.

        Returns:
            dict: Maps each class to its posterior probability.
        '''
        # Compute likelihoods
        likelihoods = self.fit_kde(x)

        # Joint probability of x and each class
        joint = {k: likelihoods[k] * self.priors[k] for k in self.classes}

        # Evidence is the sum of the joint probabilities over all classes
        evidence = sum(joint.values())
        if evidence == 0: # No kernel support at x: fall back to the priors
            return dict(self.priors)

        # Compute posterior probabilities for each class
        return {k: joint[k] / evidence for k in self.classes}

    def _predict(self, x):
        '''Predict the class for a single data point x.'''
        # Compute posterior probabilities for each class
        posteriors = self.get_posterior_probabilities(x)

        # Return class with highest posterior probability
        return max(posteriors, key = posteriors.get)

    def predict(self, X):
        '''
        Predict the class for an input array X.

        Params:
            X (numpy.ndarray): Data points, one per row.

        Returns:
            list: Predicted class label for each row of X.
        '''
        return [self._predict(x) for x in X]

In [41]:
# Sweep 100 bandwidths evenly spaced in [0.01, 1]. For each one, train a fresh
# classifier on the projected training data and print its accuracy on the
# projected test data.
# NOTE(review): accuracy is measured on the test split, so choosing the best
# bandwidth from this sweep tunes a hyperparameter on test data -- consider
# selecting it on a held-out validation split instead.
bandwiths = np.linspace(.01, 1, 100)
for bw in bandwiths:
    bayes = bayes_nonparametric1()
    bayes.train(X_train_proj, y_train, h = bw)
    y_pred = bayes.predict(X_test_proj)
    # The bandwidth is rounded to two decimals for display only
    print(round(bw, 2), accuracy_score(y_test, y_pred))

0.01 0.7126
0.02 0.7226
0.03 0.7252
0.04 0.7244
0.05 0.7246
0.06 0.7256
0.07 0.725
0.08 0.7252
0.09 0.7266
0.1 0.7276
0.11 0.7274
0.12 0.7294
0.13 0.7278
0.14 0.7274
0.15 0.7288
0.16 0.7282
0.17 0.727
0.18 0.7272
0.19 0.7264
0.2 0.7266
0.21 0.727
0.22 0.7268
0.23 0.726
0.24 0.7254
0.25 0.7262
0.26 0.725
0.27 0.7256
0.28 0.7256
0.29 0.7272
0.3 0.728
0.31 0.728
0.32 0.728
0.33 0.7282
0.34 0.7294
0.35 0.7294
0.36 0.7284
0.37 0.728
0.38 0.7278
0.39 0.727
0.4 0.7272
0.41 0.7268
0.42 0.7264
0.43 0.7254
0.44 0.7244
0.45 0.7242
0.46 0.724
0.47 0.723
0.48 0.7232
0.49 0.7226
0.5 0.7236
0.51 0.7232
0.52 0.7238
0.53 0.7232
0.54 0.723
0.55 0.7232
0.56 0.723
0.57 0.7222
0.58 0.7216
0.59 0.7202
0.6 0.7196
0.61 0.72
0.62 0.7196
0.63 0.7198
0.64 0.72
0.65 0.7206
0.66 0.7202
0.67 0.7208
0.68 0.72
0.69 0.7194
0.7 0.7184
0.71 0.7174
0.72 0.7164
0.73 0.7166
0.74 0.7162
0.75 0.7152
0.76 0.7136
0.77 0.7138
0.78 0.713
0.79 0.7118
0.8 0.712
0.81 0.711
0.82 0.7108
0.83 0.7116
0.84 0.7106
0.85 0.7106
0.86 0.71
0

In [8]:
# Scratch: a 4x3 integer grid holding 1..12, filled row by row
arr1 = np.arange(1, 13).reshape(4, 3)

# Scratch: check that updating a dict entry by multiplication changes the
# stored value (the same pattern the classifier uses to accumulate products)
dict1 = {
    'a': 1,
    'b': 2,
}
dict1['a'] = dict1['a'] * 100

dict1


{'a': 100, 'b': 2}

In [24]:
# Scratch: a flat array built from a plain list has exactly one dimension
arr1 = np.array([1, 2, 3, 4, 5])

arr1.ndim

1