In [1]:
# Imports
import numpy as np
from lda import LDA
from bayes import bayes_nonparametric
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the train and test arrays from disk
trainDataNP = np.load("fashion_train.npy")
testDataNP = np.load("fashion_test.npy")

# Every column but the last goes to X; the last column goes to y
X_train, y_train = trainDataNP[:, :-1], trainDataNP[:, -1]
X_test, y_test = testDataNP[:, :-1], testDataNP[:, -1]

# Standardize: the scaler is fitted on the training features only and the
# same fitted scaler is then applied to the test features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
# Run through LDA
# Fit the project-local LDA on the standardized training features and labels;
# the test features only go through `transform` on the already-fitted object.
# NOTE(review): k = 2 presumably sets the number of discriminant components —
# the projected arrays displayed below have 2 columns; confirm against lda.py.
lda = LDA(k = 2)
X_train_proj = lda.fit_transform(X_train_scaled, y_train)
X_test_proj = lda.transform(X_test_scaled)

In [4]:
X_train_proj

array([[-1.23149779,  0.21399632],
       [ 2.49507967, -0.40452641],
       [-0.93425513,  0.1879579 ],
       ...,
       [-0.10597058,  1.34595721],
       [-0.68637265,  0.34283279],
       [ 3.01065924, -0.27911939]])

In [5]:
X_test_proj

array([[ 0.82971112,  0.24316942],
       [-0.89387159,  0.42502107],
       [-0.07860835,  1.6058462 ],
       ...,
       [-0.29280525, -1.33862382],
       [-0.23661279,  0.65166192],
       [-0.6864828 , -0.25190508]])

In [18]:
# Import

import numpy as np

# Naive non-parametric Bayes classifier

class bayes_nonparametric:
    '''
    Naive non-parametric Bayes classifier.

    Class-conditional densities are estimated per feature with a box kernel:
    the fraction of a class's training points lying within ``h`` of the query
    value, divided by the window width ``2 * h``. The per-feature estimates
    are multiplied together (the "naive" independence assumption) and combined
    with the empirical class priors to obtain posterior probabilities.

    Attributes set by ``train``:
        X_train (numpy.ndarray): Training features, one row per sample.
        y_train (numpy.ndarray): Training labels, one per row of ``X_train``.
        h (float): Kernel bandwidth (half-width of the counting window).
        classes (numpy.ndarray): Sorted unique labels found in ``y_train``.
        priors (dict): Maps each class label to its relative frequency.
    '''

    def __init__(self):
        pass

    def train(self, X_train, y_train, h):
        '''
        Train the classifier with the given training data.

        Params:
            X_train (numpy.ndarray): Training data features, shape
                (n_samples, n_features). Array-likes are converted.
            y_train (numpy.ndarray): Training data labels, length n_samples.
            h (float): Bandwidth parameter for kernel density estimation.
                Must be strictly positive.

        Raises:
            ValueError: If ``h`` is not strictly positive.
        '''
        # A zero bandwidth divides by zero in the density estimate and a
        # negative one makes every window empty, so reject both up front.
        # `not h > 0` also rejects NaN.
        if not h > 0:
            raise ValueError(f"Bandwidth h must be positive, got {h!r}")

        # asarray is a no-op for ndarrays but lets callers pass nested lists.
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)
        self.h = h # Bandwidth
        self.classes = np.unique(self.y_train)

        # Calculate class prior probabilities
        self.calculate_class_priors()

    def calculate_class_priors(self):
        '''Calculate class prior probabilities from the training data.'''
        class_counts = {k: np.sum(self.y_train == k) for k in self.classes}
        n_samples = len(self.y_train)
        self.priors = {k: count/n_samples for k, count in class_counts.items()}

    def fit_kde(self, x):
        '''
        Kernel density estimate for given input point x.

        Params:
            x (numpy.ndarray): A single sample with one value per feature.

        Returns:
            dict: Maps each class label to the product of its per-feature
            box-kernel density estimates evaluated at ``x``.
        '''
        x = np.asarray(x)
        multivariate_estimates = {}

        # Iterate over each class
        for k in self.classes:

            class_data = self.X_train[self.y_train == k] # Data in X_train belonging to class k
            n_total = class_data.shape[0] # Number of observations in class k

            # For every feature at once: count the class-k observations that
            # fall within the bandwidth of x (x broadcasts across the rows).
            n_observations = np.sum(np.abs(class_data - x) <= self.h, axis=0)

            # Univariate density estimate per feature: share of points inside
            # the window divided by the window width 2h.
            kernel_estimates = n_observations / (n_total * 2 * self.h)

            # Naive assumption: the joint density is the product over features
            multivariate_estimates[k] = np.prod(kernel_estimates)

        return multivariate_estimates

    def get_posterior_probabilities(self, x):
        '''
        Calculate posterior probabilities for each class for an input point x.

        Returns:
            dict: Maps each class label to likelihood * prior / evidence. If
            no class has any training point near ``x`` the evidence is zero;
            a tiny constant then replaces it so every posterior is 0 rather
            than NaN.
        '''

        # Compute likelihoods
        likelihoods = self.fit_kde(x)

        # Compute the evidence: total probability of x summed over all classes
        evidence = sum(likelihoods[k] * self.priors[k] for k in self.classes)
        if evidence == 0: # Avoid 0/0 when every likelihood is zero
            evidence += 1e-12

        # Compute posterior probabilities for each class
        posteriors = {k: likelihoods[k] * self.priors[k] / evidence for k in self.classes}

        return posteriors

    def _predict(self, x):
        '''Predict the class for a single data point x.'''

        # Compute posterior probabilities for each class
        posteriors = self.get_posterior_probabilities(x)

        # Return the class with the highest posterior probability. On ties
        # (including the all-zero case) the first class in sorted order wins.
        return max(posteriors, key = posteriors.get)

    def predict(self, X):
        '''
        Predict the class for an input array X.

        Params:
            X (numpy.ndarray): Samples to classify, one row per sample.

        Returns:
            list: Predicted class label for each row of ``X``, in order.
        '''
        return [self._predict(x) for x in X]

In [19]:
# Train the kernel-density Bayes classifier on the LDA-projected training data
# and predict a label for every projected test point.
# h = .75 is passed as the kernel bandwidth (the `h` parameter of `train`).
# NOTE(review): `bayes_nonparametric` is both imported from `bayes` in the
# first cell and defined in this notebook; whichever ran last is used here.
bayes = bayes_nonparametric()
bayes.train(X_train_proj, y_train, h = .75)
y_pred = bayes.predict(X_test_proj)

In [7]:
# Debug check: print the shape of each row of the projected test set to see
# what a single sample looks like when iterating over the array row by row.
X = X_test_proj
for x in X:
    print(x.shape)

(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)
(2,)


In [8]:
# Scratch cell: 4x3 integer array holding 1..12, filled row by row
arr1 = np.arange(1, 13).reshape(4, 3)

# Check that multiplying a dict entry and storing it back updates only that key
dict1 = dict(a=1, b=2)
dict1['a'] = dict1['a'] * 100

dict1


{'a': 100, 'b': 2}

In [9]:
# Scratch cell: 5x5 integer grid holding 1..25, filled row by row
arr1 = np.arange(1, 26).reshape(5, 5)

# arr2 is every column except the last; arr3 is the last column on its own
arr2, arr3 = arr1[:, :-1], arr1[:, -1]

# Boolean-mask the rows of arr2 whose last-column value is a multiple of 10
arr2[arr3 % 10 == 0]

array([[ 6,  7,  8,  9],
       [16, 17, 18, 19]])