In [1]:
import scipy.io
import numpy as np
import math

In [2]:
# Load the MATLAB .mat file from the current working directory; loadmat returns a
# dict keyed by the variable names stored in the file (see the cell output below).
data = scipy.io.loadmat('fashion_mnist.mat')

In [3]:
# Inspect the loaded dict: MAT-file metadata entries plus the stored arrays.
data

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Feb  2 11:20:21 2021',
 '__version__': '1.0',
 '__globals__': [],
 'trX': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'trY': array([[0., 0., 0., ..., 1., 1., 1.]]),
 'tsX': array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.00392157, ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0

In [4]:
# Pull the four stored arrays out of the loaded dict in one step:
# trX/trY feed the training set and tsX/tsY feed the test set further below.
trX, trY, tsX, tsY = (data[name] for name in ('trX', 'trY', 'tsX', 'tsY'))

## Feature Extraction

In [5]:
# Feature extraction: every sample (row of trX / tsX) is reduced to two features,
# the mean and the standard deviation of its values.
X_train = np.array([[np.mean(row), np.std(row)] for row in trX])
X_test = np.array([[np.mean(row), np.std(row)] for row in tsX])
# The label arrays are flattened to 1-D so that y[i] is the label of sample i.
y_train = trY.reshape(-1)
y_test = tsY.reshape(-1)

# Naive Bayes Classifier

## Parameter Estimation

In [6]:
def separateByClass(X_train, y_train):
    """
    This method groups the training samples by their class label
    INPUT:
        X_train: training data, one sample (row of features) per entry
        y_train: class labels, where y_train[i] is the label of X_train[i]
    OUTPUT:
        returns a dictionary mapping every distinct class label to the list of
        samples carrying that label (samples keep their original order)
    Note: The lengths of X_train and y_train should match
    """
    groupedSamples = {}
    for index, sample in enumerate(X_train):
        #setdefault creates the empty bucket the first time a label is encountered
        groupedSamples.setdefault(y_train[index], []).append(sample)
    return groupedSamples

In [7]:
def calculateMeanVarianceParams(dataset):
    """
    This method estimates the mean and the variance of every feature column of the given dataset
    INPUT:
        dataset: sequence of samples, each sample being a sequence of feature values
    OUTPUT:
        returns a list with one [mean, variance] pair per feature column
        (np.var with its default settings, i.e. the population variance)
    """
    meanVarianceParams = []
    for column in zip(*dataset): #zip(*dataset) transposes the rows into feature columns
        meanVarianceParams.append([np.mean(column), np.var(column)])
    return meanVarianceParams

In [8]:
def calculateMeanAndVarByClass(X_train_by_class, y_train):
    """
    This method estimates, separately for every class, the mean and variance of each feature column
    INPUT:
        X_train_by_class: dictionary mapping a class label to the training samples of that class
        y_train: array of class labels corresponding to the training data (not used by this method)
    OUTPUT:
        returns a dictionary mapping every class label to its list of [mean, variance] pairs,
        one pair per feature column
    """
    #model parameters - one mean/variance table per class
    return {
        classValue: calculateMeanVarianceParams(samples)
        for classValue, samples in X_train_by_class.items()
    }

In [9]:
def trainModel(X_train_by_class, y_train):
    """
    This method trains the Naive Bayes model; training consists solely of estimating
    the per-class mean and variance of every feature
    INPUT:
        X_train_by_class: dictionary mapping a class label to the training samples of that class
        y_train: array of class labels corresponding to the training data
    OUTPUT:
        returns the estimated model parameters: a dictionary keyed by class label holding
        one [mean, variance] pair per feature
    """
    return calculateMeanAndVarByClass(X_train_by_class, y_train)

## Probability Calculations and Predictions

In [10]:
def calculateGaussianProbabilityDensity(x, mean, variance):
    """
    This method evaluates the Gaussian (normal) probability density function at x
    INPUT:
        x: value at which the density is evaluated
        mean: mean of the Gaussian distribution
        variance: variance of the Gaussian distribution (a zero variance raises ZeroDivisionError)
    OUTPUT:
        returns the density value; being a density (not a probability) it may exceed 1
    """
    normaliser = 1 / math.sqrt(2*math.pi*variance) #1 / sqrt(2*pi*variance)
    exponent = -(math.pow(x-mean, 2) / (2 * variance)) #-(x - mean)^2 / (2*variance)
    return normaliser * math.exp(exponent)

In [11]:
def calculatePriorProbabilities(X_train_by_class):
    """
    This method calculates the prior probability P(y) of every class label as the
    fraction of training samples that belong to that class
    INPUT:
        X_train_by_class: dictionary mapping a class label to the training samples of that class
    OUTPUT:
        returns a dictionary mapping every class label to its prior probability
    """
    sampleCounts = {classValue: len(samples) for classValue, samples in X_train_by_class.items()}
    totalSamples = sum(sampleCounts.values())
    return {classValue: count/totalSamples for classValue, count in sampleCounts.items()}

In [12]:
def calculateClassProbabilities(modelParams, prior, testInput):
    """
    This method calculates, for every class y, the score P(y) * p(X1|y) * p(X2|y) * ...
    of the given data sample, where each p(Xi|y) is a Gaussian density
    INPUT:
        modelParams: dictionary keyed by class label holding one [mean, variance] pair per feature
        prior: dictionary of prior probabilities keyed by class label
        testInput: data sample (sequence of feature values) to be scored
    OUTPUT:
        returns a dictionary mapping every class label to its score for the sample
    """
    classScores = {}
    for classValue, featureParams in modelParams.items():
        score = prior[classValue] #start from the prior P(y)
        for featureIndex, (mean, variance) in enumerate(featureParams):
            #multiply in the likelihood of this feature's value under the class' Gaussian
            featureValue = testInput[featureIndex]
            score *= calculateGaussianProbabilityDensity(featureValue, mean, variance)
        classScores[classValue] = score
    return classScores

In [13]:
def predict(modelParams, prior, testInput):
    """
    This method predicts the class label of the given data sample by picking the class
    with the highest score P(y) * p(X|y)
    INPUT:
        modelParams: dictionary keyed by class label holding one [mean, variance] pair per feature
        prior: dictionary of prior probabilities keyed by class label
        testInput: data sample for which the class label has to be predicted
    OUTPUT:
        returns the best fitting class label (the first one on ties), or None when no class
        has a score greater than -1 (e.g. there are no classes or every score is NaN)
    """
    classProbabilities = calculateClassProbabilities(modelParams, prior, testInput)
    #only scores strictly above the -1 sentinel are eligible; NaN fails the comparison and is dropped
    eligible = {classValue: probability
                for classValue, probability in classProbabilities.items()
                if probability > -1}
    #max keeps the first label among equal maxima; default covers the empty case
    return max(eligible, key=eligible.get, default=None)
            

In [14]:
def testModel(modelParams, X_train_by_class, X_test):
    """
    This method runs the trained model over a testing set and returns the predicted class labels
    INPUT:
        modelParams: dictionary keyed by class label holding one [mean, variance] pair per feature
        X_train_by_class: class-wise training data; used here to derive the prior probabilities
        X_test: testing data, one sample (sequence of feature values) per entry
    OUTPUT:
        returns a list with the predicted class label of every sample in X_test, in order
    """
    #the priors depend only on the training data, so they are computed once up front
    prior = calculatePriorProbabilities(X_train_by_class)
    return [predict(modelParams, prior, testInput) for testInput in X_test]

## Model Performance

In [15]:
#scikit-learn is an optional dependency that is only used for reporting the model performance
try:
    from sklearn.metrics import confusion_matrix, classification_report
except ImportError:
    #Tolerate only a missing/unimportable scikit-learn (any other failure should surface).
    #Bind None placeholders so that the names always exist and the absence can be detected.
    confusion_matrix = None
    classification_report = None
    print('scikit-learn is not available; the confusion matrix and classification report will be skipped')

In [16]:
def getAccuracy(y_test, predictions):
    """
    This method calculates the accuracy of a model
    INPUT:
        y_test: array of true class labels of the testing data
        predictions: array of the class labels predicted by the trained model
                     (must hold at least len(y_test) entries)
    OUTPUT:
        returns the share of positions where prediction and true label agree, as a percentage
    """
    sampleCount = len(y_test)
    #count one hit for every index at which the predicted label equals the true label
    hits = sum(1 for index in range(sampleCount) if predictions[index] == y_test[index])
    return (hits / sampleCount) * 100

In [17]:
def printModelPerformance(y_test, predictions):
    """
    This method prints the model performance - accuracy, confusion matrix and classification report
    INPUT: 
        y_test: array of class labels of the testing data
        predictions: array of the class labels predicted by the trained model
    OUTPUT:
        prints the overall accuracy; additionally prints the confusion matrix and the
        classification report when scikit-learn's helpers are available, otherwise prints
        a note that they were skipped
    Note: errors raised while computing the metrics (e.g. empty or mismatched inputs) are
          no longer silently swallowed; they propagate to the caller
    """
    accuracy = getAccuracy(y_test, predictions)
    print('\nThe overall accuracy of the trained model is ' + str(accuracy) + '%')

    #scikit-learn is optional: its helpers may be undefined (failed import) or bound to None,
    #so look them up defensively instead of hiding a NameError behind a bare except
    confusionMatrixFn = globals().get('confusion_matrix')
    classificationReportFn = globals().get('classification_report')
    if not (callable(confusionMatrixFn) and callable(classificationReportFn)):
        print('\nscikit-learn is not available; skipping the confusion matrix and classification report')
        return

    print('\nConfusion Matrix:')
    print(confusionMatrixFn(y_test, predictions))
    print(classificationReportFn(y_test, predictions))
    

## Performing Naive Bayes Classification

In [18]:
def performNaiveBayes(X_train, y_train, X_test, y_test):
    """
    This method runs the complete Naive Bayes pipeline: grouping the training data by class,
    estimating the model parameters, predicting the test labels and reporting the performance
    INPUT:
        X_train: training data with features 
        y_train: class labels of training data
        X_test: testing data with features
        y_test: class labels of testing data
    OUTPUT:
        prints the model performance; nothing is returned
    """
    print('\n--- NAIVE BAYES CLASSIFIER ---')

    #Step 1: group the training samples by their class label
    samplesByClass = separateByClass(X_train, y_train)

    #Step 2: training = estimating mean and variance per class and feature
    learnedParams = trainModel(samplesByClass, y_train)

    #Step 3: predict the label of every test sample with the learned parameters
    predictedLabels = testModel(learnedParams, samplesByClass, X_test)

    #Step 4: report how well the predictions match the true test labels
    printModelPerformance(y_test, predictedLabels)
    

In [19]:
# Train and evaluate the Naive Bayes classifier on the extracted mean/std features.
performNaiveBayes(X_train, y_train, X_test, y_test)


--- NAIVE BAYES CLASSIFIER ---

The overall accuracy of the trained model is 83.15%

Confusion Matrix:
[[784 216]
 [121 879]]
              precision    recall  f1-score   support

         0.0       0.87      0.78      0.82      1000
         1.0       0.80      0.88      0.84      1000

    accuracy                           0.83      2000
   macro avg       0.83      0.83      0.83      2000
weighted avg       0.83      0.83      0.83      2000



# Logistic Regression

## Gradient Ascent

In [20]:
def sigmoid(X, weights):
    """
    This method calculates the sigmoid function value 1 / (1 + exp(-w.X)) for the given inputs
    in a numerically stable way (no floating-point overflow for large negative w.X)
    INPUT:
        X: input data vector (1,X1,X2,..), or a matrix holding one such vector per row
        weights: weight vector (w0,w1,w2,...)
    OUTPUT:
        returns the result of sigmoid function: a scalar for a single vector,
        or an array with one value per row of X
    """
    weightXproduct = np.dot(X, weights) #calculating w(Transpose).X
    #exp(-|z|) has a non-positive argument, so it can underflow to 0 but never overflow
    decay = np.exp(-np.abs(weightXproduct))
    #z >= 0: 1 / (1 + exp(-z)) (the textbook form); z < 0: the equivalent exp(z) / (1 + exp(z))
    result = np.where(weightXproduct >= 0, 1 / (1 + decay), decay / (1 + decay))
    #indexing with () turns a 0-d result back into a NumPy scalar and leaves real arrays as they are
    return result[()]

In [21]:
def calculate_gradient(X, y, sigmoid_output):
    """
    This method calculates the gradient of the log-likelihood function with respect to the weights
    (∂L(w)/∂w = (y-z)x where z is the sigmoid function)
    INPUT:
        X: input data vector (1,X1,X2,..), or a matrix holding one such vector per row
        y: class label(s) belonging to X
        sigmoid_output: result of the sigmoid function calculated for X
    OUTPUT:
        returns the gradient of the log-likelihood function; for a matrix X the dot product
        sums the per-sample contributions (y-z)x over all rows
    """
    residual = y - sigmoid_output #(y - z): how far the current prediction is from the label
    return np.dot(residual, X)

In [22]:
def update_weights(weights, learning_rate, gradient):
    """
    This method performs one gradient ascent step on the weights
    INPUT:
        weights: current weight vector (w0,w1,w2,...)
        learning_rate: step size of the gradient ascent method
        gradient: gradient of log-likelihood function with respect to the current weight vector
    OUTPUT:
        returns the updated weight vector: weights + learning_rate * gradient
    """
    step = learning_rate * gradient #ascent: move along the gradient, not against it
    return weights + step

## Logistic Regression - Training

In [23]:
def trainLogisticRegressionModel(X_train, y_train, learning_rate = 0.1, iterations = 10000):
    """
    This method trains the Logistic Regression model on the given training set using
    batch gradient ascent on the log-likelihood
    INPUT:
        X_train: training data with features (2-D array, one sample per row)
        y_train: class labels of the training data
        learning_rate: rate of learning for the gradient ascent method (default value is 0.1)
        iterations: number of iterations to be performed for the gradient ascent method (default value is 10000)
    OUTPUT:
        returns the learned weight vector (w0,w1,w2,...), w0 being the intercept weight
    """
    #Prepend a column of 1's so that every row reads [1, X1, X2, ..] and w0 acts as the intercept
    sampleCount = X_train.shape[0]
    designMatrix = np.concatenate([np.ones((sampleCount, 1)), X_train], axis = 1)

    #All weights start at 0
    weights = np.zeros(designMatrix.shape[1])

    for _ in range(iterations):
        #Evaluate the current model on the whole training set ...
        predictedProbabilities = sigmoid(designMatrix, weights)
        #... derive the direction of steepest ascent of the log-likelihood ...
        ascentDirection = calculate_gradient(designMatrix, y_train, predictedProbabilities)
        #... and take one step in that direction
        weights = update_weights(weights, learning_rate, ascentDirection)

    return weights

## Logistic Regression - Testing

In [24]:
def testLogisticRegressionModel(X_test, weights):
    """
    This method applies the trained Logistic Regression model to a testing set
    INPUT:
        X_test: testing dataset with all the features (2-D array, one sample per row)
        weights: weight vector (w0,w1,w2,...) of the trained model
    OUTPUT:
        returns a list with the predicted label of every test sample:
        1 when the sigmoid output is greater than 0.5, otherwise 0
    """
    #Prepend a column of 1's to the testing data so that every row reads [1, X1, X2, ..]
    sampleCount = X_test.shape[0]
    designMatrix = np.concatenate([np.ones((sampleCount, 1)), X_test], axis = 1)

    #Threshold the predicted probabilities at 0.5 (exactly 0.5 maps to class 0)
    return [1 if probability > 0.5 else 0 for probability in sigmoid(designMatrix, weights)]

## Performing Logistic Regression

In [25]:
def performLogisticRegression(X_train, y_train, X_test, y_test, learning_rate = 0.1, iterations = 10000):
    """
    This method runs the complete Logistic Regression pipeline: training with gradient ascent,
    predicting the test labels and reporting the performance
    INPUT:
        X_train: training dataset with features
        y_train: class labels of the training dataset
        X_test: testing dataset with all the features
        y_test: class labels of the testing dataset
        learning_rate: rate of learning for the gradient ascent method (default value is 0.1)
        iterations: number of iterations to be performed for the gradient ascent method (default value is 10000)
    OUTPUT:
        prints the model performance; nothing is returned
    """
    print('\n--- LOGISTIC REGRESSION ---')

    #Step 1: learn the weights with the Gradient Ascent technique
    learnedWeights = trainLogisticRegressionModel(X_train, y_train, learning_rate, iterations)

    #Step 2: predict the label of every test sample with the learned weights
    predictedLabels = testLogisticRegressionModel(X_test, learnedWeights)

    #Step 3: report how well the predictions match the true test labels
    printModelPerformance(y_test, predictedLabels)

In [26]:
# Train and evaluate Logistic Regression on the same mean/std features, passing the
# learning rate and iteration count explicitly (both equal the function defaults).
performLogisticRegression(X_train, y_train, X_test, y_test, learning_rate = 0.1, iterations = 10000)


--- LOGISTIC REGRESSION ---

The overall accuracy of the trained model is 92.2%

Confusion Matrix:
[[928  72]
 [ 84 916]]
              precision    recall  f1-score   support

         0.0       0.92      0.93      0.92      1000
         1.0       0.93      0.92      0.92      1000

    accuracy                           0.92      2000
   macro avg       0.92      0.92      0.92      2000
weighted avg       0.92      0.92      0.92      2000

