# AP4 - Pattern Recognition
Implementation of Naive Bayes and Linear Discriminant Analysis (LDA) for classification.

> Name: Jonas Carvalho Fortes

> Mat: 494513

## Load Datasets

In [4]:
import pandas as pd
import numpy as np
from scipy.io import loadmat

# Each .mat file keeps its matrix under a key named after the file. The matrix
# is transposed after loading; the shape printed below shows the resulting layout.
dataset_input1 = pd.DataFrame(loadmat('data/Input1.mat')['Input1']).T
print(f'Dataset Input1 shape: {dataset_input1.shape}')

dataset_input2 = pd.DataFrame(loadmat('data/Input2.mat')['Input2']).T
print(f'Dataset Input2 shape: {dataset_input2.shape}')

Dataset Input1 shape: (4000, 2)
Dataset Input2 shape: (4000, 2)


In [5]:
# Class labels follow the sample order: the first 2000 samples are class 1,
# the next 2000 samples are class 2.
labels = np.repeat([1, 2], 2000)

In [6]:
# Print a caption, then leave the DataFrame as the cell's last expression so
# the notebook renders it as the cell output.
print('Dataset Input1:')
dataset_input1

Dataset Input1:


Unnamed: 0,0,1
0,0.545786,1.187656
1,0.369962,1.190989
2,1.713438,-0.191912
3,1.455538,0.378509
4,1.218019,0.646772
...,...,...
3995,0.458675,-1.473266
3996,-0.177021,-1.730604
3997,0.309656,-1.767054
3998,0.913877,-1.752652


In [7]:
# Print a caption, then leave the DataFrame as the cell's last expression so
# the notebook renders it as the cell output.
print('Dataset Input2:')
dataset_input2

Dataset Input2:


Unnamed: 0,0,1
0,0.686500,0.494410
1,0.760307,0.609878
2,0.053486,0.811601
3,1.173823,0.359015
4,0.328171,0.907802
...,...,...
3995,0.392458,0.208384
3996,-0.127486,-0.190746
3997,0.057968,-0.320305
3998,1.639673,0.866730


## Helper functions and classes

### Naive Bayes Classifier Modeling

In [8]:
class NaiveBayesClassifier:
    """Two-class Gaussian Naive Bayes classifier for labels ``1`` and ``2``.

    Every feature is modelled as an independent univariate Gaussian per class.
    A sample is assigned to the class with the larger posterior, i.e. the class
    prior multiplied by the product of the per-feature densities. The
    comparison is carried out in log-space so that the product of many small
    densities cannot underflow to zero.

    Attributes set by ``fit``:
        mean_class1, mean_class2: per-feature means, shape ``(n_features,)``.
        var_class1, var_class2: per-feature sample variances (``ddof=1``),
            floored at ``_MIN_VARIANCE``.
        log_prior_class1, log_prior_class2: log of the class frequencies.
    """

    # Lower bound for fitted variances so that a constant feature cannot cause
    # a division by zero inside the Gaussian density.
    _MIN_VARIANCE = 1e-12

    def __init__(self):
        self.mean_class1 = None
        self.var_class1 = None
        self.mean_class2 = None
        self.var_class2 = None
        self.log_prior_class1 = None
        self.log_prior_class2 = None

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters:
        - X: DataFrame or array-like of shape ``(n_samples, n_features)``.
        - y: array-like of length ``n_samples`` holding the labels 1 and 2.

        Raises:
        - ValueError: if ``X`` is not 2-D, if ``X`` and ``y`` disagree on the
          number of samples, or if a class has fewer than two samples (the
          sample variance is undefined in that case).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be 2-D with shape (n_samples, n_features)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        # Separate the samples for each class
        X_class1 = X[y == 1]
        X_class2 = X[y == 2]
        n_class1, n_class2 = X_class1.shape[0], X_class2.shape[0]
        if n_class1 < 2 or n_class2 < 2:
            raise ValueError("each class (1 and 2) needs at least two samples")

        # ddof=1 matches what pandas' DataFrame.var() computes by default.
        self.mean_class1 = X_class1.mean(axis=0)
        self.var_class1 = np.maximum(X_class1.var(axis=0, ddof=1), self._MIN_VARIANCE)

        self.mean_class2 = X_class2.mean(axis=0)
        self.var_class2 = np.maximum(X_class2.var(axis=0, ddof=1), self._MIN_VARIANCE)

        # Class priors estimated from the label frequencies.
        n_labelled = n_class1 + n_class2
        self.log_prior_class1 = np.log(n_class1 / n_labelled)
        self.log_prior_class2 = np.log(n_class2 / n_labelled)

    def gaussian_probability(self, x, mean, var):
        """Return the univariate Gaussian density of ``x`` element-wise.

        Parameters:
        - x: value(s) at which to evaluate the density.
        - mean: mean(s) of the Gaussian, broadcastable against ``x``.
        - var: variance(s) of the Gaussian, broadcastable against ``x``.
        """
        exponent = np.exp(- ((x - mean) ** 2) / (2 * var))
        return (1 / np.sqrt(2 * np.pi * var)) * exponent

    def _log_likelihood(self, X, mean, var):
        """Sum the per-feature Gaussian log-densities for every row of ``X``."""
        return -0.5 * np.sum(np.log(2 * np.pi * var) + (X - mean) ** 2 / var, axis=1)

    def predict(self, X):
        """Predict the class (1 or 2) of every row of ``X``.

        Parameters:
        - X: DataFrame or array-like of shape ``(n_samples, n_features)``.

        Returns:
        - numpy array of length ``n_samples`` containing 1 or 2. Ties go to
          class 2.

        Raises:
        - RuntimeError: if called before ``fit``.
        - ValueError: if ``X`` is not 2-D or its feature count differs from the
          one seen during ``fit``.
        """
        fitted_state = (
            self.mean_class1, self.var_class1, self.log_prior_class1,
            self.mean_class2, self.var_class2, self.log_prior_class2,
        )
        if any(value is None for value in fitted_state):
            raise RuntimeError("NaiveBayesClassifier must be fitted before calling predict")

        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be 2-D with shape (n_samples, n_features)")
        if X.shape[1] != np.shape(self.mean_class1)[0]:
            raise ValueError("X has a different number of features than the training data")

        # Log-posterior (up to a shared constant) of each class for all rows at once.
        score_class1 = self.log_prior_class1 + self._log_likelihood(
            X, self.mean_class1, self.var_class1)
        score_class2 = self.log_prior_class2 + self._log_likelihood(
            X, self.mean_class2, self.var_class2)

        # Assign the class with the highest posterior
        return np.where(score_class1 > score_class2, 1, 2)

### LDA Classifier Modeling

In [18]:
class LDAClassifier:
    """Two-class Fisher linear discriminant for labels ``1`` and ``2``.

    ``fit`` computes the direction ``w = Sw^(-1) (mean_class1 - mean_class2)``,
    where ``Sw`` is the sum of the two class covariance matrices, and places
    the decision threshold halfway between the projected class means.
    ``predict`` projects samples onto ``w`` and compares them with that
    threshold.

    Attributes set by ``fit``:
        w: projection vector of shape ``(n_features,)``.
        threshold: scalar decision threshold on the projected axis.
    """

    def __init__(self):
        self.w = None  # projection vector (LDA direction)
        self.threshold = None  # decision threshold

    def fit(self, X, y):
        """Estimate the projection vector and the decision threshold.

        Parameters:
        - X: DataFrame or array-like of shape ``(n_samples, n_features)``.
        - y: array-like of length ``n_samples`` holding the labels 1 and 2.

        Raises:
        - ValueError: if ``X`` is not 2-D, if ``X`` and ``y`` disagree on the
          number of samples, or if a class has fewer than two samples (its
          covariance cannot be estimated).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be 2-D with shape (n_samples, n_features)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        # Separate the samples for each class
        X_class1 = X[y == 1]
        X_class2 = X[y == 2]
        if X_class1.shape[0] < 2 or X_class2.shape[0] < 2:
            raise ValueError("each class (1 and 2) needs at least two samples")

        # Calculate the mean for each class
        mean_class1 = X_class1.mean(axis=0)
        mean_class2 = X_class2.mean(axis=0)

        # Covariance of each class; np.cov returns a 0-d array for a single
        # feature, so promote it to a 1x1 matrix.
        S1 = np.atleast_2d(np.cov(X_class1, rowvar=False))
        S2 = np.atleast_2d(np.cov(X_class2, rowvar=False))

        # Within-class scatter matrix
        Sw = S1 + S2

        # Projection vector w = Sw^(-1) * (mean_class1 - mean_class2). Solving
        # the linear system avoids forming the inverse; when Sw is singular
        # (e.g. duplicated features) fall back to the pseudo-inverse.
        mean_diff = mean_class1 - mean_class2
        try:
            self.w = np.linalg.solve(Sw, mean_diff)
        except np.linalg.LinAlgError:
            self.w = np.linalg.pinv(Sw).dot(mean_diff)

        # Project the class means onto the projection vector
        mean1_proj = np.dot(mean_class1, self.w)
        mean2_proj = np.dot(mean_class2, self.w)

        # The decision threshold is the midpoint of the projected means
        self.threshold = (mean1_proj + mean2_proj) / 2

    def predict(self, X):
        """Predict the class (1 or 2) of every row of ``X``.

        Parameters:
        - X: DataFrame or array-like of shape ``(n_samples, n_features)``.

        Returns:
        - numpy array with 1 where the projection is strictly greater than the
          threshold and 2 otherwise.

        Raises:
        - RuntimeError: if called before ``fit``.
        """
        if self.w is None or self.threshold is None:
            raise RuntimeError("LDAClassifier must be fitted before calling predict")

        # Project the samples onto the projection vector
        X_proj = np.dot(np.asarray(X, dtype=float), self.w)

        # Class 1 lies on the positive side because w points from the class 2
        # mean towards the class 1 mean.
        return np.where(X_proj > self.threshold, 1, 2)

### Auxiliary Functions

In [10]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

def plot_2d_scatter(X, y):
    """
    Plot a 2D scatter plot of the input data, one colour per class.

    Only the first two columns of ``X`` are drawn. Samples labelled 1 are
    plotted in blue and samples labelled 2 in red; other labels are ignored.

    Parameters:
    - X: DataFrame or array-like of shape (n_samples, >= 2) with the input
      data (continuous features).
    - y: Array-like of length n_samples with the target labels.

    Raises:
    - ValueError: if X is not 2-D with at least two columns, or if X and y
      disagree on the number of samples.
    """
    # Convert once so that DataFrames, arrays and nested lists are all
    # indexed the same way.
    points = np.asarray(X)
    targets = np.asarray(y).ravel()
    if points.ndim != 2 or points.shape[1] < 2:
        raise ValueError("X must be 2-D with at least two feature columns")
    if points.shape[0] != targets.shape[0]:
        raise ValueError("X and y must contain the same number of samples")

    _, ax = plt.subplots(figsize=(10, 6))

    # One scatter layer per class
    for class_label, color in ((1, 'blue'), (2, 'red')):
        members = points[targets == class_label]
        ax.scatter(members[:, 0], members[:, 1], color=color,
                   label=f'Classe {class_label}', alpha=0.6)

    # Figure configuration
    ax.set_title('Gráfico de Dispersão 2D dos Dados de Entrada')
    ax.set_xlabel('Atributo 1')
    ax.set_ylabel('Atributo 2')
    ax.legend()
    plt.show()

## Test


In [14]:
# Instantiate the classifier
nb_classifier = NaiveBayesClassifier()

# Train the model
nb_classifier.fit(dataset_input1, labels)

# Predict on the same samples that were used for fitting
predictions = nb_classifier.predict(dataset_input1)

# Show the first ten predictions
print(predictions[:10])

# NOTE(review): the model is fitted and scored on the same data, so this is the
# training accuracy, not an estimate of performance on unseen samples.
accuracy_score(labels, predictions)

[1 1 1 1 1 1 1 1 1 1]


1.0

In [19]:
# Instantiate and train the LDA classifier
lda_classifier = LDAClassifier()


lda_classifier.fit(dataset_input1, labels)

# Predict on the same samples that were used for fitting
lda_predictions = lda_classifier.predict(dataset_input1)

# Compute the accuracy
# NOTE(review): the model is fitted and scored on the same data, so this is the
# training accuracy, not an estimate of performance on unseen samples.
accuracy_score(labels, lda_predictions)



1.0