## 01 - Naive Bayesian - Gaussian - Lab
===Task===

* Generate a 2 class data using sklearn.
* Put Gaussian Naive Bayesian Classification into class
* Fit the model on the data then calculate accuracy accordingly.

In [72]:
# Import library
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the data
X, y = make_classification(n_samples=500, n_features=10, n_informative=4,
                             n_clusters_per_class=2, random_state=14)

# Scale
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

print(X.shape)
print(y.shape)
print(np.unique(y))

(500, 10)
(500,)
[0 1]


In [73]:
class GaussianNaive:
    def fit(self, X, y):
        n = X.shape[1]
        self.k =len(np.unique(y))
        self.mean = np.zeros((self.k, n))
        self.std = np.zeros((self.k, n))
        m = np.zeros(self.k)
        for label in range(self.k):
            self.mean[label, :] = X[y==label].mean(axis=0)
            self.std[label, :]  = X[y==label].std(axis=0)
            m[label] = len(X[y==label])
        self.prior = m/sum(m)

    def gaussian_pdf(self, X, mean, std):
        left = 1 / (np.sqrt(2 * np.pi) * std)
        e = (X - mean) ** 2 / (2 * (std ** 2))
        right = np.exp(-e)
        return left*right

    def predict(self, X):
        posterior = np.zeros((X.shape[0], self.k))
        for label in range(self.k):
            likelihood = self.gaussian_pdf(X, self.mean[label,:], self.std[label,:])
            total_likelihood = np.prod(likelihood, axis=1)
            posterior[:,label] = self.prior[label]*total_likelihood
        yhat = np.argmax(posterior, axis=1)
        return yhat


In [74]:
model = GaussianNaive()
model.fit(X_train, y_train)
yhat = model.predict(X_test)


In [75]:
from sklearn.metrics import average_precision_score, classification_report
print("=========Average precision score=======")
print(average_precision_score(y_test, yhat))
print("\n=========Classification report=======")
print("Report: ", classification_report(y_test, yhat))

0.9065986724947386

Report:                precision    recall  f1-score   support

           0       0.78      0.98      0.87        63
           1       0.99      0.80      0.89        87

    accuracy                           0.88       150
   macro avg       0.89      0.89      0.88       150
weighted avg       0.90      0.88      0.88       150

