In [29]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
from scipy.stats import norm

In [30]:
X, y = make_blobs(n_samples=10000, centers=2, n_features = 2, random_state=1)

print(X.shape, y.shape)
print(X[:5])
print(y[:5])

(10000, 2) (10000,)
[[-3.08389358  5.70067218]
 [-8.80258525 -5.07389013]
 [-1.68452735  5.22511143]
 [-1.44683075  4.51471432]
 [-3.36067232  3.22371079]]
[0 1 0 0 0]


In [31]:
class NaiveBayes:
    def __init__(self, X,y):
        self.X = X
        self.y = y
    
    def splitToTrainTest(self, X, y):
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size = 0.1, random_state=0)
        
        return X_train, X_test, y_train, y_test
    
    def fit_distribution(self, X):
        mu = np.mean(X)
        std = np.std(X)
        
        dist = norm(mu, std)
        
        return dist
    
    def posterior(self, x, prior, dist1, dist2):
        
        return prior * dist1.pdf(x[0]) *dist2.pdf(x[1])
    
    def run_model(self):
        
        self.X_train, self.X_test, self.y_train, self.y_test = self.splitToTrainTest(self.X, self.y)
        
        self.X0_train = self.X_train[self.y_train == 0]
        self.X1_train = self.X_train[self.y_train == 1]
        
        #Calculate the prior
        
        self.prior_y0 = len(self.X0_train)/len(self.X_train)
        self.prior_y1 = len(self.X1_train)/len(self.X_train)
        
        #claculate the liklihoods for y==0
        
        self.dist_X0y0 = self.fit_distribution(self.X0_train[:,0])
        self.dist_X1y0 = self.fit_distribution(self.X0_train[:,1])
        
        #claculate the liklihoods for y==1
        
        self.dist_X0y1 = self.fit_distribution(self.X1_train[:,0])
        self.dist_X1y1 = self.fit_distribution(self.X1_train[:,1])
        
    def predict(self):
        
        for sample, target in zip(self.X_test, self.y_test):
            py0 = self.posterior(sample, self.prior_y0, self.dist_X0y0, self.dist_X1y0)
            py1 = self.posterior(sample, self.prior_y1, self.dist_X0y1, self.dist_X1y1)
            
            print('P(Y=0 | %s) = %.3f' % (sample, py0 *100))
            print('P(Y=1 | %s) = %.3f' % (sample, py1 *100))
            
            #print('Model predicted class {} and the true class is {}' .format(np.argmax[py0*100, py1 *100]), sample)
            
    
    
    
    
    

In [32]:
classifier = NaiveBayes(X, y)


classifier.run_model()
classifier.predict()

P(Y=0 | [-1.50030244  5.11349108]) = 6.146
P(Y=1 | [-1.50030244  5.11349108]) = 0.000
P(Y=0 | [-9.70304468 -3.64272044]) = 0.000
P(Y=1 | [-9.70304468 -3.64272044]) = 7.302
P(Y=0 | [-11.12942627  -3.09708926]) = 0.000
P(Y=1 | [-11.12942627  -3.09708926]) = 2.839
P(Y=0 | [-10.11438309  -5.52374255]) = 0.000
P(Y=1 | [-10.11438309  -5.52374255]) = 2.269
P(Y=0 | [-0.61779984  4.65345407]) = 4.616
P(Y=1 | [-0.61779984  4.65345407]) = 0.000
P(Y=0 | [-1.25940623  7.12387611]) = 0.175
P(Y=1 | [-1.25940623  7.12387611]) = 0.000
P(Y=0 | [-8.30755615 -2.43706693]) = 0.000
P(Y=1 | [-8.30755615 -2.43706693]) = 0.636
P(Y=0 | [-10.7898651   -4.16875473]) = 0.000
P(Y=1 | [-10.7898651   -4.16875473]) = 5.590
P(Y=0 | [-0.94705508  6.17867704]) = 1.281
P(Y=1 | [-0.94705508  6.17867704]) = 0.000
P(Y=0 | [-3.2358029   5.15361711]) = 1.703
P(Y=1 | [-3.2358029   5.15361711]) = 0.000
P(Y=0 | [-0.97719593  3.90794799]) = 5.646
P(Y=1 | [-0.97719593  3.90794799]) = 0.000
P(Y=0 | [-9.36394715 -4.11939289]) = 0.000

P(Y=1 | [-10.50012464  -4.20389049]) = 6.716
P(Y=0 | [-2.47104933  3.31915514]) = 3.088
P(Y=1 | [-2.47104933  3.31915514]) = 0.000
P(Y=0 | [-1.29255568  6.07321996]) = 1.841
P(Y=1 | [-1.29255568  6.07321996]) = 0.000
P(Y=0 | [0.83938352 2.87483912]) = 0.114
P(Y=1 | [0.83938352 2.87483912]) = 0.000
P(Y=0 | [-9.09262641 -3.64886021]) = 0.000
P(Y=1 | [-9.09262641 -3.64886021]) = 5.180
P(Y=0 | [-8.61135164 -4.31237315]) = 0.000
P(Y=1 | [-8.61135164 -4.31237315]) = 2.993
P(Y=0 | [-0.80856307  4.5198494 ]) = 5.633
P(Y=1 | [-0.80856307  4.5198494 ]) = 0.000
P(Y=0 | [-2.57933388  6.10094989]) = 1.202
P(Y=1 | [-2.57933388  6.10094989]) = 0.000
P(Y=0 | [-2.1845728  4.0986218]) = 6.553
P(Y=1 | [-2.1845728  4.0986218]) = 0.000
P(Y=0 | [-9.38596159 -4.54681368]) = 0.000
P(Y=1 | [-9.38596159 -4.54681368]) = 5.620
P(Y=0 | [-9.46417852 -3.52166113]) = 0.000
P(Y=1 | [-9.46417852 -3.52166113]) = 6.368
P(Y=0 | [-10.10197928  -3.51247768]) = 0.000
P(Y=1 | [-10.10197928  -3.51247768]) = 7.143
P(Y=0 | [-11.

P(Y=0 | [-1.29524445  3.68187218]) = 5.743
P(Y=1 | [-1.29524445  3.68187218]) = 0.000
P(Y=0 | [-2.90545874  4.35417075]) = 3.598
P(Y=1 | [-2.90545874  4.35417075]) = 0.000
P(Y=0 | [-1.8656183   3.72333183]) = 6.117
P(Y=1 | [-1.8656183   3.72333183]) = 0.000
P(Y=0 | [-1.37222928  4.39627936]) = 7.702
P(Y=1 | [-1.37222928  4.39627936]) = 0.000
P(Y=0 | [-0.349854    3.25150723]) = 1.766
P(Y=1 | [-0.349854    3.25150723]) = 0.000
P(Y=0 | [-9.18214459 -4.8523494 ]) = 0.000
P(Y=1 | [-9.18214459 -4.8523494 ]) = 3.889
P(Y=0 | [-9.78478307 -1.45365636]) = 0.000
P(Y=1 | [-9.78478307 -1.45365636]) = 0.333
P(Y=0 | [-0.16011279  3.71216589]) = 2.104
P(Y=1 | [-0.16011279  3.71216589]) = 0.000
P(Y=0 | [-1.53895281  4.23207526]) = 7.812
P(Y=1 | [-1.53895281  4.23207526]) = 0.000
P(Y=0 | [-1.62474784  3.19978086]) = 3.783
P(Y=1 | [-1.62474784  3.19978086]) = 0.000
P(Y=0 | [-9.6385144 -3.729419 ]) = 0.000
P(Y=1 | [-9.6385144 -3.729419 ]) = 7.333
P(Y=0 | [-3.23670692  6.54971833]) = 0.219
P(Y=1 | [-3.236