<a href="https://colab.research.google.com/github/RitneshThakur/AI/blob/main/Naive_Bayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
import pandas as pd

In [6]:
class NaiveBayes:
    """Naive Bayes classifier for categorical features.

    All probabilities are plain relative frequencies taken from the training
    data; no smoothing is applied, so a feature value that never occurs
    together with a class contributes a likelihood of 0 for that class.

    Attributes:
        features: Column names of the training frame, in column order.
        likelihoods: ``{feature: {"<value>_<class>": P(value | class)}}``.
        class_priors: ``{class: P(class)}``.
        pred_priors: ``{feature: {value: P(value)}}``.
        X_train: Feature frame passed to the last ``fit`` call.
        y_train: Labels passed to the last ``fit`` call.
        train_size: Number of training rows.
        num_feats: Number of feature columns.
    """

    def __init__(self):
        # Real "empty" values rather than type objects, so an unfitted
        # instance can be inspected without surprises.
        self.features = []
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        self.X_train = None
        self.y_train = None
        self.train_size = 0
        self.num_feats = 0

    @staticmethod
    def _likelihood_key(feat_val, outcome):
        """Build the ``"<value>_<class>"`` key used in ``likelihoods``.

        Both parts go through ``str`` so numeric feature values and numeric
        class labels work as well as strings.
        """
        return str(feat_val) + '_' + str(outcome)

    def fit(self, X, y):
        """Estimate priors, likelihoods and evidence from the training data.

        Args:
            X: pandas DataFrame with one categorical column per feature.
            y: Class labels, one per row of ``X`` and in the same row order
                (Series, array or list).
        """
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Start from clean tables so a refit never keeps stale entries.
        self.likelihoods = {}
        self.pred_priors = {}

        outcomes = np.unique(self.y_train)
        self.class_priors = {outcome: 0 for outcome in outcomes}

        for feature in self.features:
            feat_vals = np.unique(self.X_train[feature])
            self.pred_priors[feature] = {feat_val: 0 for feat_val in feat_vals}
            # Pre-fill every (value, class) pair with 0 so pairs that never
            # occur together still have an entry at prediction time.
            self.likelihoods[feature] = {
                self._likelihood_key(feat_val, outcome): 0
                for feat_val in feat_vals
                for outcome in outcomes
            }

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """ P(c) - Prior Class Probability """
        labels = np.asarray(self.y_train)

        for outcome in np.unique(labels):
            outcome_count = (labels == outcome).sum()
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        """ P(x|c) - Likelihood """
        labels = np.asarray(self.y_train)

        for outcome in np.unique(labels):
            # Positional boolean mask: rows are matched by order, so X and y
            # do not need to share index labels.
            in_class = labels == outcome
            outcome_count = in_class.sum()

            for feature in self.features:
                counts = self.X_train.loc[in_class, feature].value_counts().to_dict()

                for feat_val, count in counts.items():
                    key = self._likelihood_key(feat_val, outcome)
                    self.likelihoods[feature][key] = count / outcome_count

    def _calc_predictor_prior(self):
        """ P(x) - Evidence """
        for feature in self.features:
            feat_vals = self.X_train[feature].value_counts().to_dict()

            for feat_val, count in feat_vals.items():
                self.pred_priors[feature][feat_val] = count / self.train_size

    def predict(self, X):
        """Predict the most probable class for every row of ``X``.

        The score of a class is ``P(c) * prod(P(x_i | c))``. The evidence
        term ``P(x)`` is the same for every class and therefore left out.
        The per-class scores of each row are printed before the winning
        class is recorded.

        Args:
            X: 2-D array-like; each row holds the feature values in the same
                order as the training columns.

        Returns:
            numpy array with one predicted class label per row.

        Raises:
            KeyError: If a row contains a feature value not seen in ``fit``.
        """
        results = []
        X = np.array(X)
        outcomes = np.unique(self.y_train)

        for query in X:
            probs_outcome = {}
            for outcome in outcomes:
                likelihood = 1
                for feat, feat_val in zip(self.features, query):
                    key = self._likelihood_key(feat_val, outcome)
                    likelihood *= self.likelihoods[feat][key]

                probs_outcome[outcome] = likelihood * self.class_priors[outcome]

            print(probs_outcome)
            # On ties max() keeps the first class in sorted label order.
            results.append(max(probs_outcome, key=probs_outcome.get))

        return np.array(results)

In [7]:
def accuracy_score(y_true, y_pred):
    """Return the percentage of predictions that equal the true labels.

    score = 100 * count(y_pred == y_true) / len(y_true), rounded to two
    decimal places.

    Args:
        y_true: True labels (list, numpy array or pandas Series).
        y_pred: Predicted labels with the same shape as ``y_true``.

    Returns:
        Accuracy as a float between 0 and 100.

    Raises:
        ValueError: If the two inputs do not have the same shape.
        ZeroDivisionError: If the inputs are empty.
    """
    # Converting to arrays makes the comparison element-wise and positional
    # for every input type; comparing two plain lists would yield one bool.
    true_labels = np.asarray(y_true)
    pred_labels = np.asarray(y_pred)

    if true_labels.shape != pred_labels.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                true_labels.shape, pred_labels.shape
            )
        )

    n_correct = np.count_nonzero(pred_labels == true_labels)
    return round(float(n_correct) / float(len(true_labels)) * 100, 2)

In [8]:
# Toy dataset written row by row: eight labelled samples with four feature
# columns plus the "Species" label column.
column_names = ("Color", "Legs", "Height", "Smelly", "Species")
records = [
    ("White", 3, "Short", "Yes", "M"),
    ("Green", 2, "Tall", "No", "M"),
    ("Green", 3, "Short", "Yes", "M"),
    ("White", 3, "Short", "Yes", "M"),
    ("Green", 2, "Short", "No", "H"),
    ("White", 2, "Tall", "No", "H"),
    ("White", 2, "Tall", "No", "H"),
    ("White", 2, "Short", "Yes", "H"),
]

# Transpose the rows into the column-oriented mapping the DataFrame is built from.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*records))
}
df = pd.DataFrame(data)
df

Unnamed: 0,Color,Legs,Height,Smelly,Species
0,White,3,Short,Yes,M
1,Green,2,Tall,No,M
2,Green,3,Short,Yes,M
3,White,3,Short,Yes,M
4,Green,2,Short,No,H
5,White,2,Tall,No,H
6,White,2,Tall,No,H
7,White,2,Short,Yes,H


In [9]:
# Cast the numeric 'Legs' column to strings so every feature value is a string.
df['Legs'] = df['Legs'].astype('str')

# Separate the label column from the feature columns.
y = df['Species']
X = df.drop(columns=['Species'])

# Train on the full table, then score the model on the same rows.
nb_clf = NaiveBayes()
nb_clf.fit(X, y)

train_predictions = nb_clf.predict(X)
train_accuracy = accuracy_score(y, train_predictions)
print("Train Accuracy: {}".format(train_accuracy))

{'H': 0.0, 'M': 0.10546875}
{'H': 0.046875, 'M': 0.00390625}
{'H': 0.0, 'M': 0.10546875}
{'H': 0.0, 'M': 0.10546875}
{'H': 0.046875, 'M': 0.01171875}
{'H': 0.140625, 'M': 0.00390625}
{'H': 0.140625, 'M': 0.00390625}
{'H': 0.046875, 'M': 0.03515625}
Train Accuracy: 87.5


In [10]:
# Classify two hand-written samples; each query is a single-row 2-D array
# with the values in training column order (Color, Legs, Height, Smelly).
query1 = np.array([['Green', '2', 'Short', 'Yes']])
prediction1 = nb_clf.predict(query1)
print("prediction 1:- {} ---> {}".format(query1, prediction1))

print("\n")

query2 = np.array([['White', '2', 'Short', 'Yes']])
prediction2 = nb_clf.predict(query2)
print("prediction 2:- {} ---> {}".format(query2, prediction2))

{'H': 0.015625, 'M': 0.03515625}
prediction 1:- [['Green' '2' 'Short' 'Yes']] ---> ['M']


{'H': 0.046875, 'M': 0.03515625}
prediction 2:- [['White' '2' 'Short' 'Yes']] ---> ['H']


In [11]:
# Display the per-feature value frequencies P(x) that fit() stored on the model.
nb_clf.pred_priors

{'Color': {'Green': 0.375, 'White': 0.625},
 'Legs': {'2': 0.625, '3': 0.375},
 'Height': {'Short': 0.625, 'Tall': 0.375},
 'Smelly': {'No': 0.5, 'Yes': 0.5}}