In [43]:
import numpy as np 
import pandas as pd 	
import matplotlib.pyplot as plt 
import math

**Bayes' theorem** for multiple features, assuming the features are independent of each other (the "naive" assumption):

`P(y | x1,x2,...,xn) = ` <br>
`P(y) * P(x1 | y) * P(x2 | y) * ... * P(xn | y)/ (P(x1) * P(x2) * ... * P(xn))`

<img src="../static/bayes_theorem.png" alt="bayes theorem"
    height="300px" width="500px" style="filter: brightness(80%);">



In [80]:
class NaiveBayes:
	"""Naive Bayes classifier for categorical features, built from frequency counts.

	``fit`` expects ``X`` to be a pandas DataFrame (it reads ``X.columns`` and
	``X.shape``) and ``y`` to be a pandas Series (it calls ``y.unique()``).
	Rows of ``X`` are selected with a boolean mask derived from ``y``, so both
	are expected to share the same index.

	Attributes populated by ``fit``:
		features:     list of column names of the training frame, in order.
		likelihoods:  ``likelihoods[feature]["<value>_<class>"]`` is the
		              fraction of training rows of ``class`` whose ``feature``
		              equals ``value`` (0 for combinations never observed).
		class_priors: ``class_priors[class]`` is the fraction of training rows
		              labelled ``class``.
		pred_priors:  ``pred_priors[feature][value]`` is the fraction of
		              training rows whose ``feature`` equals ``value``.
	"""

	def __init__(self):
		# Real empty values rather than type objects, so an unfitted model
		# exposes sensible attributes.
		self.features = []
		self.likelihoods = {}
		self.class_priors = {}
		self.pred_priors = {}

		self.X_train = None
		self.y_train = None
		self.train_size = 0
		self.num_feats = 0

	@staticmethod
	def _key(feat_val, outcome):
		"""Build the ``"<value>_<class>"`` key used inside ``likelihoods``."""
		# An f-string (instead of ``+``) also works for non-string values,
		# e.g. integer-coded categories.
		return f"{feat_val}_{outcome}"

	def fit(self, X, y):
		"""Estimate class priors, feature-value priors and likelihoods.

		Args:
			X: DataFrame of categorical features, one column per feature.
			y: Series of class labels, one per row of ``X``.
		"""
		self.features = list(X.columns)
		self.X_train = X
		self.y_train = y
		self.train_size = X.shape[0]
		self.num_feats = X.shape[1]

		# Start from a clean slate so a second call to fit() does not keep
		# classes or features left over from an earlier training set.
		self.likelihoods = {}
		self.pred_priors = {}
		outcomes = np.unique(self.y_train)
		self.class_priors = {outcome: 0 for outcome in outcomes}

		for feature in self.features:
			feat_vals = np.unique(self.X_train[feature])
			self.pred_priors[feature] = {feat_val: 0 for feat_val in feat_vals}
			# Every (value, class) pair starts at 0: calc_likelihoods only
			# writes the pairs that actually occur in the training data.
			self.likelihoods[feature] = {
				self._key(feat_val, outcome): 0
				for feat_val in feat_vals
				for outcome in outcomes
			}

		self.prior_probability()
		self.calc_likelihoods()
		self.calc_predictor_prior()

	def prior_probability(self):
		"""Set ``class_priors[c]`` to the share of training rows labelled ``c``."""
		for tag in self.y_train.unique():
			self.class_priors[tag] = int((self.y_train == tag).sum()) / self.train_size

	def calc_likelihoods(self):
		"""Set ``likelihoods[feature]["<value>_<class>"]`` from per-class value counts."""
		for tag in self.y_train.unique():
			# The class mask does not depend on the feature, so build it once.
			tag_mask = self.y_train == tag
			total_count = int(tag_mask.sum())
			for feature in self.features:
				value_counts = self.X_train[feature][tag_mask].value_counts().to_dict()
				for feature_val, count in value_counts.items():
					self.likelihoods[feature][self._key(feature_val, tag)] = count / total_count

	def calc_predictor_prior(self):
		"""Set ``pred_priors[feature][value]`` to the share of rows with that value."""
		for feature in self.features:
			feature_vals = self.X_train[feature].value_counts().to_dict()
			for feature_val, count in feature_vals.items():
				self.pred_priors[feature][feature_val] = count / self.train_size

	def predict(self, X):
		"""Return the most probable class for every row of ``X``.

		Values in each row are matched to the training features by position
		(first value -> first training column, and so on); column names of
		``X`` are not consulted.

		Args:
			X: 2-D array-like (DataFrame, list of lists, ndarray) of feature values.

		Returns:
			numpy array with one predicted class label per row.

		Raises:
			KeyError: if a row contains a value that was not present in the
				corresponding training column.
		"""
		results = []
		# dtype=object keeps each cell's own type instead of coercing the
		# whole table to a single common dtype.
		X = np.array(X, dtype=object)
		outcomes = np.unique(self.y_train)

		for query in X:
			probs_outcome = {}
			for outcome in outcomes:
				prior = self.class_priors[outcome]
				likelihood = 1
				evidence = 1

				for feat, feat_val in zip(self.features, query):
					if feat_val not in self.pred_priors[feat]:
						raise KeyError(
							f"value {feat_val!r} of feature {feat!r} was not seen during fit"
						)
					likelihood *= self.likelihoods[feat][self._key(feat_val, outcome)]
					evidence *= self.pred_priors[feat][feat_val]

				# evidence is identical for every class of a given row; it only
				# rescales the scores and does not change which one is largest.
				probs_outcome[outcome] = (likelihood * prior) / evidence

			results.append(max(probs_outcome, key=probs_outcome.get))

		return np.array(results)


In [81]:
def pre_processing(df):
    """Separate a frame into features and target.

    The last column of ``df`` is treated as the target; every other column
    is a feature. ``df`` itself is left unchanged.

    Returns:
        ``(features, target)`` where ``features`` is ``df`` without its last
        column and ``target`` is that last column.
    """
    target_col = df.columns[-1]
    features = df.drop(columns=target_col)
    target = df[target_col]
    return features, target

In [82]:
def accuracy_score(y_true, y_pred):
	"""Return the percentage of predictions that equal the true labels.

	Labels are compared position by position, so any mix of lists, numpy
	arrays and pandas Series is accepted (a Series' index is ignored).

	Args:
		y_true: 1-D array-like of true labels.
		y_pred: 1-D array-like of predicted labels, same length as ``y_true``.

	Returns:
		Accuracy as a float in [0, 100], rounded to two decimals.

	Raises:
		ValueError: if the two inputs differ in shape.
		ZeroDivisionError: if the inputs are empty.
	"""
	# Plain lists compared with ``==`` give one bool, not an element-wise
	# result; converting to arrays first makes the comparison element-wise.
	true_arr = np.asarray(y_true)
	pred_arr = np.asarray(y_pred)
	if true_arr.shape != pred_arr.shape:
		raise ValueError(
			f"y_true and y_pred must have the same shape, got {true_arr.shape} and {pred_arr.shape}"
		)

	n_correct = int(np.sum(pred_arr == true_arr))
	return round(float(n_correct)/float(len(true_arr)) * 100, 2)

In [83]:
# Load the comma-separated weather data; skipinitialspace drops blanks that
# follow each delimiter so values compare equal regardless of padding.
df = pd.read_table("weather.txt", delimiter=",", skipinitialspace=True)
# Last column is the target, all preceding columns are features.
X,y = pre_processing(df)

nb_classifier = NaiveBayes()
nb_classifier.fit(X,y)

# NOTE: predictions are made on the same rows the model was fitted on, so the
# figure printed below is training accuracy, not held-out accuracy.
pred = nb_classifier.predict(X)

print(f"Accuracy: {accuracy_score(y, pred)}")

Accuracy: 92.86


In [88]:
# Ad-hoc queries for the fitted classifier. The frame has no column names;
# predict() pairs each value with a training feature by position, so the
# value order in every row must follow the column order of X.
# NOTE(review): the trailing 't' is presumably the "windy" flag - confirm
# against the columns of weather.txt.
test = pd.DataFrame([
    ['Rainy','Mild', 'Normal', 't'],
    ['Overcast', 'Cool', 'Normal', 't'],
    ['Sunny', 'Hot', 'High', 't']
    ])
print(test)
nb_classifier.predict(test)

          0     1       2  3
0     Rainy  Mild  Normal  t
1  Overcast  Cool  Normal  t
2     Sunny   Hot    High  t


array(['yes', 'yes', 'no'], dtype='<U3')