In [1]:
from math import sqrt, pi, exp
from statistics import mean, stdev
from files import readFile

def separate_by_class(dataset):
	"""Group the rows of ``dataset`` by class label.

	The last element of each row is used as its label.

	Returns:
		A dict mapping each label to the list of rows carrying that label.
		Rows keep their original order and are the same objects as in
		``dataset`` (they are not copied).
	"""
	rows_by_label = {}
	for row in dataset:
		label = row[-1]
		if label not in rows_by_label:
			rows_by_label[label] = []
		rows_by_label[label].append(row)
	return rows_by_label

def summarize_dataset(dataset):
	"""Compute per-column statistics for every column except the last.

	The rows are transposed into columns and the final column is dropped
	before summarizing.

	Returns:
		A list with one ``(mean, stdev, count)`` tuple per remaining column,
		in column order. An empty ``dataset`` yields an empty list.
	"""
	columns = list(zip(*dataset))
	feature_columns = columns[:-1]
	stats = []
	for values in feature_columns:
		stats.append((mean(values), stdev(values), len(values)))
	return stats

def summarize_by_class(dataset):
	"""Summarize ``dataset`` separately for each class label.

	Rows are first grouped with ``separate_by_class``; each group is then
	passed to ``summarize_dataset``.

	Returns:
		A dict mapping each label to that group's list of per-column
		summary tuples.
	"""
	summaries_by_label = {}
	for label, rows in separate_by_class(dataset).items():
		summaries_by_label[label] = summarize_dataset(rows)
	return summaries_by_label

def calculate_probability(x, mean, stdev):
	"""Evaluate the Gaussian probability density at ``x``.

	Args:
		x: Point at which the density is evaluated.
		mean: Mean of the distribution.
		stdev: Standard deviation of the distribution; a value of zero
			raises ``ZeroDivisionError``.

	Returns:
		The density value (not a probability mass; it may exceed 1 for a
		small ``stdev``).
	"""
	squared_deviation = (x - mean) ** 2
	twice_variance = 2 * stdev ** 2
	# The exponential term is evaluated before the normalizing factor.
	decay = exp(-(squared_deviation / twice_variance))
	normalizer = 1 / (sqrt(2 * pi) * stdev)
	return normalizer * decay
 
def calculate_class_probabilities(summaries, test_row):
	"""Score ``test_row`` against every class in ``summaries``.

	Each class score is the class prior (the class's row count divided by
	the total row count) multiplied by the Gaussian density of every
	feature value of ``test_row`` under that class's per-feature mean and
	standard deviation.

	Args:
		summaries: Dict mapping each label to a list of per-feature
			entries whose first three items are mean, stdev and count.
		test_row: Sequence indexed by feature position; items beyond the
			summarized features are not read.

	Returns:
		A dict mapping each label to its score. The scores are not
		normalized, so they do not sum to 1.
	"""
	# The row count stored with the first feature stands in for the class size.
	total_rows = 0
	for feature_stats in summaries.values():
		total_rows += feature_stats[0][2]

	scores = {}
	for label, feature_stats in summaries.items():
		score = feature_stats[0][2] / total_rows
		for index, stats in enumerate(feature_stats):
			score *= calculate_probability(test_row[index], stats[0], stats[1])
		scores[label] = score
	return scores

# Demo run: load the data, build per-class summaries, score one row, print the result.
# NOTE(review): readFile comes from the project-local `files` module; presumably it
# returns a list of numeric rows with the class label in the last column — confirm.
dataset = readFile("Breast_cancer_data.csv")
summaries = summarize_by_class(dataset)
# The scored row is dataset[0], which also contributed to the summaries above.
probabilities = calculate_class_probabilities(summaries, dataset[0])
print(probabilities)

{0: 0.05032427673372076, 1: 0.00011557718379945765}
