In [1]:
from math import sqrt, pi, exp, floor
from statistics import mean, stdev
from files import readFile
import random

def separate_by_class(dataset):
	"""Group the rows of *dataset* by their class label.

	The label of a row is its last element (``row[-1]``).

	Returns a dict mapping each label to the list of complete rows
	(label included) carrying that label, in their original order.
	"""
	groups = {}
	for row in dataset:
		label = row[-1]
		if label in groups:
			groups[label].append(row)
		else:
			# First row seen for this label: start a new bucket.
			groups[label] = [row]
	return groups

def summarize_dataset(dataset):
	"""Summarize every column of *dataset* except the last one.

	Rows are transposed into columns; the final column is dropped and
	each remaining column is reduced to a tuple
	``(mean, sample standard deviation, number of values)``.

	Returns the list of those tuples, one per retained column.
	"""
	columns = list(zip(*dataset))
	feature_columns = columns[:-1]  # drop the trailing column
	summary = []
	for values in feature_columns:
		summary.append((mean(values), stdev(values), len(values)))
	return summary

def summarize_by_class(dataset):
	"""Build per-class column summaries for *dataset*.

	The rows are first grouped with ``separate_by_class`` and each group
	is then passed to ``summarize_dataset``.

	Returns a dict mapping each class label to that class's summary list.
	"""
	summaries = {}
	for label, rows in separate_by_class(dataset).items():
		summaries[label] = summarize_dataset(rows)
	return summaries

def calculate_probability(x, mean, stdev):
	"""Evaluate the Gaussian probability density at *x*.

	*mean* and *stdev* are the mean and standard deviation of the normal
	distribution. Note that these parameter names shadow the functions of
	the same name imported from ``statistics`` inside this function.

	Raises ZeroDivisionError when *stdev* is 0.
	"""
	deviation = x - mean
	spread = 2 * stdev**2
	exponent = exp(-(deviation**2 / spread))
	normalizer = sqrt(2 * pi) * stdev
	return (1 / normalizer) * exponent
 
def calculate_class_probabilities(summaries, test_row):
	"""Score *test_row* against every class in *summaries*.

	*summaries* maps a class label to a list of per-feature entries whose
	items 0, 1 and 2 are read as mean, standard deviation and row count.
	The row count of the first feature is used as the class size.

	For each class the score starts at ``class size / total size`` and is
	multiplied by ``calculate_probability`` of each feature value in
	*test_row* (indexed by feature position).

	Returns a dict mapping each class label to its (unnormalized) score.
	"""
	total_rows = 0
	for label in summaries:
		total_rows += summaries[label][0][2]

	scores = {}
	for label, feature_stats in summaries.items():
		# Prior: share of all summarized rows that belong to this class.
		score = feature_stats[0][2] / total_rows
		for index, stats in enumerate(feature_stats):
			score *= calculate_probability(test_row[index], stats[0], stats[1])
		scores[label] = score
	return scores

# Load the data set through the project helper `readFile`.
# NOTE(review): presumably returns a list of numeric rows whose last
# element is the class label - confirm against `files.readFile`.
dataset = readFile("Breast_cancer_data.csv")

# Repeat a random 80/20 train/test split five times and average the
# misclassification rate over the runs.
errors = []
for i in range(5):
	random.shuffle(dataset)
	x = floor(len(dataset) * 0.8)
	train = dataset[:x]
	test = dataset[x:]
	error = 0
	# The class summaries depend only on the training split, so build them
	# once per run instead of once per test row.
	summaries = summarize_by_class(train)
	for t in test:
		probabilities = calculate_class_probabilities(summaries, t)
		print("Probabilities:", probabilities)
		# Predict the label with the highest score.
		result = max(probabilities, key=probabilities.get)
		print("Result:", result)
		if result != t[-1]:
			error += 1
	errors.append(error / len(test))
print("Errors average:", mean(errors))

Probabilities: {1.0: 1.915740065452301e-05, 0.0: 9.908757509891669e-08}
Result: 1.0
Probabilities: {1.0: 3.0738526564035385e-06, 0.0: 1.318810276594018e-09}
Result: 1.0
Probabilities: {1.0: 1.2842219682609218e-05, 0.0: 8.180671202742036e-09}
Result: 1.0
Probabilities: {1.0: 4.343134038980991e-17, 0.0: 1.9722801314344422e-11}
Result: 0.0
Probabilities: {1.0: 1.4200654343360855e-09, 0.0: 1.171887904590181e-08}
Result: 0.0
Probabilities: {1.0: 1.1608478846357357e-06, 0.0: 4.0735307700635195e-07}
Result: 1.0
Probabilities: {1.0: 2.1618901468258484e-30, 0.0: 1.3636917456409312e-07}
Result: 0.0
Probabilities: {1.0: 1.497364943461681e-09, 0.0: 1.4931536933956855e-11}
Result: 1.0
Probabilities: {1.0: 6.575380143687953e-09, 0.0: 1.3183164192617955e-06}
Result: 0.0
Probabilities: {1.0: 1.3162202689948857e-05, 0.0: 2.2732708326676046e-08}
Result: 1.0
Probabilities: {1.0: 5.550658337293842e-13, 0.0: 1.88078303496253e-06}
Result: 0.0
Probabilities: {1.0: 3.4915243510150595e-05, 0.0: 2.2126308086800