In [1]:
from math import sqrt, pi, exp, floor
from statistics import mean, stdev
from files import readFile
import random

def separate_by_class(dataset):
	"""Group the rows of ``dataset`` by class label.

	The label of a row is its last element. Returns a dict mapping each
	label to the list of rows carrying it, in order of first appearance.
	"""
	groups = {}
	for row in dataset:
		label = row[-1]
		if label not in groups:
			groups[label] = []
		groups[label].append(row)
	return groups

def summarize_dataset(dataset):
	"""Summarize every column of ``dataset`` except the last one.

	Returns a list with one ``(mean, stdev, count)`` tuple per column. The
	last column is skipped; callers in this file store the class label there.
	"""
	# Transpose rows into columns, then drop the trailing column.
	columns = list(zip(*dataset))
	feature_columns = columns[:-1]

	summary = []
	for values in feature_columns:
		summary.append((mean(values), stdev(values), len(values)))
	return summary

def summarize_by_class(dataset):
	"""Build per-class column summaries for ``dataset``.

	Rows are grouped with ``separate_by_class`` and each group is summarized
	with ``summarize_dataset``. Returns a dict mapping each class label to
	that group's list of ``(mean, stdev, count)`` tuples.
	"""
	summaries = {}
	for label, rows in separate_by_class(dataset).items():
		summaries[label] = summarize_dataset(rows)
	return summaries

def calculate_probability(x, mean, stdev):
	"""Evaluate the Gaussian probability density at ``x``.

	``mean`` and ``stdev`` are the distribution's parameters. A ``stdev`` of
	zero raises ZeroDivisionError, because it appears in both denominators.
	"""
	squared_deviation = (x - mean) ** 2
	twice_variance = 2 * stdev ** 2
	exponent = exp(-(squared_deviation / twice_variance))
	normalizer = 1 / (sqrt(2 * pi) * stdev)
	return normalizer * exponent
 
def calculate_class_probabilities(summaries, test_row):
	"""Score ``test_row`` against every class in ``summaries``.

	``summaries`` maps each class label to a list of ``(mean, stdev, count)``
	tuples, one per feature. For each class the score is the class prior
	(its row count divided by the total row count) multiplied by the Gaussian
	density of each feature value in ``test_row``.

	Returns a dict mapping each label to its score. The scores are not
	normalized, so they are only meaningful relative to one another.
	"""
	# Every feature of a class records the same row count, so read the first.
	class_counts = [summaries[label][0][2] for label in summaries]
	total_rows = sum(class_counts)

	probabilities = {}
	for label, feature_stats in summaries.items():
		score = summaries[label][0][2] / total_rows
		for index, stats in enumerate(feature_stats):
			score *= calculate_probability(test_row[index], stats[0], stats[1])
		probabilities[label] = score
	return probabilities

# Evaluate the classifier with five random 80/20 train/test splits and report
# the mean misclassification rate.
# NOTE(review): readFile is assumed to return a mutable list of numeric rows
# with the class label in the last position - confirm against files.readFile.
dataset = readFile("Breast_cancer_data.csv")

errors = []
for i in range(5):
	# Reshuffle in place so each repetition draws a different split.
	random.shuffle(dataset)
	x = floor(len(dataset) * 0.8)
	train = dataset[:x]
	test = dataset[x:]
	# The summaries depend only on the training rows, so fit once per split
	# instead of refitting for every test row.
	summaries = summarize_by_class(train)
	error = 0
	for t in test:
		probabilities = calculate_class_probabilities(summaries, t)
		print("Probabilities:", probabilities)
		# Predict the class with the highest score.
		result = max(probabilities, key=probabilities.get)
		print("Result:", result)
		if result != t[-1]:
			error += 1
	# Fraction of misclassified test rows for this split.
	errors.append(error / len(test))
print("Errors average:", mean(errors))

Probabilities: {1.0: 3.0487453819530277e-16, 0.0: 1.4199019259091254e-06}
Result: 0.0
Probabilities: {1.0: 2.4409668097421586e-05, 0.0: 6.21814226066412e-08}
Result: 1.0
Probabilities: {1.0: 1.3103002666024024e-05, 0.0: 7.963417623953572e-08}
Result: 1.0
Probabilities: {1.0: 6.410105279983567e-16, 0.0: 1.7081138218470326e-06}
Result: 0.0
Probabilities: {1.0: 1.8191638496444034e-14, 0.0: 1.8409852865132298e-06}
Result: 0.0
Probabilities: {1.0: 4.460890744167532e-06, 0.0: 3.563689698936966e-10}
Result: 1.0
Probabilities: {1.0: 6.198014823786879e-06, 0.0: 1.1709606439690363e-08}
Result: 1.0
Probabilities: {1.0: 3.5214381588512783e-07, 0.0: 4.815147108472286e-10}
Result: 1.0
Probabilities: {1.0: 3.770727949681504e-06, 0.0: 2.9360804585874977e-09}
Result: 1.0
Probabilities: {1.0: 1.8662753373338384e-06, 0.0: 3.587842170749188e-07}
Result: 1.0
Probabilities: {1.0: 2.712382689690547e-06, 0.0: 2.826601032828558e-07}
Result: 1.0
Probabilities: {1.0: 1.9298157198201117e-13, 0.0: 1.65844291280376