In [3]:
import csv
import random
import math

def load_csv(filename):
    """Load a numeric CSV file, skipping its header row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of floats. Blank lines are skipped, and
        an empty file (no header at all) yields an empty list.

    Raises:
        ValueError: If a field on a data row cannot be converted to float.
        OSError: If the file cannot be opened.
    """
    # newline='' lets the csv module do its own newline handling, as the
    # csv documentation recommends.
    with open(filename, newline='') as file:
        reader = csv.reader(file)
        # Drop the header through the reader; the default avoids a bare
        # StopIteration when the file is completely empty.
        next(reader, None)
        # csv.reader yields [] for blank lines; keeping those would break
        # later code that indexes row[-1].
        return [[float(value) for value in row] for row in reader if row]

def split_dataset(dataset, ratio):
    """Randomly split a dataset into training and test portions.

    Args:
        dataset: Sequence of rows. It is not modified.
        ratio: Fraction of rows to place in the training portion; the
            training size is ``int(len(dataset) * ratio)``.

    Returns:
        A ``(train, test)`` tuple of lists that together hold every row of
        ``dataset`` in shuffled order.
    """
    train_size = int(len(dataset) * ratio)
    # Shuffle a copy so the caller's data keeps its original order; with the
    # same random seed this yields the same permutation as shuffling in place.
    shuffled = list(dataset)
    random.shuffle(shuffled)
    return shuffled[:train_size], shuffled[train_size:]

def separate_by_class(dataset):
    """Group rows by the value of their last element.

    Args:
        dataset: Iterable of rows; the last element of each row is used as
            the grouping key.

    Returns:
        A dict mapping each distinct last-element value to the list of rows
        carrying it, in the order the rows were encountered.
    """
    groups = {}
    for record in dataset:
        label = record[-1]
        if label not in groups:
            groups[label] = []
        groups[label].append(record)
    return groups

def summarize(dataset):
    """Compute (mean, sample standard deviation) for every column but the last.

    Args:
        dataset: Sequence of equally shaped numeric rows. The final column
            is excluded from the summary.

    Returns:
        A list with one ``(mean, stdev)`` tuple per remaining column, in
        column order. The standard deviation uses the ``n - 1`` divisor.
    """
    def average(values):
        return sum(values) / len(values)

    def sample_stdev(values):
        center = average(values)
        squared_deviations = [(value - center) ** 2 for value in values]
        return math.sqrt(sum(squared_deviations) / (len(values) - 1))

    # Transpose rows into columns, then drop the trailing column.
    feature_columns = list(zip(*dataset))[:-1]
    stats = []
    for column in feature_columns:
        stats.append((average(column), sample_stdev(column)))
    return stats

def calculate_probability(x, mean, stdev):
    """Evaluate the Gaussian probability density at ``x``.

    Args:
        x: Point at which to evaluate the density.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution.

    Returns:
        The density ``exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2*pi) * stdev)``.
    """
    squared_distance = (x - mean) ** 2
    scaled = squared_distance / (2 * stdev ** 2)
    normalizer = math.sqrt(2 * math.pi) * stdev
    return math.exp(-scaled) / normalizer

def predict(summaries, x):
    """Pick the class whose per-feature density product is largest for ``x``.

    Args:
        summaries: Dict mapping each class to a sequence of
            ``(mean, stdev)`` pairs, one per feature.
        x: Feature values, indexed in the same order as the pairs.

    Returns:
        The key of ``summaries`` with the highest product of
        ``calculate_probability`` values; on ties the first such key in
        iteration order is returned.
    """
    scores = {}
    for label in summaries:
        likelihood = 1
        for idx, params in enumerate(summaries[label]):
            mu, sigma = params
            likelihood = likelihood * calculate_probability(x[idx], mu, sigma)
        scores[label] = likelihood
    return max(scores, key=lambda label: scores[label])

def get_accuracy(test_set, predictions):
    """Return the percentage of rows whose last element equals its prediction.

    Args:
        test_set: Sequence of rows; the last element of each row is the
            value compared against the prediction.
        predictions: Sequence of predicted values, aligned by index with
            ``test_set``.

    Returns:
        Accuracy as a percentage in the range 0-100. An empty ``test_set``
        yields ``0.0`` instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` has fewer entries than ``test_set``.
    """
    if not test_set:
        return 0.0
    # Index into predictions (rather than zip) so a too-short predictions
    # list still fails loudly instead of being silently truncated.
    correct = sum(
        1 for i, row in enumerate(test_set) if row[-1] == predictions[i]
    )
    return (correct / len(test_set)) * 100

# Driver: load the data, split it, fit per-class summaries, and report accuracy.
# Read every data row of 'diabetes.csv' as a list of floats (header skipped).
dataset = load_csv('diabetes.csv')
# Use 80% of the rows for training and the remainder for testing.
train_set, test_set = split_dataset(dataset, 0.8)
print(f'Split {len(dataset)} rows into training={len(train_set)} and testing={len(test_set)} rows')
    
# Group the training rows by their last column, then compute the
# (mean, stdev) of every other column separately for each group.
separated = separate_by_class(train_set)
summaries = {cls: summarize(rows) for cls, rows in separated.items()}

# Predict from each test row with its last column stripped (row[:-1]),
# then compare the predictions against the full test rows.
predictions = [predict(summaries, row[:-1]) for row in test_set] 
print(f'Classification Accuracy: {get_accuracy(test_set, predictions):.2f}%')


Split 768 rows into training=614 and testing=154 rows
Classification Accuracy: 72.73%
