In [23]:
import math
import random
import pandas as pd
import numpy as np

# Seed Python's built-in `random` generator so that the random train/test
# split drawn by splitting() (via random.randrange) is repeatable.
# Only the stdlib generator is seeded here; NumPy's random state is untouched.
random.seed(1)


In [2]:
def encode_class(mydata):
    """Replace the class label in the last column of every row with an integer code.

    Codes are assigned in order of first appearance: the first distinct label
    seen becomes 0, the next one 1, and so on. Rows are modified in place.

    Parameters
    ----------
    mydata : list of list
        Data rows; the last element of each row is its class label.

    Returns
    -------
    list of list
        The same ``mydata`` object, with every ``row[-1]`` replaced by its code.
    """
    # Collect the distinct labels in first-seen order.
    classes = []
    for row in mydata:
        if row[-1] not in classes:
            classes.append(row[-1])

    # Work out every row's code from its ORIGINAL label before writing anything.
    # Relabelling class by class in place would re-match rows whose freshly
    # written code happens to equal a later class label (e.g. labels 1 and 0).
    codes = [classes.index(row[-1]) for row in mydata]
    for row, code in zip(mydata, codes):
        row[-1] = code
    return mydata


In [3]:
def splitting(mydata, ratio):
    """Randomly partition ``mydata`` into a training list and a test list.

    NOTE(review): ``splitting`` is defined again in a later cell; once both
    cells have run, that later definition shadows this one. One of the two
    should be removed.

    Parameters
    ----------
    mydata : sequence
        Rows to split. The input itself is not modified.
    ratio : float
        Fraction of rows to place in the training set, between 0 and 1.
        The training size is ``int(len(mydata) * ratio)`` (rounded down).

    Returns
    -------
    (list, list)
        ``(train, test)``: rows drawn at random, and the remaining rows in
        their original relative order.

    Raises
    ------
    ValueError
        If ``ratio`` is not within [0, 1].
    """
    # Fail fast: a ratio above 1 would otherwise drain the pool and die inside
    # random.randrange with an unhelpful "empty range" error.
    if not 0 <= ratio <= 1:
        raise ValueError("ratio must be between 0 and 1, got {!r}".format(ratio))

    train_num = int(len(mydata) * ratio)
    train = []
    test = list(mydata)  # shallow copy so the caller's list keeps all its rows

    # Move randomly chosen rows out of the pool until the quota is reached.
    while len(train) < train_num:
        index = random.randrange(len(test))
        train.append(test.pop(index))
    return train, test


In [4]:
def splitting(mydata, ratio):
    """Randomly partition ``mydata`` into a training list and a test list.

    NOTE(review): this redefines the ``splitting`` function from the previous
    cell and shadows it; the duplicate should be removed.

    Parameters
    ----------
    mydata : sequence
        Rows to split. The input itself is not modified.
    ratio : float
        Fraction of rows to place in the training set, between 0 and 1.
        The training size is ``int(len(mydata) * ratio)`` (rounded down).

    Returns
    -------
    (list, list)
        ``(train, test)``: rows drawn at random, and the remaining rows in
        their original relative order.

    Raises
    ------
    ValueError
        If ``ratio`` is not within [0, 1].
    """
    # Validate before touching the random generator: a ratio above 1 would
    # otherwise empty the pool and fail inside random.randrange with an
    # unhelpful "empty range" error.
    if not 0 <= ratio <= 1:
        raise ValueError("ratio must be between 0 and 1, got {!r}".format(ratio))

    train_size = int(len(mydata) * ratio)
    train = []
    test = list(mydata)  # shallow copy so the caller's list keeps all its rows

    # Draw rows at random from the pool until the training quota is reached.
    while len(train) < train_size:
        index = random.randrange(len(test))
        train.append(test.pop(index))

    return train, test


In [5]:
def groupUnderClass(mydata):
    """Bucket rows by their class label (the last element of each row).

    Returns a dict mapping each label to the list of rows carrying it. Rows
    are stored by reference, in the order they appear in ``mydata``.
    """
    by_label = {}
    for record in mydata:
        label = record[-1]
        if label in by_label:
            by_label[label].append(record)
        else:
            by_label[label] = [record]
    return by_label


In [6]:
def MeanAndStdDev(numbers):
    """Return ``(mean, std)`` for a collection of numbers.

    The standard deviation is NumPy's default (population, ``ddof=0``) with a
    small constant added, so a column of identical values never produces a
    zero that would later be used as a divisor.
    """
    epsilon = 1e-10  # keeps the returned std strictly positive
    center = np.mean(numbers)
    spread = np.std(numbers)
    return center, spread + epsilon

def MeanAndStdDevForClass(mydata):
    """Compute per-class, per-feature ``(mean, std)`` summaries.

    Rows are grouped by their last element (the class label); for every group
    the label column is dropped and each remaining column is summarised with
    ``MeanAndStdDev``.

    NOTE(review): this function is defined a second time in a later cell,
    which shadows this definition once both cells have run.

    Returns a dict: class label -> list of ``(mean, std)`` tuples, one per
    feature column, in column order.
    """
    stats_by_class = {}
    for label, members in groupUnderClass(mydata).items():
        feature_rows = [member[:-1] for member in members]  # label stripped
        per_feature = []
        # zip(*rows) transposes the rows into per-feature columns.
        for column in zip(*feature_rows):
            per_feature.append(MeanAndStdDev(column))
        stats_by_class[label] = per_feature
    return stats_by_class


In [7]:
def calculateGaussianProbability(x, mean, stdev):
    """Evaluate the normal density with the given mean and std at ``x``.

    Computes ``1 / (sqrt(2*pi) * stdev) * exp(-(x - mean)^2 / (2 * stdev^2))``.
    ``stdev`` must be non-zero.
    """
    deviation = x - mean
    exponent = math.exp(-(deviation ** 2) / (2 * stdev ** 2))
    normalizer = 1 / (math.sqrt(2 * math.pi) * stdev)
    return normalizer * exponent


In [8]:
def calculateClassProbabilities(info, test):
    """Score one row against every class.

    For each class in ``info`` the score is the product of the Gaussian
    densities of the row's leading values, one per ``(mean, std)`` summary.
    Any extra trailing entries of ``test`` (such as a class label) are not
    read. No class prior is applied.

    Returns a dict: class label -> score.
    """
    probabilities = {}
    for classValue, summaries in info.items():
        likelihood = 1
        for position, (mean, std) in enumerate(summaries):
            likelihood *= calculateGaussianProbability(test[position], mean, std)
        probabilities[classValue] = likelihood
    return probabilities


In [9]:
def predict(info, test):
    """Return the class label with the highest score for a single row.

    When several classes share the top score, the one that comes first in
    ``info`` wins.
    """
    scores = calculateClassProbabilities(info, test)
    best_label, _ = max(scores.items(), key=lambda item: item[1])
    return best_label

def getPredictions(info, test):
    """Predict a class label for every row in ``test``; returns them as a list in row order."""
    labels = []
    for row in test:
        labels.append(predict(info, row))
    return labels


In [24]:
def accuracy_rate(test, predictions):
    """Return the percentage of rows whose label matches its prediction.

    Parameters
    ----------
    test : sequence of rows
        Rows whose last element is the true class label.
    predictions : sequence
        Predicted labels, aligned one-to-one with ``test``.

    Returns
    -------
    float
        Accuracy in the range 0-100. An empty ``test`` yields 0.0 instead of
        dividing by zero.

    Raises
    ------
    ValueError
        If ``test`` and ``predictions`` differ in length; comparing them would
        otherwise either crash or silently ignore the surplus predictions.
    """
    if len(test) != len(predictions):
        raise ValueError(
            "test and predictions must have the same length, got {} and {}".format(
                len(test), len(predictions)
            )
        )
    if len(test) == 0:
        return 0.0

    correct = sum(
        1 for row, predicted in zip(test, predictions) if row[-1] == predicted
    )
    return (correct / len(test)) * 100


In [20]:
def MeanAndStdDevForClass(mydata):
    """Compute per-class, per-feature ``(mean, std)`` summaries.

    NOTE(review): this redefines the ``MeanAndStdDevForClass`` function from
    an earlier cell and shadows it; the duplicate should be removed.

    Rows are grouped by class label (their last element). Within each group
    the label column is removed and every remaining column is summarised with
    ``MeanAndStdDev``.

    Returns a dict: class label -> list of ``(mean, std)`` tuples, one per
    feature column, in column order.
    """
    def summarize(instances):
        # Strip the trailing class column, then transpose rows into columns.
        attribute_columns = zip(*(instance[:-1] for instance in instances))
        return [MeanAndStdDev(column) for column in attribute_columns]

    return {
        classValue: summarize(instances)
        for classValue, instances in groupUnderClass(mydata).items()
    }


In [25]:
# Toy credit-approval dataset: three numeric feature columns followed by the
# "Approved" label. The label is the last column, which is where the helper
# functions in this notebook look for the class (row[-1]).
data = {
    "Income": [50, 60, 55, 40, 30, 80, 90, 20, 70, 65, 85, 45],
    "CreditScore": [750, 720, 710, 650, 600, 800, 820, 580, 760, 740, 810, 630],
    "LoanAmount": [200, 220, 210, 150, 120, 300, 350, 100, 260, 240, 330, 140],
    "Approved": ["Yes", "Yes", "Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "No"]
}

# Turn the table into a plain list of row lists, one row per applicant.
df = pd.DataFrame(data)
mydata = df.values.tolist()

# Replace the string labels with integer codes in order of first appearance
# ("Yes" -> 0, "No" -> 1 for this data). encode_class mutates the rows in place.
mydata = encode_class(mydata)

# Cast every feature value (all columns except the trailing label) to float,
# modifying the rows in place.
for row in mydata:
    for i in range(len(row) - 1):
        row[i] = float(row[i])



In [26]:
# Randomly assign int(0.7 * number of rows) rows to training; the rest form the test set.
train_data, test_data = splitting(mydata, 0.7)

# "Training" consists of estimating a (mean, std) pair per feature for each class.
info = MeanAndStdDevForClass(train_data)
# Test rows still carry their label in the last position; scoring only reads
# as many leading values as there are feature summaries, so the label is ignored.
predictions = getPredictions(info, test_data)

# Show predictions next to the true labels (last element of each test row),
# followed by the accuracy as a percentage.
print("Predicted labels:", predictions)
print("Actual labels:", [row[-1] for row in test_data])
print("Accuracy of Credit Scoring Model:", accuracy_rate(test_data, predictions))


Predicted labels: [0, 1, 0, 1]
Actual labels: [0, 1, 0, 1]
Accuracy of Credit Scoring Model: 100.0
