In [269]:
import csv, numpy as np

In [270]:
def read_csv_as_lists(filename):
    data = []

    with open(filename, mode='r', encoding='utf-8', newline='') as file:

        csv_reader = csv.reader(file)

        for row in csv_reader:
            data.append(row)
            
    return data

In [271]:
data = read_csv_as_lists('framingham.csv')

In [272]:
df = np.array(data)

In [273]:
headings = df[0:1].astype(str)

In [274]:
entries = df[1:]

In [275]:
entries[entries == "NA"] = "nan"
entries = entries.astype(float)

In [276]:
entries.shape

(4238, 16)

In [277]:
Xs = entries[0:,0:15]
Ys = entries[:, 15:]
Ys

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], shape=(4238, 1))

In [331]:
col_means = np.nanmean(Xs, axis=0)
inds = np.where(np.isnan(Xs))

Xs[inds] = np.take(col_means, inds[1])

In [442]:
mean = np.mean(Xs, axis=0)
std = np.std(Xs, axis=0)
Xs = (Xs - mean) / std

array([False, False, False, ..., False, False, False], shape=(3390,))

In [443]:
training_split = 0.8
training_labels = (Ys[0:round(Ys.shape[0] * training_split)]).flatten()
testing_labels =  (Ys[round(Ys.shape[0] * training_split):]).flatten()


training_data = Xs[0:round(Xs.shape[0] * training_split), 0:15]
testing_data = Xs[round(Xs.shape[0] * training_split):, 0:15]


In [444]:
print(np.isnan(Xs).any())
print(np.isnan(Ys).any())

False
False


In [445]:
training_data.shape, training_labels.shape

((3390, 15), (3390,))

In [480]:
import numpy as np

class ClassificationModel:
    def __init__(self, nin, lr=0.05):
        self.w = [np.random.uniform(-1, 1) for _ in range(nin)]
        self.b = np.random.uniform(-1, 1)
        self.lr = lr
    def __call__(self, Xs):
        self.act = np.dot(Xs,self.w) + self.b
        self.act = np.clip(self.act,-400,+400)
        return 1 / (1 + np.exp(-self.act))

    def loss(self, X, Y):
        losses = []
        for x, y in zip(X, Y):
            p = self(x)
            epsilon = 1e-14
            p = np.clip(p,epsilon,1-epsilon)
            current_loss = - (y * np.log(p + epsilon) + (1-y)*(np.log(1-y + epsilon)))
            losses.append(current_loss)
        return np.mean(losses)


    def step(self,Xs,Ys):
        m = Xs.shape[0]
        preds = model(Xs)
        
        grad_w = (1/m)*(np.dot(Xs.T,preds-Ys))
        grad_b = np.mean(preds - Ys)

        self.w += (-1 * self.lr * grad_w)
        self.b += (-1 * self.lr * grad_b)

    def train(self,Xs,Ys,epochs = 500):
        for _ in range(epochs):
            self.step(Xs,Ys)
            if _ % 50 == 0 :
                print(f"epoch: {_}, loss: {self.loss(Xs,Ys)}")


    def diagnos(self,data_set):
        return np.where(self(data_set) >= 0.5, 1,0)

    def params(self):
        return self.w + [self.b]

    

In [481]:
model = ClassificationModel(15)

In [488]:
epochs = 50
for epoch in range(epochs):
    model.train(training_data,training_labels)

epoch: 0, loss: 0.24886116060479668
epoch: 50, loss: 0.24886116060479668
epoch: 100, loss: 0.24886116060479668
epoch: 150, loss: 0.24886116060479668
epoch: 200, loss: 0.24886116060479668
epoch: 250, loss: 0.24886116060479666
epoch: 300, loss: 0.24886116060479666
epoch: 350, loss: 0.24886116060479666
epoch: 400, loss: 0.24886116060479666
epoch: 450, loss: 0.24886116060479666
epoch: 0, loss: 0.24886116060479668
epoch: 50, loss: 0.24886116060479666
epoch: 100, loss: 0.24886116060479666
epoch: 150, loss: 0.24886116060479666
epoch: 200, loss: 0.24886116060479666
epoch: 250, loss: 0.24886116060479666
epoch: 300, loss: 0.24886116060479666
epoch: 350, loss: 0.24886116060479666
epoch: 400, loss: 0.24886116060479666
epoch: 450, loss: 0.24886116060479666
epoch: 0, loss: 0.24886116060479666
epoch: 50, loss: 0.24886116060479666
epoch: 100, loss: 0.24886116060479666
epoch: 150, loss: 0.24886116060479666
epoch: 200, loss: 0.24886116060479666
epoch: 250, loss: 0.24886116060479666
epoch: 300, loss: 0.2

In [489]:
model.loss(training_data,training_labels)

np.float64(0.24886116060479666)

In [490]:
accuracy = f"{round(np.mean(model.diagnos(testing_data)==testing_labels) * 100)}%"
accuracy

'85%'