In [21]:
import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer

In [20]:
# Load the breast-cancer dataset; features are read from `.data` and labels from `.target` below.
dataset = load_breast_cancer()
# Display the feature-matrix and label-vector shapes (the recorded output of this cell).
dataset.data.shape, dataset.target.shape

((569, 30), (569,))

In [212]:
# `dset` is only assigned in a later cell, so inspect the raw feature matrix
# directly; this keeps the notebook runnable top-to-bottom on a fresh kernel.
dataset.data.shape

(569, 30)

In [218]:
# Append the labels as the final column so every row carries its features
# followed by its target value.
dset = np.column_stack((dataset.data, dataset.target))
dset.shape

(569, 31)

In [222]:
# Positional split (no shuffling): the last 100 rows are held out, the first
# 50 of those for validation and the final 50 for testing.
train_set, val_set, test_set = dset[:-100], dset[-100:-50], dset[-50:]

In [223]:
# Row counts of the three splits, in train / validation / test order.
tuple(len(split) for split in (train_set, val_set, test_set))

(469, 50, 50)

In [280]:
class LogisticModel():
  """Binary logistic-regression classifier trained by full-batch gradient descent.

  The model is a single weight column `layer` of shape (n_input, 1) with no
  bias term; probabilities are `sigmoid(X @ layer)`.

  NOTE(review): the weight update in `gradient_descent` uses twice the
  gradient of the mean cross-entropy and does not include the gradient of the
  L2 penalty that `cost` reports. The update rule is kept exactly as it was so
  training behaves the same; confirm whether the penalty was meant to
  influence training.
  """

  # Logits are bounded to this magnitude before exponentiation so that
  # np.exp never overflows; sigmoid is already saturated far below it.
  _MAX_LOGIT = 500.0
  # Probabilities are kept this far from 0 and 1 before taking logarithms.
  _PROB_EPS = 1e-15

  def __init__(self, n_input, lr=1e-5, reg_strength=10):
    """Create a model with all-zero weights.

    Args:
      n_input: number of feature columns expected in X.
      lr: step size applied to each gradient update.
      reg_strength: multiplier of the mean squared weight reported by `cost`.
    """
    self.lr = lr
    self.reg_strength = reg_strength
    self.layer = np.zeros((n_input, 1))

  @staticmethod
  def _as_column(values):
    """Return `values` as a float array of shape (n, 1)."""
    return np.asarray(values, dtype=float).reshape(-1, 1)

  def cost(self, pred, y):
    """Return mean binary cross-entropy plus the L2 weight penalty.

    Args:
      pred: predicted probabilities, one per sample.
      y: 0/1 labels, one per sample (1-D or column-shaped).

    Returns:
      A finite scalar even when predictions are exactly 0 or 1.
    """
    # Clip so log() is never evaluated at 0, which would yield inf/nan.
    pred = np.clip(self._as_column(pred), self._PROB_EPS, 1 - self._PROB_EPS)
    y = self._as_column(y)
    log_likelihood = y * np.log(pred) + (1 - y) * np.log(1 - pred)
    penalty = self.reg_strength * np.mean(self.layer ** 2)
    return -np.mean(log_likelihood) + penalty

  def sigmoid(self, X):
    """Element-wise logistic function 1 / (1 + exp(-X)), safe for large |X|."""
    bounded = np.clip(X, -self._MAX_LOGIT, self._MAX_LOGIT)
    return 1 / (1 + np.exp(-bounded))

  def gradient_descent(self, pred, y, X):
    """Apply one in-place weight update from the prediction error.

    Args:
      pred: predicted probabilities for the rows of X.
      y: 0/1 labels for the rows of X (1-D or column-shaped).
      X: feature matrix with one row per sample.
    """
    length = len(X)
    # Force column shapes so 1-D labels cannot broadcast into an (n, n) matrix.
    difference = self._as_column(y) - self._as_column(pred)
    gradient = (-2 / length) * X.T.dot(difference)
    self.layer -= self.lr * gradient

  def forward(self, X, y):
    """Run one training step and return the cost measured before the update.

    Args:
      X: feature matrix with one row per sample.
      y: 0/1 labels for the rows of X.

    Returns:
      The scalar cost of the current weights on (X, y), computed before the
      weights are modified.
    """
    pred = self.sigmoid(X.dot(self.layer))
    cost = self.cost(pred, y)
    self.gradient_descent(pred, y, X)
    return cost

  def predict(self, X):
    """Return hard 0.0/1.0 class predictions of shape (n, 1).

    A probability of 0.5 or more maps to class 1.
    """
    probabilities = self.sigmoid(X.dot(self.layer))
    return (probabilities >= 0.5).astype(float)

  def accuracy(self, X, y):
    """Return the fraction of rows in X whose prediction equals the label.

    Args:
      X: feature matrix with one row per sample.
      y: 0/1 labels, 1-D or column-shaped.

    Raises:
      ZeroDivisionError: if `y` is empty.
    """
    predicted = self.predict(X).ravel()
    actual = np.asarray(y).ravel()
    return int(np.count_nonzero(predicted == actual)) / len(actual)

In [281]:
# Size the model from the data rather than a literal: every column of a split
# except the last (the label) is a feature.
model = LogisticModel(train_set.shape[1] - 1)

In [282]:
# Slice features and labels once, outside the loop; the last column of each
# split holds the label. Separate names keep training and validation data
# from overwriting each other.
X_train, y_train = train_set[:, :-1], train_set[:, -1:]
X_val, y_val = val_set[:, :-1], val_set[:, -1:]

for e in range(100):
  # One full-batch gradient step on the training data.
  model.forward(X_train, y_train)

  # Report validation accuracy every 10th epoch.
  if e%10 == 0:
    print(model.accuracy(X_val, y_val))


0.26
0.26
0.74
0.26
0.74
0.9
0.76
0.88
0.76
0.8


In [278]:
# Final check on the held-out test rows: features are every column but the
# last, and the label is the last column.
X, y = test_set[:, :-1], test_set[:, -1:]
print(model.accuracy(X, y))

0.94
