In [1]:
import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer

In [2]:
dataset = load_breast_cancer() # getting the breast cancer data from sklearn

In [3]:
dset = dataset.data
dset = np.column_stack((dset, dataset.target))
dset.shape

(569, 31)

In [4]:
train_set = dset[:-100]
val_set = dset[-100:-50]
test_set = dset[-50:]

In [5]:
len(train_set), len(val_set), len(test_set)

(469, 50, 50)

In [101]:
class LogisticModel():
  def __init__(self, n_input):
    self.lr = 1e-4
    self.layer = np.zeros((n_input, 1)) # theta of values that will compute output

  def cost(self, pred, y):
    cost = y-pred # calculates the cost (standard difference)
    return cost

  def sigmoid(self, X):
    return 1/(1+np.exp(-X))

  def gradient_descent(self, pred, y, X): # regular gradient descent
    length = len(X)
    difference = self.cost(pred, y)
    gradient = (-2/length) * X.T.dot(difference) # finding the gradients and also the derivatives
    self.layer -= self.lr * gradient

  def forward(self, X, y): #forwards input through hidden layer
    pred = self.sigmoid(X.dot(self.layer))
    self.gradient_descent(pred, y, X)

  def predict(self, X):
    vals = self.sigmoid(X.dot(self.layer))
    vals = [ int(x >= 0.5) for x in vals ]
    return vals

  def accuracy(self, X, y):
    score = 0
    _x = self.predict(X)
    for i in range(len(y)):
      score += int(_x[i] == y[i])

    return score / len(y)

In [102]:
model = LogisticModel(30)

In [103]:
for e in range(1000):
  X = train_set[:, :-1]
  y = train_set[:, -1:]
  model.forward(X, y)

  if e%100 == 0:
    X = val_set[:, :-1]
    y = val_set[:, -1:]
    print(model.accuracy(X, y))


0.26
0.76
0.86
0.9
0.9
0.84
0.84
0.86
0.9
0.84


In [104]:
X = test_set[:, :-1]
y = test_set[:, -1:]
print(model.accuracy(X, y))

0.9
