In [21]:
import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer

In [20]:
dataset = load_breast_cancer()

((569, 30), (569,))

In [56]:
dset = []
for i in zip(dataset.data, dataset.target):
  a = ([0]*2)
  a[i[1]] = 1
  dset.append((torch.tensor(i[0], dtype=torch.float32),
               torch.tensor(a, dtype=torch.float32)))

In [57]:
train_set = dset[:-100]
val_set = dset[-100:-50]
test_set = dset[-50:]

In [58]:
train_set[0][0]

tensor([1.7990e+01, 1.0380e+01, 1.2280e+02, 1.0010e+03, 1.1840e-01, 2.7760e-01,
        3.0010e-01, 1.4710e-01, 2.4190e-01, 7.8710e-02, 1.0950e+00, 9.0530e-01,
        8.5890e+00, 1.5340e+02, 6.3990e-03, 4.9040e-02, 5.3730e-02, 1.5870e-02,
        3.0030e-02, 6.1930e-03, 2.5380e+01, 1.7330e+01, 1.8460e+02, 2.0190e+03,
        1.6220e-01, 6.6560e-01, 7.1190e-01, 2.6540e-01, 4.6010e-01, 1.1890e-01])

In [59]:
len(train_set), len(val_set), len(test_set)

(469, 50, 50)

In [187]:
class LogisticModel(torch.nn.Module):
  def __init__(self, n_inputs, n_outputs):
    super(LogisticModel, self).__init__()

    self.encoder = torch.nn.Sequential(
        torch.nn.Linear(n_inputs*5, 16),
        torch.nn.ReLU(),
        torch.nn.Linear(16, 4),
        torch.nn.ReLU(),
        torch.nn.Linear(4, n_outputs)
    )

  def forward(self, x):
    features = x

    for i in range(1, 3):
            features = torch.cat((features, torch.sin(i*x)), dim = -1)
            features = torch.cat((features, torch.cos(i*x)), dim = -1)

    return torch.sigmoid(self.encoder(features))

In [188]:
model = LogisticModel(30, 2)

In [189]:
EPOCHS = 300
optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-5)
criterion = torch.nn.CrossEntropyLoss()

In [190]:
for e in range(EPOCHS):
  model.train()
  for x, label in train_set:
    optimizer.zero_grad()
    output = model(x)

    loss = criterion(output, label)
    loss.backward()

    optimizer.step()

  if e%10 == 0:
    model.eval()
    accuracy = 0
    for x, label in val_set:
      label = label.detach().numpy()
      output = model(x).detach().numpy()

      if label[np.argmax(output)] == 1:
        accuracy += 1
    print(e, '\t', accuracy, "/", len(val_set), "\t", accuracy/(len(val_set)))

0 	 37 / 50 	 0.74
10 	 38 / 50 	 0.76
20 	 37 / 50 	 0.74
30 	 37 / 50 	 0.74
40 	 37 / 50 	 0.74
50 	 44 / 50 	 0.88
60 	 44 / 50 	 0.88
70 	 44 / 50 	 0.88
80 	 44 / 50 	 0.88
90 	 44 / 50 	 0.88
100 	 44 / 50 	 0.88
110 	 44 / 50 	 0.88
120 	 43 / 50 	 0.86
130 	 43 / 50 	 0.86
140 	 43 / 50 	 0.86
150 	 43 / 50 	 0.86
160 	 43 / 50 	 0.86
170 	 44 / 50 	 0.88
180 	 44 / 50 	 0.88
190 	 45 / 50 	 0.9
200 	 45 / 50 	 0.9
210 	 45 / 50 	 0.9
220 	 45 / 50 	 0.9
230 	 45 / 50 	 0.9
240 	 45 / 50 	 0.9
250 	 45 / 50 	 0.9
260 	 45 / 50 	 0.9
270 	 44 / 50 	 0.88
280 	 44 / 50 	 0.88
290 	 44 / 50 	 0.88


In [192]:
model.eval()
accuracy = 0
for x, label in test_set:
  label = label.detach().numpy()
  output = model(x).detach().numpy()

  if label[np.argmax(output)] == 1:
    accuracy += 1
print(accuracy, "/", len(val_set), "\t", accuracy/(len(val_set)))

48 / 50 	 0.96
