In [1]:
import sys
sys.path.insert(1, '../positron/')
import numpy as np
from sklearn.datasets import load_breast_cancer
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd

# Load scikit-learn's bundled breast-cancer dataset and show the
# description text that ships with it.
data = load_breast_cancer()
print(data.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

### Load data into NumPy arrays

In [None]:
# Copy the feature matrix, and turn the flat target vector into a single
# column so that X and y both have one row per sample.
X = np.array(data.data)
y = np.array(data.target).reshape(-1, 1)

print("Loading in the data")
print("X =", X.shape, "y =", y.shape)

### Preprocess data for the neural network

In [None]:
import prep
# Replace the label column with whatever prep.one_hot_encode returns
# (project-local helper; presumably one indicator column per class -- confirm).
# NOTE(review): y is overwritten with a function of itself, so running this
# cell a second time feeds the already-encoded array back into the helper.
y = prep.one_hot_encode(y)
print(y)

### Visualize data

In [None]:
# Feature columns first, then the label columns, which are named after the
# dataset's target classes.
column_names = [*data["feature_names"], *data["target_names"]]
df = pd.DataFrame(np.column_stack((X, y)), columns=column_names)

print(data["feature_names"], data["target_names"])
df.head()

In [None]:
# Pairwise correlation between every column of df (features and label
# columns alike), drawn on a large canvas with each cell annotated.
correlations = df.corr()
fig = plt.figure(figsize=(20, 15))
sns.heatmap(correlations, annot=True)

In [None]:
# Scatter of two features, coloured by the "benign" label column.
# Axis labels and a title are set so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(10, 7))
ax.grid()
ax.scatter(
    df["mean concave points"],
    df["mean concavity"],
    c=df["benign"],
    marker="o",
    alpha=0.5,
    cmap=plt.cm.winter,
)
ax.set_xlabel("mean concave points")
ax.set_ylabel("mean concavity")
ax.set_title("mean concavity vs. mean concave points (colour = benign)")
plt.show()

In [None]:
# Scatter of two features, coloured by the "benign" label column.
# Axis labels and a title are set so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(10, 7))
ax.grid()
ax.scatter(
    df["mean radius"],
    df["mean area"],
    c=df["benign"],
    marker="o",
    alpha=0.5,
    cmap=plt.cm.winter,
)
ax.set_xlabel("mean radius")
ax.set_ylabel("mean area")
ax.set_title("mean area vs. mean radius (colour = benign)")
plt.show()

In [None]:
# Scatter of two features, coloured by the "benign" label column.
# Axis labels and a title are set so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(10, 7))
ax.grid()
ax.scatter(
    df["mean texture"],
    df["mean symmetry"],
    c=df["benign"],
    marker="v",
    alpha=0.5,
    cmap=plt.cm.winter,
)
ax.set_xlabel("mean texture")
ax.set_ylabel("mean symmetry")
ax.set_title("mean symmetry vs. mean texture (colour = benign)")
plt.show()

### Get training/test data

In [None]:
from sklearn.impute import SimpleImputer

# Replace missing values (NaN), if there are any, with the column mean.
# NOTE(review): the imputer is fitted on the full X before the split, so the
# column means also include the rows that end up in the test set.
imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
X = imp_mean.fit_transform(X)

# Split features and labels into training and test parts using the
# project-local helper (split ratio is the helper's default).
xtrain, xtest, ytrain, ytest = prep.split_train_test(X, y, shuffle=True)

# Print the four shapes on one line. A generator is used instead of a
# `for _ in ...` loop so the notebook-global `_` (IPython's last-output
# variable) is not overwritten.
print(*(part.shape for part in (xtrain, xtest, ytrain, ytest)), end=" ")

### Creating the network for all features

In [None]:
import deep

# Seed before building the network (presumably this fixes the RNG used by
# deep.init_network -- confirm against the deep module).
deep.seed(0)

# One activation name per layer, in the same order as layer_sizes.
# NOTE(review): a step function as the output activation is piecewise
# constant; confirm deep.SGD can propagate a useful gradient through it.
layer_activ = ["sigmoid", "sigmoid", "binary_step"]
layer_sizes = [30, 30, 2]

# The network's input width is the number of feature columns in X.
n_features = X.shape[1]
ws, bs = deep.init_network(n_features, layer_sizes)
print(layer_sizes, layer_activ)

### Train the network

In [None]:
import score


# Train on the training split with the initial weights/biases from
# deep.init_network; mini_batch_size=1 means one sample per update step.
weights, biases, cost_history = deep.SGD(
    xtrain, ytrain,
    ws, bs,
    activations=layer_activ,
    costf="mse",
    eta=1.0,
    epochs=1000,
    mini_batch_size=1,
    verbose=True,
    cost_history_needed=False)

# Forward pass of the trained network over the held-out samples.
o = deep.feedforward(xtest, weights, biases, layer_activ)

# Accuracy = share of test samples whose predicted class equals the labelled
# class. A summed MSE divided by np.sum(y) is an error measure scaled by a
# quantity from the whole dataset, so it must not be reported as accuracy.
# Assumes o and ytest are both (n_samples, n_classes) -- TODO confirm.
predicted_class = np.argmax(np.asarray(o), axis=1)
expected_class = np.argmax(np.asarray(ytest), axis=1)
accuracy = float(np.mean(predicted_class == expected_class))

# Scale to percent before rounding so the printed value has two decimals.
print("Accuracy", round(100 * accuracy, 2), "% for", len(ytest), "samples")