In [None]:
from sklearn.datasets import load_breast_cancer, make_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

# MLPClassifier

In [None]:
# Load the breast cancer dataset and hold out a test set (fixed seed so the
# split is reproducible).
dataset = load_breast_cancer()
X_cancer, y_cancer = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X_cancer, y_cancer, random_state=0)

# we want to preprocess features to be between 0-1; normalizing the data
# gives more accurate results with neural networks.
scaler = MinMaxScaler()

# Learn each feature's min/max from the TRAINING data only, then apply that
# same mapping to the test data. Re-fitting the scaler on the test set would
# scale the two sets differently and leak test-set statistics into the
# preprocessing. Scaled test values may therefore fall slightly outside 0-1.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# an MLP classifier is sklearn's version of a neural network. A neural network is an ensemble of nodes
# connected to other nodes sequentially. Each node in the network receives some input and in turn
# will give some output. Nodes are connected by their corresponding weights. Each node has its own
# activation function (something that determines whether the node is excited or not)
# and each layer has its own bias (learned alongside the weights).

# let's take a simple example of a neural network that has 2 layers of 10 nodes each.
# As an example project, we will want the neural network to predict which number a certain
# array is representing based on the index of its highest value.
# eg. given [0,0,1,0,0,0,0,0,0,0] the network should predict 2, as the highest value is at
# index 2 (counting from 0).

# before we begin, the network sets the weight values and the bias values randomly. This means that
# the connections between the nodes are random. Let's say that the first data the network sees is the
# following: [1,0,0,0,0,0,0,0,0,0]. We would want the output nodes to show [1,0,0,0,0,0,0,0,0,0]
# as the output, but you would probably get something that looks like this:
# [-1, 43, 6.4, 3, ...., 0.3] instead. This is because the weights and the biases are currently set to
# random values.

# So how does the network correct itself? It takes advantage of two key things: an optimizer and
# a loss function. The loss function says how far off from the ideal value the output is, and the
# optimizer shifts the weights so that it can make the loss function have a smaller value. After
# repeatedly correcting itself through each run through, you will arrive at a more accurate estimation.

# it is important to remember the role that activation functions play in neural networks. Activation
# functions are assigned to each node individually and help to determine what the output of the node
# will be. Common activation functions are:
# relu - set output to 0 if input <= 0 otherwise output = input
# sigmoid - squish value between 0-1
# tanh - squish value between -1 and +1

# Network configuration:
#   - two hidden layers with 100 nodes each
#   - L2 regularization with alpha=5.0 so that the model does not overfit
#   - the lbfgs optimizer
#   - a fixed random_state so the random weight initialization is repeatable
nn_params = dict(
    hidden_layer_sizes=[100, 100],
    alpha=5.0,
    random_state=0,
    solver='lbfgs',
)

# build the classifier, then train it on the scaled training data
clf = MLPClassifier(**nn_params)
clf.fit(X_train_scaled, y_train)

In [None]:
# Mean accuracy of the fitted classifier on the data it was trained on and
# on the held-out test data.
train_accuracy = clf.score(X_train_scaled, y_train)
test_accuracy = clf.score(X_test_scaled, y_test)

print('Accuracy of NN classifier on training set: {:.2f}'.format(train_accuracy))
print('Accuracy of NN classifier on test set: {:.2f}'.format(test_accuracy))

# MLPRegressor

In [None]:
# Build a synthetic regression problem: 100 samples of a single informative
# feature, with a bias of 150 and noise of 30 (seeded so it is repeatable).
X_R1, y_R1 = make_regression(
    n_samples=100,
    n_features=1,
    n_informative=1,
    bias=150.0,
    noise=30,
    random_state=0,
)

# Scatter plot of the feature against the target.
plt.figure()
plt.title('Sample regression')
plt.scatter(X_R1, y_R1, marker='o', s=50)
plt.show()

In [None]:
# 2 x 3 grid of plots: one row per activation function, one column per alpha.
fig, subaxes = plt.subplots(2, 3, figsize=(15, 12))

# 50 evenly spaced values between -3 and 3, reshaped to a (50, 1) column so
# each value is a one-feature sample the regressor can predict on
X_predict_input = np.linspace(-3, 3, 50).reshape(-1, 1)

# tricky indexing here: [0::5] walks the whole array but steps by 5 each time,
# so only every 5th sample is kept before splitting into train/test sets
X_train, X_test, y_train, y_test = train_test_split(X_R1[0::5], y_R1[0::5], random_state=0)

# through this traversal, we want to test the different activation functions on each row
# and the different alpha values on each column and see how the change affects the output
for axrow, actrow in zip(subaxes, ['tanh', 'relu']):
    for alpcol, ax in zip([0.0001, 1.0, 100], axrow):
        # using the regression version of the neural network here. This is similar in architecture
        # to the classifier but the output will be a continuous value.
        # random_state is fixed (as in the classifier above) so the random weight
        # initialization, and therefore each plot, is the same on every run.
        mlpreg = MLPRegressor(
            hidden_layer_sizes=[100, 100],
            activation=actrow,
            alpha=alpcol,
            random_state=0,
            solver='lbfgs',
        ).fit(X_train, y_train)
        # get prediction values from generated input values
        y_predict_output = mlpreg.predict(X_predict_input)
        # plot generated input vs prediction
        ax.plot(X_predict_input, y_predict_output, '^', markersize=10)
        # plot training input vs true output
        ax.plot(X_train, y_train, 'o')
        ax.set_xlabel('Input feature')
        ax.set_ylabel('Target value')
        ax.set_title('MLP regression\nalpha={}, activation={}'.format(alpcol, actrow))

# adjust the subplot spacing once, after every subplot has its labels and title
plt.tight_layout()