# Regularization

We will use sklearn's `MLPClassifier` to explore the generalization gap and regularization.

## Imports

In [None]:
import os
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
# Plotting
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): presumably meant to hide the warnings emitted by the very short
# (max_iter=2) fits further down, but this hides all other warnings too — confirm.
warnings.filterwarnings("ignore")

# White-grid seaborn style for the figures in this notebook.
sns.set_style("whitegrid")

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## Data set

We will use the artificial [moons data set](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_moons.html) from sklearn.

In [None]:
# Size of the synthetic data set, and the share of it held out for validation.
n_examples = 1000
valid_sz = 0.2

# Draw the noisy two-moons sample (seeded, so every run sees the same points).
X, y = make_moons(n_examples, noise=0.3, random_state=1)

# Seeded train/validation split; `valid_sz` of the examples go to the validation set.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    random_state=2,
    test_size=valid_sz,
)

## Network

In [None]:
# --- Hyper-parameters --------------------------------------------------------
# Units per hidden layer.
hidden_layers = (100, 400, 400, 100)
# L2 regularization strength.
alpha = 0.0
# Epochs run by each individual `fit` call inside the training loop.
max_iter = 2
# How many times `fit` is called.
n_train_steps = 400
# Reuse the solution of the previous `fit` call as the starting point of the next one.
warm_start = True
# sklearn's built-in early-stopping switch (original author's note: 1, 3, 4, 5, 6*).
early_stopping = False
# Cumulative epoch count after each training step; x-axis of the accuracy plot.
epochs = max_iter * np.arange(1, n_train_steps + 1)

# NOTE(review): validation_fraction=0 gives the estimator nothing to validate on;
# confirm it is still acceptable before switching `early_stopping` on.
clf = MLPClassifier(
    hidden_layer_sizes=hidden_layers,
    solver='adam',
    alpha=alpha,
    max_iter=max_iter,
    tol=0.,
    warm_start=warm_start,
    early_stopping=early_stopping,
    validation_fraction=0,
    random_state=6,
)

# Accuracy on both splits, recorded after every training step.
train_acc, valid_acc = [], []

for step in range(n_train_steps):
    # Progress counter: line break on every 40th step, a space otherwise.
    print(step, end='\n' if step % 40 == 0 else ' ')
    clf.fit(X_train, y_train)
    train_acc.append(clf.score(X_train, y_train))
    valid_acc.append(clf.score(X_valid, y_valid))

In [None]:
# Training vs. validation accuracy against the cumulative number of epochs.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_acc, label='Training')
ax.plot(epochs, valid_acc, label='Validation')
ax.set(xlabel='Epoch', ylabel='Accuracy')
ax.legend()

## Plotting to visualize the decision boundary

In [None]:
# Scatter plot of a 2-D data set, optionally drawn over a model's prediction surface.
def make_plot(X, y, plot_name, fname=None, XX=None, YY=None, preds=None, dark=False):
    """Scatter the points of ``X`` coloured by ``y``, optionally over a prediction surface.

    Parameters
    ----------
    X : array
        Point coordinates; ``X[:, 0]`` is plotted on the horizontal axis and
        ``X[:, 1]`` on the vertical axis.
    y : array
        Per-point values used for the scatter colours (flattened with ``ravel()``).
    plot_name : str
        Figure title.
    fname : str, optional
        If given, the figure is saved to this path and then closed. If omitted,
        the figure is left open (presumably so the notebook can display it).
    XX, YY : array, optional
        Grid coordinates for the background surface.
    preds : array, optional
        Values on the grid; reshaped to ``XX.shape``. The background is drawn
        only when ``XX``, ``YY`` and ``preds`` are all given: a filled contour
        map plus a single contour line at the 0.5 level.
    dark : bool, default False
        Draw with matplotlib's ``dark_background`` style. Afterwards the global
        style is put back to matplotlib's ``default`` plus seaborn's
        ``whitegrid``.

    Notes
    -----
    The style switch is global, so it is undone in a ``finally`` clause: a
    failure while drawing or saving (for example a missing output directory)
    no longer leaves the rest of the notebook in the dark style. A figure
    whose drawing or saving failed is closed before the error is re-raised.
    """
    if dark:
        plt.style.use('dark_background')
    else:
        sns.set_style("whitegrid")

    try:
        fig = plt.figure(figsize=(16, 12))
        try:
            axes = plt.gca()
            axes.set(xlabel="$X_1$", ylabel="$X_2$")
            plt.title(plot_name, fontsize=30)
            plt.subplots_adjust(left=0.20, right=0.80)

            if XX is not None and YY is not None and preds is not None:
                # Reshape once and reuse for both the filled map and the 0.5 contour.
                surface = preds.reshape(XX.shape)
                plt.contourf(XX, YY, surface, 25, alpha=1, cmap=cm.Spectral)
                plt.contour(XX, YY, surface, levels=[.5], cmap="Greys", vmin=0, vmax=.6)

            plt.scatter(X[:, 0], X[:, 1], c=y.ravel(), s=40, cmap=plt.cm.Spectral, edgecolors='none')

            if fname:
                plt.savefig(fname)
        except BaseException:
            # Do not leave a half-built figure open when drawing or saving fails.
            plt.close(fig)
            raise

        if fname:
            # The figure is on disk; release it so repeated calls do not pile up figures.
            plt.close(fig)
    finally:
        if dark:
            # Undo the global dark style even if an error occurred above.
            plt.style.use('default')
            sns.set_style("whitegrid")

# Plot extent: the bounding box of the full data set, one (x, y) pair per corner.
# (Earlier hard-coded bounds, kept for reference: x in [-1.5, 2.5], y in [-1.0, 2].)
GRID_X_START, GRID_Y_START = X.min(axis=0)
GRID_X_END, GRID_Y_END = X.max(axis=0)

# Output directory for the saved figures (the folder must be created in the
# current working directory).
OUTPUT_DIR = "./fig/"

# 100 x 100 mesh over the bounding box; `grid_2d` lists the same points as
# (x, y) rows so they can be fed to a classifier in one call.
grid = np.mgrid[
    GRID_X_START:GRID_X_END:100j,
    GRID_Y_START:GRID_Y_END:100j,
]
XX, YY = grid
grid_2d = grid.reshape(2, -1).T

In [None]:
# Create the figure output folder ("fig", the directory OUTPUT_DIR points at) if it
# is missing. makedirs(..., exist_ok=True) checks and creates in a single call, so
# there is no gap between testing for the directory and creating it, and re-running
# the cell is harmless.
os.makedirs('fig', exist_ok=True)

## Train a network and plot the decision boundary

In [None]:
# --- Hyper-parameters (same baseline configuration as the first training run) ---
# Units per hidden layer.
hidden_layers = (100, 400, 400, 100)
# L2 regularization strength.
alpha = 0.0
# Epochs run by each individual `fit` call inside the training loop.
max_iter = 2
# How many times `fit` is called.
n_train_steps = 400
# Reuse the solution of the previous `fit` call as the starting point of the next one.
warm_start = True
# sklearn's built-in early-stopping switch (original author's note: 1, 3, 4, 5, 6*).
early_stopping = False
# Cumulative epoch count after each training step; x-axis of the accuracy plot.
epochs = max_iter * np.arange(1, n_train_steps + 1)

# NOTE(review): validation_fraction=0 gives the estimator nothing to validate on;
# confirm it is still acceptable before switching `early_stopping` on.
clf = MLPClassifier(
    hidden_layer_sizes=hidden_layers,
    solver='adam',
    alpha=alpha,
    max_iter=max_iter,
    tol=0.,
    warm_start=warm_start,
    early_stopping=early_stopping,
    validation_fraction=0,
    random_state=6,
)

# Accuracy on both splits, recorded after every training step.
train_acc, valid_acc = [], []

for step in range(n_train_steps):
    clf.fit(X_train, y_train)
    train_acc.append(clf.score(X_train, y_train))
    valid_acc.append(clf.score(X_valid, y_valid))

    # Only every 10th step gets a decision-surface snapshot.
    if step % 10 != 0:
        continue

    print(step, end=' ')
    # Column 0 of predict_proba, evaluated at every grid point.
    pred_probs = clf.predict_proba(grid_2d)[:, 0]
    plot_title = 'epoch %d' % epochs[step]
    # File name carries the zero-padded epoch number so the frames sort in order.
    fname = "model_{:05}.png".format(epochs[step])
    fpath = os.path.join(OUTPUT_DIR, fname)
    make_plot(X_valid, y_valid, plot_title, fname=fpath, XX=XX, YY=YY, preds=pred_probs.T, dark=True)



In [None]:
# Training vs. validation accuracy against the cumulative number of epochs.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_acc, label='Training')
ax.plot(epochs, valid_acc, label='Validation')
ax.set(xlabel='Epoch', ylabel='Accuracy')
ax.legend()

## Tasks

Decrease the generalization error in **four** different ways.

### Method 1

In [None]:
# Accuracy curves for Method 1.
# NOTE: this reads the global `epochs`, `train_acc` and `valid_acc`; until this
# method's own training run refills them, the previous run's curves are drawn.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_acc, label='Training')
ax.plot(epochs, valid_acc, label='Validation')
ax.set(xlabel='Epoch', ylabel='Accuracy')
ax.legend()

### Method 2

In [None]:
# Accuracy curves for Method 2.
# NOTE: this reads the global `epochs`, `train_acc` and `valid_acc`; until this
# method's own training run refills them, the previous run's curves are drawn.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_acc, label='Training')
ax.plot(epochs, valid_acc, label='Validation')
ax.set(xlabel='Epoch', ylabel='Accuracy')
ax.legend()

### Method 3

In [None]:
# Accuracy curves for Method 3.
# NOTE: this reads the global `epochs`, `train_acc` and `valid_acc`; until this
# method's own training run refills them, the previous run's curves are drawn.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_acc, label='Training')
ax.plot(epochs, valid_acc, label='Validation')
ax.set(xlabel='Epoch', ylabel='Accuracy')
ax.legend()

### Method 4

In [None]:
# Accuracy curves for Method 4.
# NOTE: this reads the global `epochs`, `train_acc` and `valid_acc`; until this
# method's own training run refills them, the previous run's curves are drawn.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_acc, label='Training')
ax.plot(epochs, valid_acc, label='Validation')
ax.set(xlabel='Epoch', ylabel='Accuracy')
ax.legend()