<font color = green >

# Neural networks

</font>

In [4]:
import matplotlib.pyplot as plt
import numpy as np

In [5]:
%matplotlib notebook

In [3]:
from scipy.stats import logistic
import h5py # common package to interact with a dataset that is stored on an H5 file.
import os

<font color = green >

## Activation functions
</font>

<font color = green >

## Sigmoid
</font>

In [None]:
plt.figure()
x= np.linspace(-6,6,100)
y = logistic.cdf(x)
plt.plot (x,y)
ax= plt.gca()
# ax.set_aspect('equal')
# ax.grid(True, which='both') 

ax.axhline(y=0, color='k')
ax.axvline(x=0, color='k')
plt.title('Sigmoid Function')

<font color = green >

## Rectified Linear Unit (ReLU)
</font>

In [None]:
plt.figure()
x= np.linspace(-6,6,100)
y = np.maximum(x,0)
plt.plot (x,y)
ax= plt.gca()
# ax.set_aspect('equal')
# ax.grid(True, which='both') 

ax.axhline(y=0, color='k')
ax.axvline(x=0, color='k')
plt.title('ReLU Function')


<font color = green >

## Leaky ReLU
</font>

In [None]:
plt.figure()
x= np.linspace(-6,6,100)
y = np.maximum(x,0.1*x)
plt.plot (x,y)
ax= plt.gca()
# ax.set_aspect('equal')
# ax.grid(True, which='both') 

ax.axhline(y=0, color='k')
ax.axvline(x=0, color='k')
plt.title('Leaky ReLU Function')


<font color = green >

## Tanh 
</font>

In [None]:
plt.figure()
x= np.linspace(-6,6,100)
y = np.tanh(x)
plt.plot (x,y)
ax= plt.gca()
# ax.set_aspect('equal')
# ax.grid(True, which='both') 

ax.axhline(y=0, color='k')
ax.axvline(x=0, color='k')
plt.title('Tanh Function')

In [None]:
import h5py # common package to interact with a dataset that is stored on an H5 file.
import os
cwd= os.getcwd() # current working directory
path = os.path.join(cwd,'data') 

In [None]:
def load_dataset():
    file_name=  os.path.join(path , 'train_catvnoncat.h5')
    train_dataset = h5py.File(file_name, "r")
    X_train = np.array(train_dataset["train_set_x"][:]) # your train set features
    Y_train = np.array(train_dataset["train_set_y"][:]) # your train set labels
    
    file_name=  os.path.join(path , 'test_catvnoncat.h5')
    test_dataset = h5py.File(file_name, "r")
    X_test = np.array(test_dataset["test_set_x"][:]) # your test set features
    Y_test = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = ['non-cat','cat']
    
    Y_train = Y_train.reshape(-1,1)
    Y_test = Y_test.reshape(-1,1)
    
    return X_train, Y_train, X_test, Y_test, classes

In [None]:
X_train,Y_train, X_test, Y_test, classes = load_dataset()

In [None]:
index = 2
plt.figure()
plt.imshow(X_train[index])
print ("y = " + str(Y_train[index,:]) + ", it's a '" + classes[np.squeeze(Y_train[index,:])] +  "' picture.")

In [None]:
m_train, num_px,_,_ = X_train.shape
m_test =  X_test.shape[0]
X_train_flatten = X_train.reshape(m_train, -1)
X_test_flatten =  X_test.reshape(m_test, -1)


In [None]:
X_train_scaled = X_train_flatten/255.
X_test_scaled = X_test_flatten/255.

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
Y_train = np.squeeze(Y_train)
Y_test = np.squeeze(Y_test)
print ('Y_train.shape=', Y_train.shape)
print ('Y_test.shape=', Y_test.shape)
print ('X_train_scaled.shape=', X_train_scaled.shape)

In [None]:
# Using Adam solver which is default in MLP is not good for small amount of data. 
clf= MLPClassifier(
    verbose= True,
    random_state=0,
    alpha = 10
)
clf.fit(X_train_scaled,Y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train_scaled, Y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test_scaled, Y_test)))


<font color = green>

###  Shallow Neural Network  (2 Layers)

</font>



In [None]:
#  solver= 'lbfgs'
clf= MLPClassifier(
    solver= 'lbfgs', 
    random_state=20,
    # hidden_layer_sizes=(100), # default value 
)
clf.fit(X_train_scaled,Y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train_scaled, Y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test_scaled, Y_test)))


<font color = green>

###  3 Layers Neural Network 

</font>

Note:  This data set is too small for large nn : even 3 layers nn with 3 neurons in each layer causes overfitting 



In [None]:
clf= MLPClassifier(
    solver= 'lbfgs',
    random_state=20,
    hidden_layer_sizes=(3,3),
    alpha=20, 
    max_iter=1000,
)
clf.fit(X_train_scaled,Y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train_scaled, Y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test_scaled, Y_test)))


In [None]:
#  cannot reproduce: 
# random_state=20,
# hidden_layer_sizes=(20,7,10),
# max_iter=30,
# train accuracy= 88.038%
# test accuracy= 80.000%

<font color = green>

##  Synthetic Data

</font>



In [None]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

In [None]:
# help(make_blobs)

In [None]:
def plot_decision_boundary(clf, X_train, y_train, X_test=None, y_test= None, title=None, precision=0.05,plot_symbol_size = 50, ax= None,  is_extended=True):

    '''    
    Draws the binary decision boundary for X that is nor required additional features and transformation (like polynomial)
    '''
    # Create color maps - required by pcolormesh
    from matplotlib.colors import  ListedColormap
    colors_for_points = np.array(['grey', 'orange']) # neg/pos
    colors_for_areas = np.array(['grey', 'orange']) # neg/pos  # alpha is applied later 
    cmap_light = ListedColormap(colors_for_areas)
 
   
    mesh_step_size = precision #.01  # step size in the mesh
    if X_test is None or y_test is None:
        show_test= False
        X= X_train
    else:
        show_test= True
        X= np.concatenate([X_train,X_test], axis=0)
    x1_min, x1_max = X[:, 0].min() - .1, X[:, 0].max() + 0.1
    x2_min, x2_max = X[:, 1].min() - .1, X[:, 1].max() + 0.1
    # Create grids of pairs
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, mesh_step_size),
                         np.arange(x2_min, x2_max, mesh_step_size))
    # Flatten all samples 
    target_samples_grid= (np.c_[xx1.ravel(), xx2.ravel()])

    print ('Call prediction for all grid values (precision of drawing = {},\n you may configure to speed up e.g. precision=0.05)'.format(precision))
    Z = clf.predict(target_samples_grid)

    # Reshape the result to original meshgrid shape 
    Z = Z.reshape(xx1.shape)
    
    if ax:
        plt.sca(ax)
    
    # Plot all meshgrid prediction      
    plt.pcolormesh(xx1, xx2,Z, cmap = cmap_light, alpha=0.2)

    # Plot train set
    plt.scatter(X_train[:, 0], X_train[:, 1], s=plot_symbol_size, 
                c=colors_for_points[y_train.ravel()], edgecolor = 'black',alpha=0.6)
    # Plot test set
    if show_test:
        plt.scatter(X_test[:, 0], X_test[:, 1], marker='^', s=plot_symbol_size, 
                c=colors_for_points[y_test.ravel()],edgecolor = 'black',alpha=0.6)
    if  is_extended:
        # Create legend  
        import matplotlib.patches as mpatches # use to assign lavels for colored points 
        patch0 = mpatches.Patch(color=colors_for_points[0], label='negative')
        patch1 = mpatches.Patch(color=colors_for_points[1], label='positive')
        plt.legend(handles=[patch0, patch1])
    plt.title(title)
    if is_extended:
        plt.xlabel('feature 1')
        plt.ylabel('feature 2')
    else:
        plt.tick_params(
        top =False,
        bottom= False,
        left  = False,
        labelleft = False, 
        labelbottom = False
        )

        
    
def plot_data_logistic_regression(X,y,legend_loc= None, title= None):
    '''
    :param X: 2 dimensional ndarray
    :param y:  1 dimensional ndarray. Use y.ravel() if necessary
    :return:
    '''

    positive_indices = (y == 1)
    negative_indices = (y == 0)
#     import matplotlib as mpl
    colors_for_points = ['grey', 'orange'] # neg/pos

    plt.scatter(X[negative_indices][:,0], X[negative_indices][:,1], s=40, c=colors_for_points [0], edgecolor = 'black', label='negative', alpha = 0.7)
    plt.scatter(X[positive_indices][:,0], X[positive_indices][:,1], s=40, c=colors_for_points [1], edgecolor = 'black',label='positive', alpha = 0.7)
    plt.title(title)
    plt.legend(loc= legend_loc)


In [None]:
X_mk8, y_mk8 = make_blobs(n_samples = 2000, n_features = 2, centers = 8,cluster_std = 2, random_state = 4)
# X_mk8, y_mk8 = make_blobs( n_samples = 100, n_features = 2,centers = 8, cluster_std = 1.3,random_state = 4)

X_train, X_test, y_train, y_test  =  train_test_split(X_mk8, y_mk8 % 2, random_state= 0)
# y_train = y_mk8 % 2 # make it binary since make_blobs  with centers = 8 creates y in [0..7]
plt.figure()
plot_data_logistic_regression(X_train,y_train, title= 'Make Classification')

In [None]:
clf = MLPClassifier( # default=(100,)
    solver= 'lbfgs',
    max_iter= 10000 # default=200
).fit(X_train,y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train,y_train)))
print("train accuracy= {:.3%}".format(clf.score (X_test,y_test)))

In [None]:
help(MLPClassifier)

<font color = green>

###  Single Layer Different Units Number 

</font>



In [None]:
_, ((ax1, ax2, ax3)) = plt.subplots(1, 3, figsize= (10,4))
axes = (ax1, ax2, ax3)
layer_dim  = (1,10,100)
for dim, ax in  zip(layer_dim, axes):    
    clf = MLPClassifier(hidden_layer_sizes=(dim),solver= 'lbfgs', max_iter=10000).fit(X_train, y_train)
    accuracy_train = clf.score(X_train, y_train)
    accuracy_test = clf.score(X_test, y_test)    
    plot_decision_boundary(clf, X_train, y_train,  X_test=X_test, y_test= y_test,title='1 layer {} unints. \nAccuracy_train = {:.0%} \nAccuracy_test = {:.0%}'.
                           format(dim, accuracy_train, accuracy_test),
                           precision=0.05, plot_symbol_size = 30, is_extended=False, ax= ax)
plt.tight_layout()

<font color = green>

###  Regularization

</font>



In [None]:
_, ((ax1, ax2)) = plt.subplots(1, 2, figsize= (10,6))
axes = (ax1, ax2)
alphas = (0.001, 5)
for alpha, ax in  zip(alphas, axes):    
    clf = MLPClassifier(
        hidden_layer_sizes=(100,100),
        solver= 'lbfgs', 
        alpha=alpha,
        random_state = 0,       
        max_iter= 10000
    )
    clf.fit(X_train, y_train)
    accuracy_train = clf.score(X_train, y_train)
    accuracy_test = clf.score(X_test, y_test)
 
    plot_decision_boundary(clf, X_train, y_train,X_test=X_test, y_test=y_test, title='2 layers, alpha = {} . \nAccuracy_train = {:.0%} \nAccuracy_test = {:.0%}'.
                           format(alpha, accuracy_train, accuracy_test),
                           precision=0.05, plot_symbol_size = 30, is_extended=False, ax= ax)
plt.tight_layout()

<font color = green>

###  Activation Functions

</font>



In [None]:
_, ((ax1, ax2)) = plt.subplots(1, 2, figsize= (10,6))
axes = (ax1, ax2)
activations = ('tanh', 'relu')
for activation, ax in  zip(activations, axes):    
    clf = MLPClassifier(
        hidden_layer_sizes=(100,100),
        solver= 'lbfgs', 
        random_state = 0,
        activation = activation, 
        max_iter= 10000
    )
    clf.fit(X_train, y_train)
    accuracy_train = clf.score(X_train, y_train)
    accuracy_test = clf.score(X_test, y_test)
 
    plot_decision_boundary(clf, X_train, y_train,X_test=X_test, y_test=y_test, title='2 layers, activation = {} . \nAccuracy_train = {:.0%} \nAccuracy_test = {:.0%}'.
                           format(activation, accuracy_train, accuracy_test),
                           precision=0.05, plot_symbol_size = 30, is_extended=False, ax= ax)
plt.tight_layout()

<font color = green>

##  Brest Cancer Dataset

</font>



In [None]:
# https://scikit-learn.org/stable/datasets/index.html#breast-cancer-dataset
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y= True)
print ('X.shape= ', X.shape)
print ('y.shape= ', y.shape)

In [None]:
X_train, X_test, y_train, y_test  =  train_test_split(X, y, random_state= 0)


In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
clf = MLPClassifier(
    solver= 'lbfgs',
    hidden_layer_sizes= (100,100),
    random_state = 0
).fit(X_train,y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train,y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test,y_test)))

In [None]:
clf = MLPClassifier(
    solver= 'lbfgs',
    hidden_layer_sizes= (100,10),
    random_state = 0,
    alpha= 5
).fit(X_train_scaled,y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train_scaled,y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test_scaled,y_test)))

<font color = green>

##  Handwritten Digits

</font>



<font color = green>

###  Load the data 

</font>



In [None]:
import h5py # common package to interact with a dataset that is stored on an H5 file.
import os

In [None]:
from scipy.io import loadmat
fn=  os.path.join(path , 'digits.mat')
mat= loadmat(fn)
X = mat['X']
y = mat['y'].ravel()
y = y%10 # make 10 to 0 
X_train, X_test, y_train, y_test  =  train_test_split(X, y, random_state= 0)
print ('X_train.shape=', X_train.shape)
print ('X_test.shape=', X_test.shape)
print ('y_train.shape=', y_train.shape)
print ('y_test.shape=', y_test.shape)


<font color = green>

###  Visualize the data

</font>



In [None]:
def draw_pixels_img(x, ax = None, title=None):
    '''
    :param x: ndarray - row
    '''

    img_width = int(np.sqrt(x.shape[0]))
    img_height = x.shape[0]/img_width
    try:
        data = x.reshape(img_width, -1).T
    except:
        SystemExit('Cannot compute the size of the picture')
    if ax:
        plt.sca(ax)
    else:
        plt.figure(figsize=(2, 2))
    plt.imshow(data, cmap='Greys', interpolation='nearest')
    plt.axis('off')
    if not title is None:
        plt.title(title)

In [None]:
plt.figure()
for i in range (10): 
    for j in range (10):
        index = 500*i+j           
        ax = plt.subplot(10,10,i*10+j+1) 
        draw_pixels_img(X[index], ax)
plt.tight_layout(h_pad=5) 


In [None]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
clf = MLPClassifier(
#     solver= 'lbfgs',
#     hidden_layer_sizes= (100,100),
    random_state = 10,
    alpha = 10
).fit(X_train_scaled,y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train_scaled,y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test_scaled,y_test)))

In [None]:
def draw_digits_samples(X,n_rows= 10, n_cols = 10, y=None):
    indices = np.random.randint(0, len(X), n_rows*n_cols)
    for i in range (n_rows): 
        for j in range (n_cols):
            index = n_rows*i+j           
            ax = plt.subplot(n_rows,n_cols,index+1) 
            if y is None: 
                draw_pixels_img(X[indices[index]], ax)
            else:
                draw_pixels_img(X[indices[index]], ax, title=y[indices[index]])

    plt.tight_layout(h_pad=-1) 

plt.figure()
# draw_digits_samples(X_train,X_train_scaled, n_rows= 4, n_cols = 6, clf = clf)
predicted= clf.predict(X_test_scaled)
draw_digits_samples(X_test, n_rows= 4, n_cols = 6, y = predicted)



<font color = green >

## Home Task
</font>

<font color = green>

### Brest Cancer Dataset

</font>



In [None]:
# https://scikit-learn.org/stable/datasets/index.html#breast-cancer-dataset
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
X, y = load_breast_cancer(return_X_y= True)
print ('X.shape= ', X.shape)
print ('y.shape= ', y.shape)

In [None]:
# YOUR_CODE.  Preproces data, train classifier and evaluate the perfromance on train and test sets 
# START_CODE 
scaler = StandardScaler()
X_train, X_test, y_train, y_test  =  train_test_split(X, y, random_state= 0)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
clf = MLPClassifier(
    solver= 'lbfgs',
    random_state = 20,
    alpha=6
).fit(X_train_scaled,y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train_scaled,y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test_scaled,y_test)))
# END_CODE 
    

<font color = green>

##  Signs dataset

</font>



In [12]:
# signs data set (fingers)

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

def load_dataset():
    cwd= os.getcwd() # current working directory
    path = os.path.join(cwd,'data') 
    fn =  os.path.join(path , 'train_signs.h5')
    train_dataset = h5py.File(fn, "r")
    X_train = np.array(train_dataset["train_set_x"][:]) # your train set features
    y_train = np.array(train_dataset["train_set_y"][:]) # your train set labels

    fn =  os.path.join(path , 'test_signs.h5')
    test_dataset =  h5py.File(fn, "r")
    X_test = np.array(test_dataset["test_set_x"][:]) # your test set features
    y_test = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    y_train = y_train.reshape((1, y_train.shape[0]))
    y_test = y_test.reshape((1, y_test.shape[0]))
    
    return X_train, y_train, X_test, y_test, classes


In [13]:
X_train, y_train, X_test, y_test, classes = load_dataset()
y_train = y_train.ravel()
y_test = y_test.ravel()
print ('X_train.shape=', X_train.shape)
print ('X_test.shape=', X_test.shape)
print ('y_train.shape=', y_train.shape)
print ('y_test.shape=', y_test.shape)

X_train.shape= (1080, 64, 64, 3)
X_test.shape= (120, 64, 64, 3)
y_train.shape= (1080,)
y_test.shape= (120,)


In [14]:
def display_samples_in_grid(X, n_rows, n_cols= None, y = None ):
    if n_cols is None: 
        n_cols= n_rows
    indices = np.random.randint(0, len(X),n_rows*n_cols)
    for i in range (n_rows): 
        for j in range (n_cols):
            index = n_rows*i+j           
            ax = plt.subplot(n_rows,n_cols,index+1) 
            plt.imshow(X[indices[index]])
            if not (y is None):
                plt.title(y[indices[index]])
            plt.axis('off')

    plt.tight_layout(h_pad=1)


In [15]:
plt.figure()

display_samples_in_grid(X_train, n_rows=4, y= y_train)

<IPython.core.display.Javascript object>

In [16]:
plt.gcf().canvas.set_window_title('Train set')

The set_window_title function was deprecated in Matplotlib 3.4 and will be removed two minor releases later. Use `.FigureManagerBase.set_window_title` or GUI-specific methods instead.
  plt.gcf().canvas.set_window_title('Train set')


In [None]:
# Not in use 
def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)]
    return Y

In [18]:
# YOUR_CODE.  Preproces data
# START_CODE 
# END_CODE     
scaler = StandardScaler()

# fn=  os.path.join(path , 'digits.mat')
# mat= loadmat(fn)
# X = mat['X']
# y = mat['y'].ravel()
#  = y%10 # make 10 to 0 

# X_train, X_test, y_train, y_test  =  train_test_split(X, y, random_state= 0)
# X_train, X_test, y_train, y_test  =  train_test_split(X, y, random_state= 0)
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)
# clf = MLPClassifier(
#     solver= 'lbfgs',
#     random_state = 20,
#     alpha=6
# ).fit(X_train_scaled,y_train)
ns, nx, ny, ni = X_train.shape
X_train_scaled = X_train.reshape((ns,nx*ny*ni))

ns, nx, ny, ni = X_test.shape
X_test_scaled = X_test.reshape((ns,nx*ny*ni)) 


print ("number of training examples = " + str(X_train_scaled.shape[1]))
print ("number of test examples = " + str(X_test_scaled.shape[1]))
print ("X_train_scaled shape: " + str(X_train_scaled.shape))

print ("X_test_scaled shape: " + str(X_test_scaled.shape))

number of training examples = 12288
number of test examples = 12288
X_train_scaled shape: (1080, 12288)
X_test_scaled shape: (120, 12288)


### Expected result 

```
number of training examples = 12288
number of test examples = 12288
X_train_scaled shape: (1080, 12288)
X_test_scaled shape: (120, 12288)
```

In [161]:
#### YOUR_CODE.  Train classifier and evaluate the perfromance on train and test sets 
# START_CODE 
clf= MLPClassifier(
    solver= 'lbfgs',
    random_state=10,
    hidden_layer_sizes=(33, 33),
#     alpha=0.004, 
    alpha=0.004,
    max_iter=512,
).fit(X_train_scaled,y_train)

# batches_size = 10
# lr = 0.0001
# spe = 512
# vs = 32
# epoch = 10

# activations = ('tanh', 'relu')
# for activation, ax in  zip(activations, axes):    
#     clf = MLPClassifier(
#         hidden_layer_sizes=(100,100),
#         solver= 'lbfgs', 
#         random_state = 0,
#         activation = activation, 
#         max_iter= 10000

print("train accuracy= {:.3%}".format(clf.score (X_train_scaled,y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test_scaled,y_test)))
# END_CODE     

train accuracy= 100.000%
test accuracy= 86.667%


### Expected result 

```
train accuracy= 100.000%
test accuracy= 90.000%
```

In [108]:
plt.figure()

predicted = clf.predict(X_test_scaled)

display_samples_in_grid(X_test, n_rows=4, y= predicted)


<IPython.core.display.Javascript object>

In [None]:
plt.gcf().canvas.set_window_title('Test set prediction')

### Compare with SVM 

In [None]:
from sklearn.svm import SVC

In [None]:
clf = SVC(C=10, kernel='rbf', gamma=0.001).fit(X_train_scaled, y_train)
print("train accuracy= {:.3%}".format(clf.score (X_train_scaled, y_train)))
print("test accuracy= {:.3%}".format(clf.score (X_test_scaled, y_test)))