<a href="https://colab.research.google.com/github/SandytheTraveller/Deep-Learning-Course/blob/main/Lecture_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lecture 3

In [None]:
def plot_decision_boundary(X, y, w, title):
    """Plot labelled 2-D samples and the bias-free boundary defined by ``w``.

    The boundary is the line through the origin ``w[0]*x0 + w[1]*x1 == 0``.
    Two arrows are drawn from the origin as well: the weight vector (red
    edge) and the row of ``X`` at index ``len(y) - 1`` (green edge), which
    the training loop uses as "the current sample".

    Parameters
    ----------
    X : 2-D array
        Input vectors. Only columns 0 and 1 are plotted.
    y : 1-D array
        Input labels; samples with label 0 are drawn as "Neg" and samples
        with label 1 as "Pos".
    w : sequence
        Current weights. Only ``w[0]`` and ``w[1]`` are used.
    title : str
        Prefix of the figure title; ``'_i_' + str(len(X))`` is appended.
    """
    plt.figure(figsize=(10, 10))
    plt.rc('axes', labelsize=14)   # fontsize of the x and y labels
    plt.rc('xtick', labelsize=14)  # fontsize of the tick labels
    plt.rc('ytick', labelsize=14)

    # Always use the weights handed in by the caller. Reading a global
    # ``model.w`` here would make the ``w`` argument meaningless and plot the
    # wrong model whenever a different object's weights are passed.
    if w[1] != 0:
        m = -w[0] / w[1]  # slope of the line w[0]*x0 + w[1]*x1 = 0
    else:
        m = None  # no finite slope: boundary is vertical or undefined
    print(f'w:{w}, m:{m}')

    last_x = X[len(y) - 1, :]
    print(f'last_x:{last_x}')

    plt.suptitle(title + '_i_' + str(len(X)), fontsize=20)
    plt.ylim(-3, 3)
    plt.xlim(-3, 3)

    plt.plot(X[:, 0][y == 0], X[:, 1][y == 0], "bo", label="Neg", markersize=15)
    plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], "ro", label="Pos", markersize=15)
    plt.xlabel("X0")
    plt.ylabel("X1")

    # Drawing the hyperplane.
    if m is not None:
        xx = np.linspace(-3, 3)
        plt.plot(xx, m * xx, 'r-')
    elif w[0] != 0:
        # w[1] == 0 and w[0] != 0: the boundary is the vertical line x0 = 0.
        plt.axvline(0, color='r', linestyle='-')
    # If both weights are zero there is no boundary to draw.

    plt.legend(loc="upper left", prop={'size': 15})
    plt.grid()

    # Arrow from the origin to the weight vector.
    plt.arrow(0, 0, w[0], w[1], linestyle="--",
              head_width=0.09,
              width=0.02,
              ec='red')
    # Arrow from the origin to the current sample.
    plt.arrow(0, 0, last_x[0], last_x[1], linestyle="--",
              head_width=0.09,
              width=0.02,
              ec='green')
    plt.show()
    plt.close()

- To visualize the hyperplane during training, we write the `fit_print` method, which plots the hyperplane before and after each per-sample weight update (the first sample is skipped)
- Unlike the fit method, this is a sequential update
  - Within each epoch any weight update is right away used for the prediction of the next sample
  - To better visualize the change, no bias is used during the training

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
from sklearn.metrics import accuracy_score

class Perceptron:
    """Perceptron with a Heaviside step activation.

    ``w`` holds the weights (``None`` until a fit method runs), ``b`` the
    bias (starts at 0) and ``eta`` the learning rate.
    """

    def __init__(self, eta):
        self.eta = eta
        self.b = 0
        self.w = None

    def _net_input(self, X):
        # Linear part of the model: dot product with the weights plus bias.
        return np.dot(X, self.w) + self.b

    def activation(self, z):
        """Heaviside step: 1 where ``z > 0``, else 0 (``z == 0`` maps to 0)."""
        return np.heaviside(z, 0)

    def fit(self, X, y, epochs, performance_criterion):
        """Train with a parallel (per-epoch) update and return ``(w, b)``.

        Weights are reset to zeros and the bias to 0. In every epoch all
        samples are first predicted with the weights from the start of the
        epoch; only afterwards are the per-sample corrections applied one
        after another. After each epoch the value of
        ``performance_criterion(y, y_pred)`` is printed, where ``y_pred`` are
        those start-of-epoch predictions.
        """
        self.w = np.zeros(X.shape[1])
        self.b = 0

        for epoch in range(epochs):
            # Predictions for the whole training set, made before any update.
            y_pred = self.activation(self._net_input(X))
            for i, sample in enumerate(X):
                step = self.eta * (y[i] - y_pred[i])
                self.w = self.w + step * sample
                self.b = self.b + step
            print(f"\t epoch:{epoch}, accuracy:{performance_criterion(y, y_pred)}")
        return self.w, self.b

    def fit_print(self, X, y, epochs):
        """Train sequentially, plotting the boundary around each update.

        Each sample is predicted with the current weights and the weights are
        updated immediately, so the next sample already sees the change.
        Weights start at 0.1 (non-zero init); the bias is set to 0 and is not
        updated. For every sample except the first, the boundary is plotted
        before ("initial") and after ("update") the weight change. Returns
        ``(w, b)``.
        """
        n_features = X.shape[1]
        n_samples = X.shape[0]
        self.w = np.zeros(n_features) + 0.1  # non zero init
        self.b = 0

        for _ in range(epochs):
            for i in range(n_samples):
                seen = i + 1
                draw = i > 0
                if draw:
                    plot_decision_boundary(X[:seen, :], y[:seen], self.w, "initial")

                y_pred = self.activation(self._net_input(X[i, :]))
                # Only the weights are corrected here; the bias stays fixed.
                self.w = self.w + self.eta * (y[i] - y_pred) * X[i]

                if draw:
                    plot_decision_boundary(X[:seen, :], y[:seen], self.w, "update")

        return self.w, self.b

    def predict(self, X):
        """Return the step-activated output for ``X`` using the current ``w``/``b``."""
        return self.activation(self._net_input(X))

Now we classify the iris dataset

In [None]:
from sklearn.datasets import load_iris

# Load the iris dataset object and display the names of its feature columns.
iris = load_iris()
iris.feature_names

In [None]:
# Display the dataset's target (class) names.
iris.target_names

In [None]:
# Display the shape of the feature matrix.
iris.data.shape

In [None]:
# Display the target labels.
iris.target

In [None]:
# Alternative (disabled): use every feature column.
# X = iris.data # all columns
# Keep only feature columns 0 and 1 so the samples can be drawn in 2-D.
X = iris.data[:, (0, 1)] # two features
# Binary target: 1 where the original label is 0, otherwise 0.
y = (iris.target == 0).astype(int) # we classify setosa against all
# Display the resulting binary labels.
y

Now we split the data into train and test sets and normalize the features.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Stratified, shuffled 90/10 split of the samples.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, stratify=y
)

# Estimate the scaling statistics on the training part only, then apply the
# same transformation to both parts.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Fit the perceptron on the training data.

In [None]:
## learning rate 1 (too high for a practical choice, here chosen just for the plot)
model = Perceptron(1)
# Before training: w is None and b is 0.
print(f'model.w: {model.w}, model.b: {model.b}')
# Batch alternative (disabled); note that fit also needs `epochs` and
# `performance_criterion`, e.g.
# model.fit(X_train, y_train, epochs=10, performance_criterion=accuracy_score)
# Sequential training for one pass over the data; plots the boundary around each update.
model.fit_print(X_train, y_train, epochs=1)
print(f'model.w: {model.w}, model.b: {model.b}')
# Predictions of the trained model on both splits.
y_train_predicted = model.predict(X_train)
y_test_predicted = model.predict(X_test)

Better way to estimate the generalization error: cross-validation.

In [None]:
from sklearn.model_selection import train_test_split, cross_val_predict, cross_val_score, StratifiedKFold

# 5-fold stratified cross-validation of the Perceptron.
# For every fold the scaler is re-fitted on the training part only, a fresh
# model is trained, and the accuracy on the held-out part is recorded.
kf = StratifiedKFold(n_splits=5)
acc = []

# enumerate(..., start=1) replaces a hand-maintained fold counter.
for fold, (train_idx, test_idx) in enumerate(kf.split(X, y), start=1):
    print(f"fold:{fold}")

    # Training part: fit the scaler here so no test statistics leak in.
    X_train, y_train = X[train_idx, :], y[train_idx]
    X_train = scaler.fit_transform(X_train)

    model = Perceptron(0.01)
    model.fit(X_train, y_train, epochs=10,
              performance_criterion=accuracy_score)

    # Held-out part: reuse the scaling learned on the training part.
    X_test, y_test = X[test_idx, :], y[test_idx]
    X_test = scaler.transform(X_test)
    y_test_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_test_pred)
    print(f"test accuracy fold:{accuracy}\n")
    acc.append(accuracy)

# `model` is left bound to the model trained on the last fold.
print(f"mean accuracy:{np.mean(acc)}, std accuracy:{np.std(acc)}")

Plotting the perceptron boundary trained on last fold

In [None]:
# Re-fit the scaler on the full data set and plot all samples with the
# weights of `model`.
# NOTE(review): the scaling used here is fitted on all of X, not on the
# training part the model was fitted with, and the plot draws the boundary
# through the origin (the bias is not shown) — confirm this is acceptable.
X_norm = scaler.fit_transform(X)
plot_decision_boundary(X_norm,y, model.w,"Final")

# **Exercise**
* ### By leveraging the Perceptron class, implement a new class Adaline, and modify the corresponding fit method to implement the Adaline learning rule
* ### Test it on iris and wine data

In [None]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Accepts scalars as well as array-likes; array input is handled
    element-wise and returns a NumPy array.
    """
    # np.exp is used instead of math.exp: ``math`` is not imported by the
    # visible cells (so the call raised NameError) and math.exp cannot
    # handle arrays.
    x = np.asarray(x, dtype=float)
    return 1 / (1 + np.exp(-x))


class Adaline:
    """ADAptive LInear NEuron trained with the Widrow-Hoff (delta) rule.

    Unlike the perceptron, the error that drives learning is computed from
    the *linear* output ``z = X.w + b`` and not from the thresholded
    prediction, and every sample contributes to the update, whether or not
    it is currently classified correctly.

    Parameters
    ----------
    eta : float
        Learning rate.
    threshold : float, default 0.5
        Value of the linear output above which a sample is assigned class 1.
        The linear output is fitted to 0/1 targets, so the midpoint 0.5 is
        the natural cut.
    """

    def __init__(self, eta, threshold=0.5):
        self.w = None
        self.b = 0
        self.eta = eta
        self.threshold = threshold

    def net_input(self, X):
        """Return the linear output ``X.w + b``."""
        return np.dot(X, self.w) + self.b

    # heaviside (step) function used to turn the linear output into a label
    def activation(self, z):
        """Heaviside step: 1 where ``z > 0``, else 0 (``z == 0`` maps to 0)."""
        return np.heaviside(z, 0)

    # training Adaline
    def fit(self, X, y, epochs, performance_criterion):
        """Fit with batch gradient descent on the squared error; return ``(w, b)``.

        Weights are reset to zeros and the bias to 0. In each epoch the linear
        output of all samples is computed with the current parameters, the
        errors ``y - z`` are accumulated over the whole training set, and the
        parameters are moved by ``eta`` times that sum. After each epoch
        ``performance_criterion(y, y_pred)`` is printed, where ``y_pred`` are
        the class predictions made before that epoch's update.
        """
        X = np.asarray(X, dtype=float)
        targets = np.asarray(y, dtype=float)

        self.w = np.zeros(X.shape[1])
        self.b = 0

        for epoch in range(epochs):
            z = self.net_input(X)
            y_pred = self.activation(z - self.threshold)

            # Delta rule: the error is taken on the linear output and summed
            # over ALL samples (no "only if misclassified" gating).
            errors = targets - z
            self.w = self.w + self.eta * np.dot(X.T, errors)
            self.b = self.b + self.eta * errors.sum()

            print(f"\t epoch:{epoch}, accuracy:{performance_criterion(y, y_pred)}")
        return self.w, self.b

    def predict(self, X):
        """Return 1 where the linear output exceeds ``threshold``, else 0."""
        return self.activation(self.net_input(X) - self.threshold)

In [None]:
from sklearn.datasets import load_iris, load_wine

# Load both datasets used to test the exercise.
iris = load_iris()
wine = load_wine()

In [None]:
# testing on iris data
# Assumes X and y still hold the iris features/labels assigned in an earlier
# cell — this cell does not re-create them.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y, shuffle=True)
# Scale with statistics from the training part only.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# NOTE(review): this trains a Perceptron (sequential fit_print with plots),
# not the Adaline class the exercise asks for — confirm this is intended.
model = Perceptron(0.01)
print(f'model.w: {model.w}, model.b: {model.b}')

model.fit_print(X_train, y_train, epochs=1)
print(f'model.w: {model.w}, model.b: {model.b}')

# Predictions of the trained model on both splits.
y_train_predicted = model.predict(X_train)
y_test_predicted = model.predict(X_test)

In [None]:
# Display the names of the wine feature columns.
wine.feature_names

In [None]:
# Display the wine dataset's target (class) names.
wine.target_names

In [None]:
# Display the shape of the wine feature matrix.
wine.data.shape

In [None]:
# Display the wine target labels.
wine.target

In [None]:
# Use every wine feature column (no column slice is applied here).
X = wine.data # all feature columns
# Binary target: 1 where the original label is 1, otherwise 0.
y = (wine.target == 1).astype(int)
# Display the resulting binary labels.
y

In [None]:
# Stratified, shuffled 80/20 split of the wine samples.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, stratify=y)

In [None]:
# Fresh scaler, fitted on the training part only and applied to both parts.
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# NOTE(review): this trains a Perceptron, not the Adaline class the exercise
# asks for — confirm this is intended.
model = Perceptron(0.01)
print(f'model.w: {model.w}, model.b: {model.b}')

# fit_print plots around every sample update; plot_decision_boundary only
# draws feature columns 0 and 1, so the plots show a 2-D slice of the data.
model.fit_print(X_train, y_train, epochs=1)
print(f'model.w: {model.w}, model.b: {model.b}')

# Predictions of the trained model on both splits.
y_train_predicted = model.predict(X_train)
y_test_predicted = model.predict(X_test)

## Generalization capabilities of Adaline

In [None]:
# 5-fold stratified cross-validation of Adaline.
# For every fold the scaler is re-fitted on the training part only, a fresh
# model is trained, and the accuracy on the held-out part is recorded.
kf = StratifiedKFold(n_splits=5)
acc = []

# enumerate(..., start=1) replaces a hand-maintained fold counter.
for fold, (train_idx, test_idx) in enumerate(kf.split(X, y), start=1):
    print(f"fold:{fold}")

    # Training part: fit the scaler here so no test statistics leak in.
    X_train, y_train = X[train_idx, :], y[train_idx]
    X_train = scaler.fit_transform(X_train)

    model_A = Adaline(0.001)
    model_A.fit(X_train, y_train, epochs=10,
                performance_criterion=accuracy_score)

    # Held-out part: reuse the scaling learned on the training part.
    X_test, y_test = X[test_idx, :], y[test_idx]
    X_test = scaler.transform(X_test)
    y_test_pred = model_A.predict(X_test)

    accuracy = accuracy_score(y_test, y_test_pred)
    print(f"test accuracy fold:{accuracy}\n")
    acc.append(accuracy)

# `model_A` is left bound to the model trained on the last fold.
print(f"mean accuracy:{np.mean(acc)}, std accuracy:{np.std(acc)}")

Decision boundary of ADALINE trained on last fold

In [None]:
# Re-fit the scaler on the full data set and plot all samples, passing the
# weights of the Adaline model from the last fold.
# NOTE(review): plot_decision_boundary only draws feature columns 0 and 1 and
# a boundary through the origin, so for data with more than two features this
# is a 2-D slice, not the full decision boundary — confirm this is intended.
X_norm = scaler.fit_transform(X)
plot_decision_boundary(X_norm, y, model_A.w,"Final")