# Lecture 1

- To use the free GPU, go to Runtime -> Change runtime type -> T4 GPU (very weak, but the only free GPU). This will restart the session.
- Verify that the GPU is assigned correctly.

In [None]:
import torch
# sanity check after switching the runtime type
torch.cuda.is_available() # evaluates to True when the session runs with a GPU

True

## Mount the personal drive

- One advantage of Colab: connecting to other Google services such as Google Drive is simple.
- By mounting Google Drive, we can read our Drive data and save working files permanently.
- To mount Google Drive, we need to run the following code, log in to the Google account, and copy the authentication code into the input box to finish the process.

In [None]:
from google.colab import drive
# mount the personal Google Drive under /content/drive (requires authorizing the access)
drive.mount('/content/drive')

Mounted at /content/drive


- Each time we run it we need to authorize the access in the pop-up window

## Implementing the Perceptron

- We create the Perceptron class.
- We will be using accuracy score to evaluate the performance.

In [None]:
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
class Perceptron:
    """Single-layer perceptron with a Heaviside step activation.

    Training uses a batch-style variant of the perceptron rule: in every
    epoch the predictions for the whole training set are computed once with
    the current parameters, and each sample's correction
    ``eta * (target - prediction)`` is then added to the weights and bias.

    ``w`` (weights) and ``b`` (bias) are ``None`` until ``fit`` is called.
    """

    def __init__(self, eta):
        self.w = None  # weights, initialized in fit()
        self.b = None  # bias, initialized in fit()
        self.eta = eta  # learning rate

    def activation(self, z):
        """Heaviside (step) activation: 0 for z < 0, 1 for z >= 0."""
        # the second argument is the value returned at z == 0; we want the
        # unit to count as activated exactly on the decision boundary
        return np.heaviside(z, 1)

    def fit(self, X, y, epochs, performance_criterion):
        """Train the perceptron and return the learned ``(w, b)``.

        X - training samples, array-like of shape (n_samples, n_features)
        y - binary targets (0/1), array-like of length n_samples
        epochs - number of passes over the training set
        performance_criterion - callable ``f(y_true, y_pred)`` whose result
            is printed once per epoch (e.g. sklearn's ``accuracy_score``)

        The value printed for an epoch is measured with the parameters as
        they were *before* that epoch's update.
        """
        # Convert to arrays so that lists work and so that pandas objects
        # are indexed by position instead of by label.
        X = np.asarray(X)
        y = np.asarray(y)
        n_features = X.shape[1]

        # initializing weights and bias with zeros
        # (alternative: small random values,
        #  np.random.randn(n_features) / np.sqrt(n_features))
        self.w = np.zeros(n_features)
        self.b = 0

        for epoch in range(epochs):
            # forward pass over the whole training set
            y_pred = self.predict(X)
            print(f'Epoch: {epoch}, Accuracy: {performance_criterion(y, y_pred)}')

            # Accumulate the correction of every training sample. y_pred is
            # not recomputed inside this loop, so all corrections of one
            # epoch are based on the same (pre-update) predictions.
            for sample, target, predicted in zip(X, y, y_pred):
                error = target - predicted
                self.w = self.w + self.eta * error * sample
                self.b = self.b + self.eta * error

        return self.w, self.b

    def predict(self, X):
        """Return the 0/1 prediction for every row of X."""
        z = np.dot(X, self.w) + self.b  # weighted sum plus bias
        return self.activation(z)

Now, we classify the Iris data set.

In [None]:
# load_wine is imported here as well; it is used in the exercise further below
from sklearn.datasets import load_iris, load_wine

iris = load_iris()
# names of the measured features (the columns of iris.data)
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [None]:
# names of the classes encoded in iris.target
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [None]:
# (number of samples, number of features)
iris.data.shape

(150, 4)

In [None]:
# integer class label of every sample
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [None]:
# X = iris.data # all columns
X = iris.data[:, (0, 1)] # first two features (sepal length and sepal width), all the rows
y = (iris.target == 0).astype(int) # binary target: 1 for setosa (labeled 0 in iris.target), 0 for the other two species
y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

Split the data into training and test sets and normalize.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# we split into training and test sets because we would like to estimate the accuracy/error of our model on unseen data,
# i.e. its generalization capabilities
# test_size=0.1 holds out 10% of the samples; stratify=y keeps the class proportions in both parts
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                    shuffle=True,
                                                    stratify=y)
# the scaler is fitted on the training part only and then reused, unchanged, for the test part
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

The validation set is a portion of the training set that is held out from fitting. The aim of validation is to get a more stable performance estimate.
Validation idea: if we have hyperparameters (here, the learning rate eta), we use validation to pick their values. Hyperparameters must be tuned, and we can do this with cross-validation.

Fit the Perceptron on the training data.

In [None]:
model = Perceptron(0.01) # learning rate eta = 0.01
# weights and bias are still None before training
print(f'Model.w: {model.w}, model.b: {model.b}')

# fit() prints the training accuracy once per epoch
model.fit(X_train, y_train, epochs=10, performance_criterion=accuracy_score)
print(f'Model.w: {model.w}, model.b: {model.b}')

Model.w: None, model.b: None
Epoch: 0, Accuracy: 0.3333333333333333
Epoch: 1, Accuracy: 0.7851851851851852
Epoch: 2, Accuracy: 1.0
Epoch: 3, Accuracy: 1.0
Epoch: 4, Accuracy: 1.0
Epoch: 5, Accuracy: 1.0
Epoch: 6, Accuracy: 1.0
Epoch: 7, Accuracy: 1.0
Epoch: 8, Accuracy: 1.0
Epoch: 9, Accuracy: 1.0
Model.w: [-0.76170168  0.55543045], model.b: -0.6100000000000003


In [None]:
# predictions of the trained model on the (scaled) training and test sets
y_train_predicted = model.predict(X_train)
y_test_predicted = model.predict(X_test)

We compute the final training and testing accuracies.

In [None]:
# train accuracy: fraction of correctly classified training samples
print(f'Train accuracy: {accuracy_score(y_train, y_train_predicted)}')

# test accuracy: the same measure on the held-out samples
print(f'Test accuracy: {accuracy_score(y_test, y_test_predicted)}')

Train accuracy: 1.0
Test accuracy: 0.9333333333333333


## Better way to estimate the generalization error: **Cross-validation**



In [None]:
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split, StratifiedKFold

# 5-fold stratified cross-validation: every fold serves once as the test set
kf = StratifiedKFold(n_splits=5)
acc = []  # test accuracy of each fold
fold = 1

for train_idx, test_idx in kf.split(X, y):
    print(f'fold: {fold}')

    # carve out the raw training and test partitions of this fold
    X_train, X_test = X[train_idx, :], X[test_idx, :]
    y_train, y_test = y[train_idx], y[test_idx]

    # standardize with statistics estimated on the training partition only
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # train a fresh model for this fold
    model = Perceptron(0.01)
    model.fit(X_train, y_train, epochs=10, performance_criterion=accuracy_score)

    # evaluate on the held-out partition
    y_test_predicted = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_test_predicted)
    print(f'test accuracy fold: {accuracy}\n')
    acc.append(accuracy)
    fold += 1

print(f'Mean accuracy: {np.mean(acc)}, std accuracy: {np.std(acc)}')

fold: 1
Epoch: 0, Accuracy: 0.3333333333333333
Epoch: 1, Accuracy: 0.7583333333333333
Epoch: 2, Accuracy: 0.9916666666666667
Epoch: 3, Accuracy: 0.9916666666666667
Epoch: 4, Accuracy: 0.9916666666666667
Epoch: 5, Accuracy: 0.9916666666666667
Epoch: 6, Accuracy: 1.0
Epoch: 7, Accuracy: 1.0
Epoch: 8, Accuracy: 1.0
Epoch: 9, Accuracy: 1.0
test accuracy fold: 1.0

fold: 2
Epoch: 0, Accuracy: 0.3333333333333333
Epoch: 1, Accuracy: 0.7583333333333333
Epoch: 2, Accuracy: 0.9916666666666667
Epoch: 3, Accuracy: 0.9916666666666667
Epoch: 4, Accuracy: 0.9916666666666667
Epoch: 5, Accuracy: 0.9916666666666667
Epoch: 6, Accuracy: 1.0
Epoch: 7, Accuracy: 1.0
Epoch: 8, Accuracy: 1.0
Epoch: 9, Accuracy: 1.0
test accuracy fold: 1.0

fold: 3
Epoch: 0, Accuracy: 0.3333333333333333
Epoch: 1, Accuracy: 0.7666666666666667
Epoch: 2, Accuracy: 0.9916666666666667
Epoch: 3, Accuracy: 0.9916666666666667
Epoch: 4, Accuracy: 1.0
Epoch: 5, Accuracy: 1.0
Epoch: 6, Accuracy: 1.0
Epoch: 7, Accuracy: 1.0
Epoch: 8, Accu

## Exercise
Load the wine dataset and train and evaluate a Perceptron in CV setting.

In [None]:
wine = load_wine()

# quick overview of the dataset: size, feature names, class names and labels
print(f'Wine dimensions: {wine.data.shape}')
print(f'Feature names: {wine.feature_names}')
print(f'Targets: {wine.target_names}')
print(f'Target array: {wine.target}')

Wine dimensions: (178, 13)
Feature names: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Targets: ['class_0' 'class_1' 'class_2']
Target array: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


Number of instances: 178
Number of attributes: 13

We predict class_1 (the class labeled 1) against the other ones.

In [None]:
X = wine.data[:,] # all rows and all 13 features (no column selection is applied here)
y = (wine.target == 1).astype(int) # binary target: 1 for class_1 (labeled 1 in wine.target), 0 for class_0 and class_2
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

In [None]:
scaler = StandardScaler()
# test_size=0.2 holds out 20% of the samples; stratify=y keeps the class proportions in both parts
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    shuffle=True,
                                                    stratify=y)
# the scaler is fitted on the training part only and then reused, unchanged, for the test part
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
model = Perceptron(0.005) # learning rate eta = 0.005
# weights and bias are still None before training
print(f'Model.w: {model.w}, model.b: {model.b}')

# fit() prints the training accuracy once per epoch
model.fit(X_train, y_train, epochs=10, performance_criterion=accuracy_score)
print(f'Model.w: {model.w}, model.b: {model.b}')

Model.w: None, model.b: None
Epoch: 0, Accuracy: 0.4014084507042254
Epoch: 1, Accuracy: 0.9436619718309859
Epoch: 2, Accuracy: 0.9647887323943662
Epoch: 3, Accuracy: 0.9788732394366197
Epoch: 4, Accuracy: 0.9859154929577465
Epoch: 5, Accuracy: 0.9859154929577465
Epoch: 6, Accuracy: 0.9788732394366197
Epoch: 7, Accuracy: 0.9788732394366197
Epoch: 8, Accuracy: 0.9859154929577465
Epoch: 9, Accuracy: 1.0
Model.w: [-0.25502967 -0.07808429 -0.13332235  0.11805741 -0.01275768 -0.01772074
  0.00882843  0.08629269 -0.0142672  -0.30909354  0.14888763  0.03525025
 -0.24282957], model.b: -0.31500000000000017


In [None]:
# Cross-validate the Perceptron on the wine data for several learning rates.
# Each learning rate gets its own 5-fold stratified CV run and its own
# mean/std summary, so the rates can be compared with each other.
kf = StratifiedKFold(n_splits=5) # split set in five subsets
learning_rates = [0.001, 0.01, 0.1]

for rate in learning_rates:
    # start a fresh list for every learning rate; otherwise the summary would
    # mix the fold accuracies of all rates into a single number
    acc = []
    print(f'Cross-validation with Learning rate = {rate}')
    for fold, (train_idx, test_idx) in enumerate(kf.split(X, y), start=1):
        print(f'fold: {fold}')

        # raw training and test partitions of this fold
        X_train, y_train = X[train_idx, :], y[train_idx]
        X_test, y_test = X[test_idx, :], y[test_idx]

        # standardize with statistics estimated on the training partition only
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # train a fresh model with the learning rate under evaluation
        model = Perceptron(rate)
        model.fit(X_train, y_train, epochs=10, performance_criterion=accuracy_score)

        # evaluate on the held-out partition
        y_test_predicted = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_test_predicted)
        print(f'test accuracy fold: {accuracy}\n')
        acc.append(accuracy)

    # summary for this learning rate only
    print(f'Learning rate = {rate}: mean accuracy: {np.mean(acc)}, std accuracy: {np.std(acc)}\n')

Cross-validation with Learning rate = 0.001
fold: 1
Epoch: 0, Accuracy: 0.4014084507042254
Epoch: 1, Accuracy: 0.9577464788732394
Epoch: 2, Accuracy: 0.9859154929577465
Epoch: 3, Accuracy: 0.9788732394366197
Epoch: 4, Accuracy: 0.9859154929577465
Epoch: 5, Accuracy: 0.971830985915493
Epoch: 6, Accuracy: 0.9859154929577465
Epoch: 7, Accuracy: 0.9788732394366197
Epoch: 8, Accuracy: 0.9859154929577465
Epoch: 9, Accuracy: 0.9929577464788732
test accuracy fold: 0.9166666666666666

fold: 2
Epoch: 0, Accuracy: 0.4014084507042254
Epoch: 1, Accuracy: 0.9507042253521126
Epoch: 2, Accuracy: 0.971830985915493
Epoch: 3, Accuracy: 0.9859154929577465
Epoch: 4, Accuracy: 0.9859154929577465
Epoch: 5, Accuracy: 0.9929577464788732
Epoch: 6, Accuracy: 0.9929577464788732
Epoch: 7, Accuracy: 0.9859154929577465
Epoch: 8, Accuracy: 1.0
Epoch: 9, Accuracy: 1.0
test accuracy fold: 0.9444444444444444

fold: 3
Epoch: 0, Accuracy: 0.39436619718309857
Epoch: 1, Accuracy: 0.9225352112676056
Epoch: 2, Accuracy: 0.978