## Practical Session 2

[Source](https://documents.epfl.ch/users/f/fl/fleuret/www/dlc/dlc-practical-2.pdf)

In [1]:
import torch
from torch import Tensor
import dlc_practical_prologue as prologue

#### 1. Nearest neighbor

In [2]:
def nearest_classification(train_input, train_target, x):
    """
    Return the target of the training sample closest to x.

    train_input  : n x d
    train_target : n x 1
    x            : 1 x d

    Returns train_target[i], where i is the row of train_input with the
    smallest squared Euclidean distance to x.
    """

    # Squared distances are enough: taking the square root would not
    # change which row is the closest.
    squared_dist = (train_input - x).pow(2).sum(1)

    # torch.min along a dimension returns (values, indices). The index is
    # used as returned: on current PyTorch it is a 0-dim tensor, and
    # indexing it with [0] raises IndexError.
    _, min_ix = torch.min(squared_dist, 0)

    return train_target[min_ix]

#### 2. Error estimation

In [33]:
def compute_nb_errors(train_input, train_target, test_input, test_target,
                      mean = None, proj = None):
    
    """
    Count the test samples misclassified by the nearest-neighbor rule.

    train_input  : n x d
    train_target : n x 1
    test_input   : m x d
    test_target  : m x 1
    mean         : None OR d (or 1 x d), subtracted from every sample
    proj         : None OR c x d, each row is one projection direction

    Returns the number of rows of test_input whose predicted target
    differs from the matching entry of test_target.
    """
    
    nr_errors = 0
    
    if mean is not None:
        # Center the train and test sets with the same mean
        train_input = train_input - mean
        test_input = test_input - mean
        
    if proj is not None:
        # proj has to be transposed for the dimensions to be correct
        train_input = train_input.mm(proj.t())
        test_input = test_input.mm(proj.t())
        
    for row in range(test_input.size(0)):
        pred_target = nearest_classification(train_input,train_target,
                                             test_input[row])
        
        # Index test_target without rebinding it: overwriting the name
        # with a single label would break every later iteration.
        if test_target[row] != pred_target:
            nr_errors += 1
            
    return nr_errors

#### 3. PCA

In [112]:
def PCA(x):
    """
    Compute the PCA basis of a data set.

    x : n x d

    Returns (mean, basis):
    mean  : d, the mean of the rows of x
    basis : d x d, one eigenvector of the covariance matrix per row,
            sorted by decreasing absolute eigenvalue
    """
    
    # Remove the mean from data
    mean = x.mean(0)
    x = x - mean
    
    # Create the d x d covariance matrix (x.mm(x.t()) would be the n x n
    # Gram matrix, whose eigenvectors do not live in feature space)
    cov_mx = x.t().mm(x) / x.size(0)
    
    # Get the eigen values and eigen vectors. The covariance matrix is
    # symmetric, so eigh applies; it returns real eigenvalues and the
    # eigenvectors as the COLUMNS of eig_vect.
    eig_val, eig_vect = torch.linalg.eigh(cov_mx)
        
    # Sort the eigen values in descending order
    sorted_indices = eig_val.abs().sort(0, True)[1]
    
    # Return the basis with the sorted eigen vectors; transpose first so
    # that each row is one eigenvector before reordering the rows
    eig_vect = eig_vect.t()[sorted_indices]
    
    return (mean, eig_vect)

NOTE: The column-vector notation used here is awkward to work with.