In [1]:
import numpy as np
import os

In [2]:
def load_data(train = True):
    """
    Load the data from disk
    Parameters
    ----------
    train : bool
        Load training data if true, else load test data
    Returns
    -------
        Tuple:
            Images
            Labels
    """
    directory = 'train' if train else 'test'
    patterns = np.load(os.path.join('./data/', directory, 'images.npz'))['arr_0']
    labels = np.load(os.path.join('./data/', directory, 'labels.npz'))['arr_0']
    return patterns.reshape(len(patterns), -1), labels

In [3]:
def generate_minibatches(dataset, batch_size=64):
    X, y = dataset
    l_idx, r_idx = 0, batch_size
    while r_idx < len(X):
        yield X[l_idx:r_idx], y[l_idx:r_idx]
        l_idx, r_idx = r_idx, r_idx + batch_size

    yield X[l_idx:], y[l_idx:]

def generate_k_fold_set(dataset, k = 10): 
    X, y = dataset

    order = np.random.permutation(len(X))
    
    fold_width = len(X) // k

    l_idx, r_idx = 0, fold_width

    for i in range(k):
        train = np.concatenate([X[order[:l_idx]], X[order[r_idx:]]]), np.concatenate([y[order[:l_idx]], y[order[r_idx:]]])
        validation = X[order[l_idx:r_idx]], y[order[l_idx:r_idx]]
        yield train, validation
        l_idx, r_idx = r_idx, r_idx + fold_width

In [4]:
train=load_data(train = True)
test=load_data(train = False)

In [5]:
def z_score_normalize(X, u = None, xd = None):
    """
    Performs z-score normalization on X. 
    f(x) = (x - μ) / σ
        where 
            μ = mean of x
            σ = standard deviation of x
    Parameters
    ----------
    X : np.array
        The data to z-score normalize
    u (optional) : np.array
        The mean to use when normalizing
    sd (optional) : np.array
        The standard deviation to use when normalizing
    Returns
    -------
        Tuple:
            Transformed dataset with mean 0 and stdev 1
            Computed statistics (mean and stdev) for the dataset to undo z-scoring.
    """
    if u == None:
        mean=np.mean(X, axis=0)
    else:
        mean=u
    if xd==None:
        std=np.std(X, axis=0)
    else:
        std=xd
        
    X = (X - mean) / std
    
    return (X,mean,std)

In [6]:
def min_max_normalize(X, _min = None, _max = None):
    """
    Performs min-max normalization on X. 
    f(x) = (x - min(x)) / (max(x) - min(x))
    Parameters
    ----------
    X : np.array
        The data to min-max normalize
    _min (optional) : np.array
        The min to use when normalizing
    _max (optional) : np.array
        The max to use when normalizing
    Returns
    -------
        Tuple:
            Transformed dataset with all values in [0,1]
            Computed statistics (min and max) for the dataset to undo min-max normalization.
    """
    if _min == None:
        _min=np.min(X,axis=0)
    if _max == None:
        _max=np.max(X,axis=0)

        
    X= (X-_min)/(_max - _min)
    
    return (X,_min,_max)

In [7]:
def onehot_encode(y):
    """
    Performs one-hot encoding on y.
    Ideas:
        NumPy's `eye` function
    Parameters
    ----------
    y : np.array
        1d array (length n) of targets (k)
    Returns
    -------
        2d array (shape n*k) with each row corresponding to a one-hot encoded version of the original value.
    """
    if len(np.unique(y))>2:
        values = y
        n_values = np.max(values) + 1
        return np.eye(n_values)[values] 
    else:
        value = np.max(y)
        return (y == value).astype(int).reshape(len(y),1) 

In [8]:
def shuffle(dataset):
    """
    Shuffle dataset.
    Make sure that corresponding images and labels are kept together. 
    Ideas: 
        NumPy array indexing 
            https://numpy.org/doc/stable/user/basics.indexing.html#advanced-indexing
    Parameters
    ----------
    dataset
        Tuple containing
            Images (X)
            Labels (y)
    Returns
    -------
        Tuple containing
            Images (X)
            Labels (y)
    """
    dim=dataset[0].shape[0]
    index1=np.arange(dim)
    np.random.shuffle(index1)
    shuffled_data=dataset[0][index1]
    shuffled_label=dataset[1][index1]
    return (shuffled_data,shuffled_label)

In [9]:
def append_bias(X):
    """
    Append bias term for dataset.
    Parameters
    ----------
    X
        2d numpy array with shape (N,d)
    Returns
    -------
        2d numpy array with shape ((N+1),d)
    """
    new=np.ones((X.shape[0],X.shape[1]+1))
    new[:,:-1] = X
    return new

In [1]:
def sigmoid(a):
    """
    Compute the sigmoid function.

    f(x) = 1 / (1 + e ^ (-x))

    Parameters
    ----------
    a
        The internal value while a pattern goes through the network
    Returns
    -------
    float
       Value after applying sigmoid (z from the slides).
    """
    clipped=[]
    a=a.flatten()
    for i in range(len(a)):
        if a[i]>20:
            clipped.append(20)
        elif a[i]<-20:
            clipped.append(-20)
        else:
            clipped.append(a[i])
    clipped=np.array(clipped)
    return 1/(1+np.exp(-clipped))

def softmax(a):
    """
    Compute the softmax function.

    f(x) = (e^x) / Σ (e^x)

    Parameters
    ----------
    a
        The internal value while a pattern goes through the network
    Returns
    -------
    float
       Value after applying softmax (z from the slides).
    """
#     a=np.clip(a,-20,20)
#     denominator=0
#     for i in range(a.shape[1]):
#         denominator+=np.exp(a[:,i])
#     result=[]
#     for i in range(a.shape[1]):
#         result.append(np.exp(a[:,i])/denominator)
        
#     return np.array(result).T
    a=np.clip(a,-20,20)
    a_exp = np.exp(a)
    partition = np.sum(a_exp, axis=1).reshape(-1,1)
    return a_exp / partition

def binary_cross_entropy(y, t):
    """
    Compute binary cross entropy.

    L(x) = t*ln(y) + (1-t)*ln(1-y)

    Parameters
    ----------
    y
        The network's predictions
    t
        The corresponding targets
    Returns
    -------
    float 
        binary cross entropy loss value according to above definition
    """
    vectorized=t*np.log(y)+(1-t)*np.log(1-y)
    return -np.mean(vectorized)

def multiclass_cross_entropy(y, t):
    """
    Compute multiclass cross entropy.

    L(x) = - Σ (t*ln(y))

    Parameters
    ----------
    y
        The network's predictions
    t
        The corresponding targets
    Returns
    -------
    float 
        multiclass cross entropy loss value according to above definition
    """
    entropy=np.zeros(t.shape[0])
    for i in range(10):
        target_column=t[:,i]
        prediction_column=y[:,i]
        entropy+=target_column*np.log(prediction_column)
    return -np.mean(entropy/10)

class Network:
    def __init__(self, hyperparameters, activation, loss, out_dim):
        """
        Perform required setup for the network.

        Initialize the weight matrix, set the activation function, save hyperparameters.

        You may want to create arrays to save the loss values during training.

        Parameters
        ----------
        hyperparameters
            A Namespace object from `argparse` containing the hyperparameters
        activation
            The non-linear activation function to use for the network
        loss
            The loss function to use while training and testing
        """
        self.hyperparameters = hyperparameters
        self.activation = activation
        self.loss = loss
        self.out_dim=out_dim

        self.weights = np.zeros((28*28+1, out_dim))

    def forward(self, X):
        """
        Apply the model to the given patterns

        Use `self.weights` and `self.activation` to compute the network's output

        f(x) = σ(w*x)
            where
                σ = non-linear activation function
                w = weight matrix

        Make sure you are using matrix multiplication when you vectorize your code!

        Parameters
        ----------
        X
            Patterns to create outputs for
            
        """
        product=np.matmul(X,self.weights)
        return self.activation(product)

    def __call__(self, X):
        return self.forward(X)

    def train(self, minibatch):
        """
        Train the network on the given minibatch

        Use `self.weights` and `self.activation` to compute the network's output
        Use `self.loss` and the gradient defined in the slides to update the network.

        Parameters
        ----------
        minibatch
            The minibatch to iterate over

        Returns
        -------
        tuple containing:
            average loss over minibatch
            accuracy over minibatch
        """
        X, y = minibatch
        
        # To train the model, we have several things to do:
        # We will read of the learning rate, and bath size.
        # Then, based on the specification of the user,normalize the data.
        
        learning_rate=self.hyperparameters[0]
        batch_size_x=self.hyperparameters[1]
        
        X,mean_X,std_X=self.hyperparameters[2](X) # data Normalization and append bias 
        X=append_bias(X)
        
        
        dataset=(X,y)
        
        # Keep track of our model performance over epoches 
        accuracies_10= []
        training_losses_10 = []
        validation_losses_10=[]
        val_accuracies_10=[]
        
        for training, validation in generate_k_fold_set(dataset):
            
            self.weights = np.zeros((28*28+1, self.out_dim))
            
            #Keep track of the statistics for the single val process
            accuracies = []
            training_losses = []
            validation_losses=[]
            val_accuracies=[]
            
            # train our model with maximum 100 epoches allowed 
            for epoch in range(18):
                training=shuffle(training)
                # Generate mini_bathch with specified 
                for X_train,y_train in generate_minibatches(training,batch_size=batch_size_x):

                    # Compute the output of our model with given weight
                    # This should output a 10*data_size matrix, 
                    # where each column represents probability distribution

                    predictions=self.forward(X_train)#.reshape((-1,self.out_dim))
                    
                    
    
                    # One-hot encode the labels for which we can compute the error rate 
                    # Notice that the dimension of the one-hot encoded target is datasize*10
                    target=onehot_encode(y_train)#.reshape((-1,self.out_dim))
                     
      
                    error_signal=(target-predictions)
        
            
                    gradient=-1*(np.matmul(error_signal.T,X_train))
                    #gradient=gradient.flatten().reshape((-1,self.out_dim))
                    
                
                    # Upadte the weight matrix with gradient descent
                    
                    self.weights=self.weights-learning_rate*gradient.T
                          

                # Evaluate our model's performance at the end of each epoches
                # Evaluate at the entire training dataset.
                #print(training[0])
                prediction_epoch=self.forward(training[0])
                
                if self.activation==softmax:
                    encoded_target=onehot_encode(training[1])
                    training_losses_n=multiclass_cross_entropy(prediction_epoch,encoded_target)
                    training_losses.append(training_losses_n)
                    # compute the accuracy
                    predicted_labels=np.argmax(prediction_epoch,axis=1)
                    labels=np.argmax(encoded_target,axis=1)
                    single_accuracy=np.mean(predicted_labels==labels)
                    print(single_accuracy)
                    accuracies.append(single_accuracy)
                else:
                    encoded_target=onehot_encode(training[1]).flatten()
                    training_losses_n=binary_cross_entropy(prediction_epoch,encoded_target)
                    training_losses.append(training_losses_n)
                    #print(training_losses_n)
                    # compute the accuracy 
                    predicted_labels=1*(prediction_epoch>0.5)
                    single_accuracy=np.mean(predicted_labels==encoded_target)
                    accuracies.append(single_accuracy)
                    
                # Keep track of the validation losses 
                val_predict=self.forward(validation[0])
                encoded_val_target=onehot_encode(validation[1])

                if self.activation==softmax:
                    val_losses=multiclass_cross_entropy(val_predict,encoded_val_target)
                    validation_losses.append(val_losses)
                    # compute the accuracy
                    predicted_labels_val=np.argmax(val_predict,axis=1)
                    labels_val=np.argmax(encoded_val_target,axis=1)
                    single_accuracy_val=np.mean(predicted_labels_val==labels_val)
                    val_accuracies.append(single_accuracy_val)
                else:
                    encoded_val_target=onehot_encode(validation[1]).flatten()
                    val_losses=binary_cross_entropy(val_predict,encoded_val_target)
                    validation_losses.append(val_losses) 
                    predicted_labels_val=1*(val_predict>0.5)
                    single_accuracy_val=np.mean(predicted_labels_val==encoded_val_target)
                    #print(single_accuracy_val)
                    val_accuracies.append(single_accuracy_val)
                    
                
            accuracies_10.append(accuracies)
            training_losses_10.append(training_losses)
            validation_losses_10.append(validation_losses)
            val_accuracies_10.append(val_accuracies)    
            
        return accuracies_10,training_losses_10,validation_losses_10,val_accuracies_10
                

    def test(self, minibatch):
        """
        Test the network on the given minibatch

        Use `self.weights` and `self.activation` to compute the network's output
        Use `self.loss` to compute the loss.
        Do NOT update the weights in this method!

        Parameters
        ----------
        minibatch
            The minibatch to iterate over

        Returns
        -------
            tuple containing:
                average loss over minibatch
                accuracy over minibatch
        """
        X, y = minibatch
        # Remember to normalize the data and append bias 
        X,mean_X,std_X=self.hyperparameters[2](X)
        X=append_bias(X)
        prediction_test=self.forward(X)
        encoded_target=onehot_encode(y).flatten()
        accuracy=None
        losses=None
        
        if self.activation==softmax:
            losses=multiclass_cross_entropy(prediction_test,encoded_target)
            # compute the accuracy
            predicted_labels_test=np.argmax(prediction_test,axis=1)
            labels_test=np.argmax(encoded_target,axis=1)
            accuracy=np.mean(predicted_labels_test==labels_test)
        else:
            losses=binary_cross_entropy(prediction_test,encoded_target)
            # compute the accuracy 
            predicted_labels_test=1*(prediction_test>0.5)
            accuracy=np.mean(predicted_labels_test==encoded_target)
        return accuracy,losses

In [91]:
hyperparameters_1=[0.001,300,z_score_normalize]
hyperparameters_2=[0.01,300,min_max_normalize]

In [92]:
# Just for logistic regression 
data_0=train[0][train[1]==2]
labels_0=train[1][train[1]==2]
data_6=train[0][train[1]==6]
labels_6=train[1][train[1]==6]
test_0=test[0][test[1]==2]
test_labels_0=test[1][test[1]==2]
test_6=test[0][test[1]==6]
test_labels_6=test[1][test[1]==6]

In [93]:
data_logistic=np.array(list(data_0)+list(data_6))
label_logistic=np.array(list(labels_0)+list(labels_6))
logistic_dataset=(data_logistic,label_logistic)
test_logistic=np.array(list(test_0)+list(test_6))
test_label_logistic=np.array(list(test_labels_0)+list(test_labels_6))
logistic_test=(test_logistic,test_label_logistic)

In [100]:
logistic_regression=Network(hyperparameters_1,sigmoid, binary_cross_entropy,1)

In [101]:
logistic_regression.train(logistic_test)

ValueError: operands could not be broadcast together with shapes (785,300) (785,90000) 

In [96]:
#logistic_regression.test(logistic_test)

In [2]:
softmax_regression=Network(hyperparameters_1,softmax, multiclass_cross_entropy,10)

NameError: name 'hyperparameters_1' is not defined

In [106]:
softmax_regression.train(train)

0.8186666666666667
0.8247037037037037
0.8237037037037037
0.8296666666666667
0.8277592592592593
0.8263148148148148


KeyboardInterrupt: 