In [2]:
import numpy as np
from tqdm.auto import tqdm
from math import sqrt
import numpy as np

In [3]:
# model , functions
#use by default ax=1, when the array is 2D
#use ax=0 when the array is 1D
def softmax( x, ax=1 ):
    m = np.max( x, axis=ax, keepdims=True )#max per row
    p = np.exp( x - m )
    return ( p / np.sum(p,axis=ax,keepdims=True) )

def logexp(x):
    return np.log(1 + np.exp(x))
def logexp_d(x):
    a = np.exp(x)
    return a / (1 + a)

def tanh(x):
    a = np.exp(x)
    b = np.exp(-1 * x)
    return (a - b ) / (a + b)

def tanh_d(x):
    return 1 - (tanh(x) ** 2 )

def cos(x):
    return np.cos(x)
def cos_d(x):
    return -np.sin(x)

def identity(x):
    return x
def identity_d(x):
    return np.ones(x.shape)

class Model:
    def __init__(self , l2_reg , n_classes , hidden_size
                 ,n_inputs , activation="logexp" ):
        
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        
        # We add a bias to n_inputs so D needs to be == n_inputs
        self.K = n_classes
        self.M = hidden_size
        self.D = n_inputs
        w1_fan_in = self.D+1
        w2_fan_in = self.M+1
        # TODO proper init
        self.W2 = (np.random.rand(self.K , self.M+1) * 2 - 1) * 1 / sqrt(w2_fan_in) 
        self.W1 = (np.random.rand(self.M , self.D+1) * 2  - 1) * 1 / sqrt(w1_fan_in)
        
        if activation=="logexp":
            self.activation=logexp
            self.activation_d=logexp_d
        elif activation=="tanh":
            self.activation=tanh
            self.activation_d=tanh_d
        elif activation=="cos":
            self.activation=cos
            self.activation_d=cos_d
        else:
            self.activation=identity
            self.activation_d=identity_d
            print("Invalid activation provided, using identity")
            
        self.l2 = l2_reg
        self.h = self.activation
    def parameters(self):
        return [self.W1 , self.W2]
    def set_parameters(self, params):
        self.W1 = params[0]
        self.W2 = params[1]
    
    def __call__(self, _input):
        """
        Does the models forward pass
        """
        pass
    def score(self, _input , _target):
        """
        Returns the loss of the model at _input with target _target.
        """
        # input (Nb, D + 1)
        # target (Nb, K)
        Nb = _input.shape[0]
        # Add ones for bias
        _input = np.concatenate((np.ones((Nb,1), dtype=np.float), _input), axis=-1)
        zetas = self.h(_input @ self.W1.T) # (Nb, M)
        # Add ones for bias
        zetas = np.concatenate((np.ones((Nb,1), dtype=np.float), zetas) , axis=-1) # (Nb, M+1)
        logits = zetas @ self.W2.T
        probabilities = softmax(logits)
        
        # Saving these arrays for gradient computation
        self.T = _target
        self.X = _input
        self.Y = probabilities
        self.Z = zetas
        
        log_likelihood =  np.sum(_target * np.log(probabilities))
        reg_term = - (self.l2 /2) * (np.sum(self.W1 * self.W1) + np.sum(self.W2 * self.W2))
        
        loss = log_likelihood + reg_term
        return loss
    def predict(self, _input):
        """
        Returns the predicted classes 
        """
        # input (Nb, D + 1)
        # target (Nb, K)
        Nb = _input.shape[0]
        _input = np.concatenate((np.ones((Nb,1), dtype=np.float), _input), axis=-1)
        zetas = self.h(_input @ self.W1.T) # (Nb, M)
        zetas = np.concatenate((np.ones((Nb,1), dtype=np.float), zetas) , axis=-1) # (Nb, M+1)
        logits = zetas @ self.W2.T
        probabilities : np.ndarray = softmax(logits)
        return probabilities.argmax(axis=-1)
        
        
    def grads(self):
        """
        REturns the gradients for the model's parameters.
        The output of this method and the output of the @parameters
        method should be both listswith the same length and each 
        element must be a numpy array with the same shapes  
        """
        TminusY = self.T - self.Y
        gradW2 = TminusY.T @ self.Z  - (self.l2 * self.W2)
        A = self.activation_d(self.X @ self.W1.T)
        #gradW1 = (((TminusY @ self.W2[:,1:]).T * A.T) @ self.X) 
        gradW1 = (((TminusY @ self.W2[:,1:]).T) * A.T @ self.X)- (self.l2 * self.W1)
        return [gradW1, gradW2]
 

In [4]:
import random
def gradcheck(model, tol=10e-4, epsilon=10e-6, batch_size=150):
    """
    Performs a numerical gradient check of the model 
    """
    test_input = np.random.rand(batch_size, model.n_inputs)
    test_target = np.zeros((batch_size,model.n_classes))
    # Set each examples class randomly
    for i in range(0,batch_size):
        test_target[i][random.randrange(0,model.n_classes)] =1 

    test_loss = model.score(test_input, test_target)
    test_grad = model.grads()
    # Perform gradient check
    count = 0
    all_ok = True
    
    for param , grad in zip(model.parameters(), test_grad):
        it = np.nditer(param, flags=['multi_index'] , op_flags=['writeonly'])
        for x in tqdm(it):
            previous_value = x
            target_grad = grad[it.multi_index]
            
            # Evalutaing at w + e
            param[it.multi_index] = x + epsilon
            wpe = model.score(test_input, test_target)
            
            # Evaluating at w - e
            param[it.multi_index] = x - 2*epsilon
            wme = model.score(test_input, test_target)
            
            # Resetting the model param
            param[it.multi_index] = previous_value
            
            num_grad = (wpe - wme) / (2 * epsilon)
            if abs(num_grad - target_grad) > tol:
                all_ok = False
                print("num grad " , num_grad)
                print("target grad " , target_grad)
                diff = abs(num_grad - target_grad)
                print("GRADIENT CHECK ERROR " + 
                      "%.2f paramorder %d coordinate %s dif %.2f" % (x, count ,it.multi_index, diff))
        count += 1
                
    if all_ok:
        print("Gradient checks passed!")
    else:
        print("Gradient checks failed")
model = Model(0.01 , n_classes=8, hidden_size=10 , n_inputs=5, activation="logexp")
gradcheck(model, batch_size=3)   

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Gradient checks passed!


In [5]:
#datasets
import os
from sklearn.model_selection import train_test_split
class MnistDataset():
    
    def _parse_mnist_file(self, filepath, label):
        x= None
        y= None
        with open(filepath , 'r') as f:
            lines = f.readlines()
            lines = " ".join(lines)
            x= np.fromstring(lines, dtype=int, sep=' ').reshape(-1 , 28 * 28)
            y = np.zeros((x.shape[0], 10))
            y[: , label] = 1
            
        return x , y
    def train(self):
        return self.train_x , self.train_y
    
    def dev(self):
        return self.dev_x , self.dev_y
    
    def test(self):
        return self.test_x , self.test_y
        
    def __init__(self, path):
        """
        loads the mnist dataset from the path
        """
        self.train_x = []
        self.train_y = []
        
        self.test_x = []
        self.test_y = []
        print("Loading MNIST dataset")
        s = os.listdir(path)
        for filename in s:
            curr_label = int(filename[-5])
            x , y = self._parse_mnist_file(path + "/" + filename, curr_label)
            if 'train' in filename:
                self.train_x.append(x)
                self.train_y.append(y)
            elif 'test' in filename:
                self.test_x.append(x)
                self.test_y.append(y)
            else:
                raise Exception(f"Unexpected mnist file {filename}")
        self.train_x = np.concatenate(self.train_x , axis=0) / 255
        self.train_y = np.concatenate(self.train_y , axis=0)
        
        self.test_x = np.concatenate(self.test_x , axis=0) / 255
        self.test_y = np.concatenate(self.test_y , axis=0)
        
        # Creating dev subsets
        self.train_x, self.dev_x , self.train_y, self.dev_y \
            = train_test_split(self.train_x , self.train_y , test_size=0.3)

import pickle
from itertools import chain
class CifarDataset():
    def _parse_cifar_file(self, filename):
        data = None
        labels = None
        with open(filename, 'rb') as f:
            x = pickle.load(f, encoding='bytes')
            labels = np.array(x[b'labels'])
            data = x[b'data'].astype(np.float)
            
        return data, labels
    
    def train(self):
        return self.train_x , self.train_y
    
    def dev(self):
        return self.dev_x , self.dev_y
    
    def test(self):
        return self.test_x , self.test_y
    
    def __init__(self, path ):
        print("Loading CIFAR-10 dataset")
        self.train_x = []
        self.train_y = []
        
        self.test_x = []
        self.test_y = []
        
        s = os.listdir(path)
        for filename in s:
            if 'data_batch' in filename:
                x , y = self._parse_cifar_file(path + "/" + filename)
                self.train_x.append(x)
                self.train_y.append(y)
            elif 'test_batch' in filename:
                x , y = self._parse_cifar_file(path + "/" + filename)
                self.test_x.append(x)
                self.test_y.append(y)
        
        
        
        self.train_x = np.concatenate(self.train_x , axis=0) / 255
        self.train_y = np.concatenate(self.train_y , axis=0)
        
        self.test_x = np.concatenate(self.test_x , axis=0) / 255
        self.test_y = np.concatenate(self.test_y , axis=0)
        
        # Need to convert labels to the target array ( one hot vectors)
        max_label = max(self.train_y.max() , self.train_y.max())
        one_hot_vectors = np.eye(max_label + 1)
        
        self.train_y = one_hot_vectors[self.train_y]
        self.test_y = one_hot_vectors[self.test_y]
        
        # Creating dev subsets
        self.train_x, self.dev_x , self.train_y, self.dev_y \
            = train_test_split(self.train_x , self.train_y , test_size=0.3)
        

In [6]:
#iteration methods
def train(model , x, y, batch_size , learning_rate, n_epochs , with_tqdm=True):
    # Batched stochastic gradient ascent
    pbar = range(n_epochs)
    if with_tqdm:
        pbar = tqdm(pbar)
    for i in pbar:
        for xb, yb in BatchSampler(x ,y, batch_size , with_tqdm=False):
            loss = model.score(xb,yb)
            params = model.parameters()
            grads = model.grads()
            new_params = [None] * len(params)
            for i in range(len(params)):
                new_params[i] = params[i] + (learning_rate* grads[i])
            model.set_parameters(new_params)
            if with_tqdm:
                pbar.set_postfix_str("loss: %.4f" % (loss))
    

from sklearn.metrics import accuracy_score

def evaluate(model, x , y , with_tqdm=True):
    loss_sum = 0
    y_true = []
    y_pred = []
    for xb, yb in BatchSampler(x ,y, 200 , with_tqdm=with_tqdm):
        curr_y_true = yb.argmax(axis=-1)
        curr_y_pred = model.predict(xb)
        
        y_true.append(curr_y_true)
        y_pred.append(curr_y_pred)
        
    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)
    
    return   1 - accuracy_score(y_true , y_pred)

def BatchSampler(x , y , batch_size, with_tqdm=True):
    num_examples = x.shape[0]
    generator = range(0, num_examples, batch_size)
    
    if with_tqdm:
        generator = tqdm(generator)
        
    for start_idx in generator:
        end_idx = min(num_examples-1 , (start_idx + batch_size) -1 )
        yield x[start_idx:end_idx,:] , y[start_idx:end_idx, :]

In [7]:
class DatasetLoader:
    loaded_dsets = {}
    def get_dataset(self, ds):
        if ds=="mnist":
            if ds not in DatasetLoader.loaded_dsets:
                DatasetLoader.loaded_dsets['mnist'] = MnistDataset("./data/mnist/")
            return DatasetLoader.loaded_dsets['mnist']
            
        elif ds=="cifar":
            if ds not in DatasetLoader.loaded_dsets:
                DatasetLoader.loaded_dsets['cifar'] = CifarDataset("./data/cifar/")
            return DatasetLoader.loaded_dsets['cifar']
        else:
            raise Exception("invalid dataset provided")
DatasetLoader.get_dataset = classmethod(DatasetLoader.get_dataset)

In [8]:
from sklearn.model_selection import train_test_split            
class Scenario:
    def __init__(self, lr , n_epochs , batch_size , l2_reg, hidden_size 
                 , dataset , activation , execute_on_dev, verbose):
        self._dict = {}
        self._dict['learning_rate'] = lr
        self._dict['n_epochs'] = n_epochs
        self._dict['batch_size'] = batch_size
        self._dict['hidden_size'] = hidden_size
        self._dict['l2_reg'] = l2_reg
        self._dict['dataset'] = dataset
        self._dict['activation'] = activation
        self._dict['execute_on_dev'] = execute_on_dev
        self._dict['verbose'] = verbose
        
    def execute(self):
        dataset_str = self['dataset']
        dataset = DatasetLoader.get_dataset(dataset_str)
        
            
        n_classes  = dataset.test()[1].shape[1]
        n_inputs  = dataset.test()[0].shape[1] 
        xt , yt = dataset.train()
        self.model = Model(self['l2_reg'] , n_classes, 
                           self['hidden_size'] , n_inputs , self['activation'])
        train(self.model , xt, yt ,self['batch_size'] ,\
              self['learning_rate'], self['n_epochs'] ,with_tqdm=self['verbose'] )
        
        
        xval , yval = dataset.dev() if self['execute_on_dev'] else dataset.test()
        error = evaluate(self.model , xval , yval, with_tqdm=self['verbose'])
        if self['verbose']:
            print("error is %.2f %%" % (error * 100))
        return error
    
    def __str__(self):
        return str(self._dict)
    def __getitem__(self, x):
        return self._dict[x]
a = Scenario(
             lr=0.001,
             n_epochs=1,
             batch_size=150,
             hidden_size=500,
             l2_reg=0.00001,
             dataset="cifar",
             activation="cos",
             execute_on_dev=True,
             verbose=True
)

In [9]:
print(a)

{'learning_rate': 0.001, 'n_epochs': 1, 'batch_size': 150, 'hidden_size': 500, 'l2_reg': 1e-05, 'dataset': 'cifar', 'activation': 'cos', 'execute_on_dev': True, 'verbose': True}


In [27]:
from itertools import product
def Tune():
    lrs = [0.01 , 0.001 , 0.0001 ]
    n_epochs = [ 0 ]
    batch_sizes = [150]
    hidden_sizes=[100 , 200 , 300]
    l2_regs = [0.0001 , 0.00001 , 0.000001 ]
    datasets=  ["mnist" , "cifar"]
    activations= ["logexp" , "tanh" , "cos"]
    execute_on_dev=[True]
    verbose=[False]
    generator = tqdm(
        list(product(
            lrs , n_epochs , batch_sizes,l2_regs, hidden_sizes,
            datasets , activations, execute_on_dev , verbose)))
    results = []
    for x in generator:
        a = Scenario(*x)
        result = a.execute()
        results.append([result , a])
        
    df = pd.DataFrame(data=results)
    df.to_csv("tune_res")
        
    print("min scenario error " , min(results, key=lambda x : x[0])[0])
    print("min error scenario " , min(results, key=lambda x : x[0])[1])

In [28]:
Tune()

HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


min scenario error  0.7989949748743719
min error scenario  {'learning_rate': 0.01, 'n_epochs': 0, 'batch_size': 150, 'hidden_size': 200, 'l2_reg': 0.0001, 'dataset': 'mnist', 'activation': 'tanh', 'execute_on_dev': True, 'verbose': False}


In [1]:
import pandas as pd

In [4]:
a = pd.read_csv("reeltune")

In [10]:
a

Unnamed: 0.1,Unnamed: 0,0,1
0,0,0.044779,"{'learning_rate': 0.01, 'n_epochs': 50, 'batch..."
1,1,0.070128,"{'learning_rate': 0.01, 'n_epochs': 50, 'batch..."
2,2,0.760469,"{'learning_rate': 0.01, 'n_epochs': 50, 'batch..."
3,3,0.899162,"{'learning_rate': 0.01, 'n_epochs': 50, 'batch..."
4,4,0.903317,"{'learning_rate': 0.01, 'n_epochs': 50, 'batch..."
...,...,...,...
157,157,0.065662,"{'learning_rate': 0.0001, 'n_epochs': 50, 'bat..."
158,158,0.046845,"{'learning_rate': 0.0001, 'n_epochs': 50, 'bat..."
159,159,0.552697,"{'learning_rate': 0.0001, 'n_epochs': 50, 'bat..."
160,160,0.539363,"{'learning_rate': 0.0001, 'n_epochs': 50, 'bat..."


In [17]:
a.columns[2]

'1'

In [24]:
scores = a[a.columns[1]].to_list()

In [25]:
scenarios = a[a.columns[2]].to_list()

In [28]:
sands = [[x , y] for x , y in zip(scores , scenarios)]

In [30]:
sands = sorted(sands , key= lambda x: x[0])

In [31]:
sands

[[0.021440536013400343,
  "{'learning_rate': 0.001, 'n_epochs': 50, 'batch_size': 200, 'hidden_size': 300, 'l2_reg': 1e-06, 'dataset': 'mnist', 'activation': 'cos', 'execute_on_dev': True, 'verbose': False}"],
 [0.022222222222222254,
  "{'learning_rate': 0.001, 'n_epochs': 50, 'batch_size': 200, 'hidden_size': 200, 'l2_reg': 1e-06, 'dataset': 'mnist', 'activation': 'cos', 'execute_on_dev': True, 'verbose': False}"],
 [0.023059743160245727,
  "{'learning_rate': 0.001, 'n_epochs': 50, 'batch_size': 200, 'hidden_size': 200, 'l2_reg': 0.0001, 'dataset': 'mnist', 'activation': 'cos', 'execute_on_dev': True, 'verbose': False}"],
 [0.02322724734785031,
  "{'learning_rate': 0.001, 'n_epochs': 50, 'batch_size': 200, 'hidden_size': 200, 'l2_reg': 0.0001, 'dataset': 'mnist', 'activation': 'tanh', 'execute_on_dev': True, 'verbose': False}"],
 [0.02322724734785031,
  "{'learning_rate': 0.001, 'n_epochs': 50, 'batch_size': 200, 'hidden_size': 200, 'l2_reg': 1e-05, 'dataset': 'mnist', 'activation': '

In [10]:
#MNIST run
a = Scenario(
             lr=0.0001,
             n_epochs=50,
             batch_size=200,
             hidden_size=300,
             l2_reg=1e-05,
             dataset="cifar",
             activation="cos",
             execute_on_dev=False,
             verbose=True
)
a.execute()

Loading CIFAR-10 dataset


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


error is 50.40 %


0.5040201005025126