In [1]:
import numpy as np
import logging
import matplotlib.pyplot as plt
from tensorflow.keras import models,layers,regularizers,optimizers


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import torch.nn as nn
import torch
import torch.optim as optim
import numpy as np
import logging
import matplotlib.pyplot as plt
from collections import OrderedDict

In [3]:
torch.cuda.is_available()

True

In [10]:
class MLCA_NN_torch:

    def __init__(self, X_train, Y_train, scaler=None):
        self.M = X_train.shape[1]  # number of items
        self.X_train = X_train  # training set of bundles
        self.Y_train = Y_train  # bidder's values for the bundels in X_train
        self.X_valid = None   # test/validation set of bundles
        self.Y_valid = None  # bidder's values for the bundels in X_valid
        self.model_parameters = None  # neural network parameters
        self.model = None  # keras model, i.e., the neural network
        self.scaler = scaler  # the scaler used for initially scaling the Y_train values
        self.history = None  # return value of the model.fit() method from keras
        self.loss = None  # return value of the model.fit() method from keras
        self.device = 'cpu'

    def initialize_model(self, model_parameters):
        self.model_parameters = model_parameters
        # model parameters is a tuple:(r=regularization_parameters,lr=learning rate for ADAM, dim=number and dimension of hidden layers, dropout=boolean if dropout is used in trainig, dp=dropout rate,epochs=epochs, batch_size=batch_size, regularization_type=regularization_type)
        lr = self.model_parameters['learning_rate']
        architecture = self.model_parameters['architecture']
        dropout = self.model_parameters['dropout']
        dp = self.model_parameters['dropout_prob']

        architecture = [int(layer) for layer in architecture]  # integer check
        number_of_hidden_layers = len(architecture)
        dropout = bool(dropout)
        # -------------------------------------------------- NN Architecture -------------------------------------------------#
        # GET MODEL HERE
        # first hidden layer
        model = nn.Sequential()
        model.add_module('dense_0',nn.Linear(self.M, architecture[0])) 
        model.add_module('relu_0',nn.ReLU())
        if dropout is True: 
            model.add_module("dropout_0", nn.Dropout(p=dp))

        # remaining hidden layer
        for k in range(1, number_of_hidden_layers):
            model.add_module(f"dense_{k}", nn.Linear(architecture[k-1], architecture[k]))
            model.add_module(f"relu_{k}", nn.ReLU())
            if dropout is True:
                model.add_module(f"dropout{k}", nn.Dropout(p=dp))
        # final output layer
        model.add_module(f"dense_{k+1}", nn.Linear(architecture[k], 1))
        model.add_module(f"relu_{k+1}", nn.ReLU())        
        
        # ADAM = adaptive moment estimation a first-order gradient-based optimization algorithm
        self.optimizer = optim.Adam(model.parameters(),lr=lr, betas=(0.9, 0.999), weight_decay=0.0, amsgrad=False)
        self.criterion = nn.MSELoss(reduction='mean')
        self.model = model
        logging.debug('Neural Net initialized')

        
    def __get_reg_loss(self):
        regularization_type = self.model_parameters['regularization_type']
        r = self.model_parameters['regularization']
        w1, w2 = 0,0
        # set regularization
        if regularization_type == 'l2' or regularization_type is None:
            w2 = r
        if regularization_type == 'l1':
            w1 = r
        if regularization_type == 'l1_l2':
            w1,w2 = r, r
        
        l1_regularization, l2_regularization = torch.FloatTensor([0]), torch.FloatTensor([0])

        for param in self.model.parameters():
            l1_regularization += torch.norm(param, 1)**2
            l2_regularization += torch.norm(param, 2)**2
        
        return w1*l1_regularization + w2*l2_regularization

        
    def fit(self, epochs, batch_size, X_valid=None, Y_valid=None):
        # set test set if desired
        self.X_valid = X_valid
        self.Y_valid = Y_valid

        size = self.X_train.shape[0]
        N_iter = size//batch_size + int(bool(size%batch_size))


        X = torch.FloatTensor(self.X_train).to(self.device)
        Y = torch.FloatTensor(self.Y_train).to(self.device)
        self.model.to(self.device)

        self.epoch_losses = {'train':[], 'val':[]}

        for n in range(epochs):

            losses = {'train':[], 'val':[]}
            indices = np.arange(len(X)) 
            np.random.shuffle(indices)

            for i in range(N_iter):
                x = X[indices[i*batch_size: (i+1)*batch_size]]
                y = Y[indices[i*batch_size: (i+1)*batch_size]]

                # Compute prediction and loss
                pred = self.model(x)
                mse_loss = self.criterion(pred.flatten(), y.flatten())
                reg_loss = self.__get_reg_loss()
                loss = mse_loss + reg_loss
                losses['train'].append(loss.item()*len(x))

                # Backpropagation
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            self.epoch_losses['train'].append(np.mean(losses['train']))

            if (self.X_valid is not None) and (self.Y_valid is not None):
                Xval = torch.FloatTensor(self.X_valid).to(self.device)
                Yval = torch.FloatTensor(self.Y_valid).to(self.device)

                size_val = self.X_valid.shape[0]
                N_iter_val = size_val//batch_size + int(bool(size_val%batch_size))

                with torch.no_grad():
                    for i in range(N_iter_val):
                        x = Xval[i*batch_size: (i+1)*batch_size]
                        y = Yval[i*batch_size: (i+1)*batch_size]
                        pred = self.model(x)
                        mse_loss = self.criterion(pred.flatten(), y)
                        reg_loss = self.__get_reg_loss()
                        loss = mse_loss + reg_loss
                        losses['val'].append(loss)

                self.epoch_losses['val'].append(np.mean(losses['val']))

            
#             loss = self.loss_info(batch_size, plot=False)
#         return (loss)
        tr, val = None, None
        tr_orig, val_orig = self.epoch_losses['train'][-1], self.epoch_losses['val']
        return ((tr, val, tr_orig, val_orig))

    def loss_info(self, batch_size, plot=True, scale=None):
        '''
        Returns
        Scalar test loss (if the model has a single output and no metrics) 
        or list of scalars (if the model has multiple outputs and/or metrics). 
        The attribute model.metrics_names will give you the display labels for the scalar outputs.
        '''
        logging.debug('Model Parameters:')
        for k,v in self.model_parameters.items():
            logging.debug(k + ': %s', v)
        tr = None
        tr_orig = None
        val = None
        val_orig = None
        # if scaler attribute was specified
        if self.scaler is not None:
            logging.debug(' ')
            logging.debug('*SCALING*')
            logging.debug('---------------------------------------------')
            # errors on the training set
            tr = self.model.evaluate(x=self.X_train, y=self.Y_train, verbose=0)
            tr_orig = float(self.scaler.inverse_transform([[tr]]))
            if (self.X_valid is not None) and (self.Y_valid is not None):
                # errors on the test set
                val = self.model.evaluate(x=self.X_valid, y=self.Y_valid, verbose=0)
                val_orig = float(self.scaler.inverse_transform([[val]]))
        # data has not been scaled by scaler, i.e., scaler == None
        else:
            tr_orig = self.model.evaluate(x=self.X_train, y=self.Y_train, verbose=0)
            if (self.X_valid is not None) and (self.Y_valid is not None):
                val_orig = self.model.evaluate(x=self.X_valid, y=self.Y_valid, verbose=0)
        # print errors
        if tr is not None:
            logging.info('Train Error Scaled %s', tr)
        if val is not None:
            logging.info('Validation Error Scaled %s', val)
        if tr_orig is not None:
            logging.info('Train Error Orig. %s', tr_orig)
        if val_orig is not None:
            logging.info('Validation Error Orig %s', val_orig)
        logging.debug('---------------------------------------------')

   
        return((tr, val, tr_orig, val_orig))

In [11]:

epochs, batch_size = 10, 30
X_valid=None
Y_valid=None

In [12]:
regularization_N = 1e-5
learning_rate_N = 0.01
layer_N = [16,16,16]
dropout_N = True
dropout_prob_N = 0.05
epochs, batch_size = 10, 4
regularization_type = 'l1'

model_parameters =  OrderedDict([('regularization', regularization_N),
                                ('learning_rate', learning_rate_N),
                                ('architecture', layer_N),
                                ('dropout', dropout_N),
                                ('dropout_prob', dropout_prob_N),
                                ('epochs', epochs),
                                ('batch_size', batch_size),
                                ('regularization_type',
                                 regularization_type)])


nq = 30
X_train = np.float32(np.random.randn(nq,18) > .5)
Y_train = np.random.rand(nq,1) * 70

mlca_nn = MLCA_NN_torch(X_train, Y_train)
mlca_nn.initialize_model(model_parameters)

In [17]:
mlca_nn.model_parameters

OrderedDict([('regularization', 1e-05),
             ('learning_rate', 0.01),
             ('architecture', [16, 16, 16]),
             ('dropout', True),
             ('dropout_prob', 0.05),
             ('epochs', 10),
             ('batch_size', 4),
             ('regularization_type', 'l1')])

In [7]:
losses = mlca_nn.fit(epochs=100, batch_size=30)

In [8]:
losses

(None, None, 8476.424560546875, [])

In [9]:
# self.Models[key] = mlca_nn.model
nnmodel = mlca_nn.model
nnmodel

Sequential(
  (dense_0): Linear(in_features=18, out_features=16, bias=True)
  (relu_0): ReLU()
  (dropout_0): Dropout(p=0.05, inplace=False)
  (dense_1): Linear(in_features=16, out_features=16, bias=True)
  (relu_1): ReLU()
  (dropout1): Dropout(p=0.05, inplace=False)
  (dense_2): Linear(in_features=16, out_features=16, bias=True)
  (relu_2): ReLU()
  (dropout2): Dropout(p=0.05, inplace=False)
  (dense_3): Linear(in_features=16, out_features=1, bias=True)
  (relu_3): ReLU()
)

In [12]:
layer_type=['dense', 'input']
Layer_shapes = []
for i, (name, param) in enumerate(nnmodel.named_parameters()):
    if (i==0) and ('input' in layer_type): 
        Layer_shapes.append(param.shape[1])
    if any([x in name for x in layer_type]) and ('bias' in name):
        Layer_shapes.append(param.shape[0])
        
Layer_shapes

[18, 16, 16, 16, 1]

In [13]:
for name, param in nnmodel.named_parameters():

SyntaxError: unexpected EOF while parsing (<ipython-input-13-8dfccbef6477>, line 1)

In [14]:
[param.data.shape[0] for name, param in nnmodel.named_parameters() 
 if (any([x in name for x in layer_type])) and ('bias' not in name)]

[16, 16, 16, 1]

In [None]:
name.split('.')

In [None]:
class layer():
    def __init__(self,):
        self.input
        self.output

In [None]:
18*16+16*16+16*16+16

In [15]:
for W in kerasmodel.get_weights(): print(W.shape)

NameError: name 'kerasmodel' is not defined

In [None]:
W

In [None]:

weights = []
for params in nnmodel.parameters():
    weights.append(params.detach().cpu().numpy().T)   

In [None]:
w

In [None]:
# UPPER BOUND TORCH
L = 5000
upper_bounds_z = []
for layer in Layer_shapes:
#     print(layer.output.shape)
    upper_bounds_z.append(np.array([L]*layer).reshape(-1, 1))
#     print(upper_bounds_z[-1].shape)
    
# upper_bounds_z

In [None]:
upper_bounds_z

In [None]:
kerasmodel = model

In [None]:
# def _get_model_layers(self, key, layer_type=None):
layer_type=['dense', 'input']
Layers = kerasmodel.layers
if layer_type is not None:
    tmp = [layer.get_config()['name'] for layer in Layers]
    Layers = [Layers[i] for i in [tmp.index(s) for s in tmp if any([x in s for x in layer_type])]]
Layers

In [None]:
# UPPER BOUND KERAS
L = 5000
upper_bounds_z = []
for layer in Layers:
    print(layer.output.shape)
    upper_bounds_z.append(np.array([L]*layer.output.shape[1]).reshape(-1, 1))
#     print(upper_bounds_z[-1].shape)
    
# upper_bounds_z

In [None]:
weights = []
for params in mlca_nn.model.parameters():
    weights.append(params.detach().cpu().numpy().T)

In [None]:
weights

In [None]:
for w in weights:
    print(w.shape)

In [None]:
# how to get weights from keras
r = 1e-5
lr = 0.01
architecture = [16, 16, 16]
dropout = True
dp = 0.2
regularization_type = 'l1'
M = 18
architecture = [int(layer) for layer in architecture]  # integer check
number_of_hidden_layers = len(architecture)
dropout = bool(dropout)

# define input layer
inputs = layers.Input(shape=(X_train.shape[1], ))
# set regularization
REG = regularizers.l1(r)
# first hidden layer
x = layers.Dense(architecture[0], kernel_regularizer=REG, bias_regularizer=REG, activation='relu')(inputs)
if dropout is True:
    x = layers.Dropout(rate=dp)(x)
# remaining hidden layer
for k in range(1, number_of_hidden_layers):
    x = layers.Dense(architecture[k], kernel_regularizer=REG, bias_regularizer=REG, activation='relu')(x)
    if dropout is True:
        x = layers.Dropout(rate=dp)(x)
# final output layer
predictions = layers.Dense(1, activation='relu')(x)
model = models.Model(inputs=inputs, outputs=predictions)

In [None]:
len(model.get_weights())

In [None]:
for w in model.get_weights():
    print(w.shape)

In [None]:
mlca_nn.model[0].weight.data.T.numpy().shape[0]

In [None]:
Layers = kerasmodel.layers
[layer.get_config()['name'] for layer in Layers]

In [None]:
layer.output.shape[1]

In [None]:
upper_bounds_z

In [None]:
tmp = [layer.get_config()['name'] for layer in Layers]
Layers = [Layers[i] for i in [tmp.index(s) for s in tmp if any([x in s for x in layer_type])]]