In [273]:
import numpy as np
import pandas as pd

In [274]:
class MLP():
    """Fully connected feed-forward network with sigmoid activations.

    Weights are drawn with ``np.random.rand`` and there are no bias terms.
    Training is done one sample at a time: every sample triggers a forward
    pass, a backward pass and an immediate weight update.
    """

    def __init__(self, num_inputs=9, hidden_layers=None, num_outputs=3):
        """Build the weight, derivative and activation buffers.

        Args:
            num_inputs (int): number of input features.
            hidden_layers (list | None): sizes of the hidden layers, in
                order. ``None`` means the default ``[9, 9]``.
            num_outputs (int): number of output neurons.
        """
        # A None sentinel plus a private copy keeps instances from sharing
        # (or aliasing) the caller's list.
        if hidden_layers is None:
            hidden_layers = [9, 9]
        else:
            hidden_layers = list(hidden_layers)

        self.num_inputs = num_inputs
        self.hidden_layers = hidden_layers
        self.num_outputs = num_outputs

        layers = [num_inputs] + hidden_layers + [num_outputs]
        num_connections = len(layers) - 1

        # One weight matrix per pair of consecutive layers, shaped (fan_in, fan_out)
        self.weights = [
            np.random.rand(layers[i], layers[i + 1])
            for i in range(num_connections)
        ]

        # Gradient buffers with the same shapes as the weight matrices
        self.derivatives = [
            np.zeros((layers[i], layers[i + 1]))
            for i in range(num_connections)
        ]

        # One activation vector per layer (the input layer included)
        self.activations = [np.zeros(size) for size in layers]

    def forward_propagate(self, inputs):
        """Run a forward pass and cache every layer's activations.

        Args:
            inputs (array): input values; stored as ``self.activations[0]``.

        Returns:
            array: activations of the last layer.
        """
        activations = inputs
        self.activations[0] = activations

        # iterate through the network layers
        for i, w in enumerate(self.weights):
            net_inputs = np.dot(activations, w)

            activations = self._sigmoid(net_inputs)
            # i + 1: the activation of this layer is the input of the next one
            self.activations[i + 1] = activations

        return activations

    def back_propagate(self, error):
        """Propagate ``error`` backwards and fill ``self.derivatives``.

        Uses the activations cached by the most recent ``forward_propagate``
        call, so that call must come first.

        Args:
            error (array): error at the output layer.
        """
        for i in reversed(range(len(self.derivatives))):
            activations = self.activations[i + 1]

            # The cached activations are already sigmoid outputs, which is
            # what _sigmoid_derivative expects.
            delta = error * self._sigmoid_derivative(activations)
            # Row layout so the dot product below yields a matrix
            delta_re = delta.reshape(delta.shape[0], -1).T

            # Column layout for the activations feeding this weight matrix
            current_activations = self.activations[i]
            current_activations = current_activations.reshape(current_activations.shape[0], -1)

            self.derivatives[i] = np.dot(current_activations, delta_re)
            # Error seen by the previous layer
            error = np.dot(delta, self.weights[i].T)

    def train(self, inputs, targets, epochs, learning_rate):
        """Train the network with per-sample updates.

        Prints the mean per-sample MSE after every epoch.

        Args:
            inputs (array): X, one sample per row.
            targets (array): Y, indexed in parallel with ``inputs``.
            epochs (int): number of passes over ``inputs``.
            learning_rate (float): step size passed to ``gradient_descent``.
        """
        # Use the data handed to this call; the previous code read the
        # length of a notebook-level global named `items`.
        num_samples = len(inputs)

        for i in range(epochs):
            sum_errors = 0

            for j, sample in enumerate(inputs):
                target = targets[j]
                output = self.forward_propagate(sample)

                error = target - output
                self.back_propagate(error)

                # update the weights right after each sample
                self.gradient_descent(learning_rate)

                sum_errors += self._mse(target, output)

            # Report the training error each epoch (otherwise the run looks hung)
            print("Error: {} en epoch {}".format(sum_errors / num_samples, i+1))

        print("Finished training")
        print()

    def gradient_descent(self, learningRate=0.5):
        """Apply the stored derivatives to the weights, in place.

        The step is added rather than subtracted because ``train`` computes
        the error as ``target - output``.

        Args:
            learningRate (float): step size.
        """
        for i in range(len(self.weights)):
            weights = self.weights[i]
            derivatives = self.derivatives[i]
            # In-place add mutates the array held in self.weights
            weights += derivatives * learningRate

    def _sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x))."""
        y = 1.0 / (1 + np.exp(-x))
        return y

    def _sigmoid_derivative(self, x):
        """Sigmoid derivative given ``x`` that is already a sigmoid output."""
        return x * (1.0 - x)

    def _mse(self, target, output):
        """Mean Squared Error"""
        return np.average((target - output) ** 2)

### Load and process dataset

In [275]:
# Read the cleaned penguin dataset; row 0 of the file supplies the column names.
# (To rename the headers explicitly, assign a list to datos_pinguinos.columns.)
DATA_PATH = './penguins_cleaned.csv'
datos_pinguinos = pd.read_csv(DATA_PATH, header=0)

# Preview the first rows
datos_pinguinos.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181,3750,male
1,Adelie,Torgersen,39.5,17.4,186,3800,female
2,Adelie,Torgersen,40.3,18.0,195,3250,female
3,Adelie,Torgersen,36.7,19.3,193,3450,female
4,Adelie,Torgersen,39.3,20.6,190,3650,male


### Normalizar y codificar

In [276]:
# Work on a copy so the raw frame stays untouched
df = datos_pinguinos.copy()
encode = ['sex', 'island', 'species']

# Swap each categorical column for its one-hot columns, appended on the right
for col in encode:
    one_hot = pd.get_dummies(df[col], prefix=col)
    df = pd.concat([df.drop(columns=col), one_hot], axis=1)

In [277]:
# Dataframe codificado
# df

In [278]:
# The inputs leave out the one-hot species columns; the outputs are exactly those columns
species_cols = ['species_Adelie','species_Chinstrap','species_Gentoo']
input_df = df.drop(species_cols, axis=1)
output_df = df.filter(species_cols, axis=1)

# Split the inputs by kind; only the numeric measurements get normalized
# (normalizing helps the training converge)
df_numbers = input_df.filter(['bill_length_mm','bill_depth_mm','flipper_length_mm','body_mass_g'])
df_genders = input_df.filter(['sex_female','sex_male'])
df_islands = input_df.filter(['island_Biscoe','island_Dream','island_Torgersen'])

# Keep an untouched copy of the numeric columns
norm_df = df_numbers.copy()

# Standardize: subtract the column mean, divide by the column standard deviation
col_mean = norm_df.mean()
col_std = norm_df.std()
normalized_input = (norm_df - col_mean) / col_std

# Min-max alternative (not used):
# normalized_df=(norm_df-norm_df.min())/(norm_df.max()-norm_df.min())

# Reassemble in the order: numeric, sex, island
formatted_input = pd.concat([normalized_input, df_genders, df_islands], axis=1)

# Show the complete, normalized dataframe
formatted_input

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex_female,sex_male,island_Biscoe,island_Dream,island_Torgersen
0,-0.894695,0.779559,-1.424608,-0.567621,0,1,0,0,1
1,-0.821552,0.119404,-1.067867,-0.505525,1,0,0,0,1
2,-0.675264,0.424091,-0.425733,-1.188572,1,0,0,0,1
3,-1.333559,1.084246,-0.568429,-0.940192,1,0,0,0,1
4,-0.858123,1.744400,-0.782474,-0.691811,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...
328,2.159064,1.338151,0.430446,-0.257145,0,1,0,1,0
329,-0.090112,0.474872,0.073705,-1.002287,1,0,0,1,0
330,1.025333,0.525653,-0.568429,-0.536573,0,1,0,1,0
331,1.244765,0.931902,0.644491,-0.132954,0,1,0,1,0


In [279]:
# X holds the network inputs, Y the expected outputs
X, Y = formatted_input, output_df

# Optional alternative: reorder the output columns (left disabled)
# pre_output = output_df.copy()

# adelie = pre_output.filter(['species_Adelie'], axis = 1)
# gentoo = pre_output.filter(['species_Gentoo'], axis = 1)
# chinstrap = pre_output.filter(['species_Chinstrap'], axis = 1)
# new_output = pd.concat([adelie, gentoo, chinstrap], axis = 1)

# Y = new_output

In [280]:
# X.head()

In [281]:
# Y.head()

### De dataframe a matrices Numpy

In [282]:
# Convert both frames to independent float NumPy arrays
inputs, expected_output = (
    frame.to_numpy(dtype='float', copy=True) for frame in (X, Y)
)

### Inicialización

In [283]:
# Startup dataset
items = inputs
targets = expected_output

# Tunable settings, kept together so a re-run is reproducible
SEED = 42
HIDDEN_LAYERS = [9]
EPOCHS = 500
LEARNING_RATE = 0.3

# MLP draws its initial weights from NumPy's global RNG, so seed it first
np.random.seed(SEED)

# Multilayer perceptron with one hidden layer; the input and output sizes are
# read from the data (9 inputs and 3 outputs for this dataset)
mlp = MLP(items.shape[1], HIDDEN_LAYERS, targets.shape[1])

# Train for EPOCHS passes over the data
mlp.train(items, targets, EPOCHS, LEARNING_RATE)

Error: 0.16986247172618946 en epoch 1
Error: 0.06619389174952668 en epoch 2
Error: 0.04870922913436131 en epoch 3
Error: 0.032767811830532505 en epoch 4
Error: 0.021206815818572988 en epoch 5
Error: 0.014282622833399239 en epoch 6
Error: 0.010257930273544488 en epoch 7
Error: 0.007853984707745036 en epoch 8
Error: 0.006346374674325238 en epoch 9
Error: 0.005349535211428376 en epoch 10
Error: 0.004655388006759342 en epoch 11
Error: 0.004148599374059572 en epoch 12
Error: 0.0037633283426797793 en epoch 13
Error: 0.0034603893068353734 en epoch 14
Error: 0.003215404574654968 en epoch 15
Error: 0.0030125755768189346 en epoch 16
Error: 0.0028412875096726882 en epoch 17
Error: 0.002694180433228793 en epoch 18
Error: 0.002566009120745136 en epoch 19
Error: 0.0024529443319529294 en epoch 20
Error: 0.0023521305269677485 en epoch 21
Error: 0.002261397715978846 en epoch 22
Error: 0.0021790688419121946 en epoch 23
Error: 0.0021038280309052786 en epoch 24
Error: 0.002034628598162061 en epoch 25
Erro

Error: 0.00020642043731092783 en epoch 201
Error: 0.00020503369801136792 en epoch 202
Error: 0.00020366322743941586 en epoch 203
Error: 0.00020230876312745942 en epoch 204
Error: 0.00020097004785939323 en epoch 205
Error: 0.0001996468295474975 en epoch 206
Error: 0.00019833886111258472 en epoch 207
Error: 0.00019704590036732995 en epoch 208
Error: 0.00019576770990267024 en epoch 209
Error: 0.00019450405697720404 en epoch 210
Error: 0.00019325471340949136 en epoch 211
Error: 0.0001920194554731794 en epoch 212
Error: 0.00019079806379485395 en epoch 213
Error: 0.0001895903232545463 en epoch 214
Error: 0.00018839602288884142 en epoch 215
Error: 0.0001872149557964658 en epoch 216
Error: 0.00018604691904632297 en epoch 217
Error: 0.00018489171358789146 en epoch 218
Error: 0.00018374914416391985 en epoch 219
Error: 0.00018261901922534586 en epoch 220
Error: 0.00018150115084839214 en epoch 221
Error: 0.00018039535465376195 en epoch 222
Error: 0.00017930144972788133 en epoch 223
Error: 0.000178

Error: 8.264174391687425e-05 en epoch 400
Error: 8.237522410040537e-05 en epoch 401
Error: 8.211028156905666e-05 en epoch 402
Error: 8.184690290170063e-05 en epoch 403
Error: 8.158507482507639e-05 en epoch 404
Error: 8.132478421179573e-05 en epoch 405
Error: 8.106601807839243e-05 en epoch 406
Error: 8.080876358339201e-05 en epoch 407
Error: 8.055300802542002e-05 en epoch 408
Error: 8.029873884133269e-05 en epoch 409
Error: 8.004594360438339e-05 en epoch 410
Error: 7.979461002241466e-05 en epoch 411
Error: 7.95447259360745e-05 en epoch 412
Error: 7.929627931707191e-05 en epoch 413
Error: 7.904925826644379e-05 en epoch 414
Error: 7.880365101286201e-05 en epoch 415
Error: 7.855944591096004e-05 en epoch 416
Error: 7.83166314396853e-05 en epoch 417
Error: 7.807519620068023e-05 en epoch 418
Error: 7.783512891668577e-05 en epoch 419
Error: 7.759641842996851e-05 en epoch 420
Error: 7.735905370077258e-05 en epoch 421
Error: 7.712302380579841e-05 en epoch 422
Error: 7.68883179366988e-05 en epoch

## ------------------------------Store Data From Trained NN--------------------------------------

In [284]:
# Keep references to the trained network's activation buffers and weight matrices
trained_activations, trained_weights = mlp.activations, mlp.weights
    
def forward_propagate(inputs):
    """Forward pass through the stored trained weights.

    Reads the module-level ``trained_weights`` and writes every layer's
    activations into ``trained_activations``, using ``mlp._sigmoid`` as the
    activation function.

    Args:
        inputs (array): values fed to the first layer.

    Returns:
        array: activations of the last layer.
    """
    trained_activations[0] = inputs
    layer_output = inputs

    # Slot 0 holds the inputs, so the layer outputs start at index 1
    for slot, layer_weights in enumerate(trained_weights, start=1):
        layer_output = mlp._sigmoid(np.dot(layer_output, layer_weights))
        trained_activations[slot] = layer_output

    return layer_output

In [None]:
### Load testing data (copied whole training data as testing data)

In [285]:
# test_input = inputs
# test_target = expected_output

# # get a prediction
# predicted_output = mlp.forward_propagate(test_input)

# print()
# predicted_output

In [286]:
# # Aplicando redondeo a resultados de predicción:
# rounded_predicted_output = np.around(predicted_output, 1).copy()
# rounded_predicted_output

## ----------------------------------------------SINGLE INPUT TESTING-----------------------------------------------------

### From (input list) to (dictionary) to (dataframe) and finally to (numpy array)...!

In [304]:
# Pair each raw value with its column label so it can be loaded into a dataframe

# input_list = ['Torgersen', '39.3', '20.6', '190', '3650', 'masculino']  # -> an Adelie -> [1, 0, 0]
input_list = ['Biscoe','42','13.5','210','4150','femenino']  # -> a Gentoo -> [0, 0, 1]

labels = ['island','bill_length_mm','bill_depth_mm','flipper_length_mm','body_mass_g','sex']

# zip pairs the labels with the values positionally; dict keeps that order
dict_data = dict(zip(labels, input_list))

print(dict_data)

{'island': 'Biscoe', 'bill_length_mm': '42', 'bill_depth_mm': '13.5', 'flipper_length_mm': '210', 'body_mass_g': '4150', 'sex': 'femenino'}


In [305]:
# Build a one-row dataframe from the dictionary and map the Spanish sex labels
# back to the English ones used by the training data

pd_data_oriented = pd.DataFrame.from_dict(dict_data, orient='index')
pd_data = pd_data_oriented.transpose()

# Assign the result back to the column: calling replace(..., inplace=True) on
# pd_data['sex'] acts on a temporary Series and does not update pd_data under
# pandas Copy-on-Write.
pd_data['sex'] = pd_data['sex'].replace({'femenino': 'female', 'masculino': 'male'})

pd_data

Unnamed: 0,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Biscoe,42,13.5,210,4150,female


### Encode one hot

In [306]:
# Encode on a copy of the single-sample frame
df_input = pd_data.copy()
encode = ['sex', 'island']

# Swap each categorical column for its one-hot columns, appended on the right
for col in encode:
    one_hot = pd.get_dummies(df_input[col], prefix=col)
    df_input = pd.concat([df_input.drop(columns=col), one_hot], axis=1)

# With a single sample, only the categories present in it get a dummy column;
# the remaining dummy columns are still missing at this point.

In [307]:
# Add missing columns and fill with 0s

def add_extra_cols(df):
    """Return a copy of ``df`` with every expected one-hot column present.

    Any of the sex/island dummy columns absent from ``df`` is appended on the
    right, in the order listed below, filled with 0 for every row.

    Args:
        df (pd.DataFrame): frame that may lack some dummy columns.

    Returns:
        pd.DataFrame: new frame with the missing columns added. When nothing
        is missing, a plain copy of ``df`` is returned.
    """
    labels = ['sex_female', 'sex_male', 'island_Biscoe', 'island_Dream', 'island_Torgersen']
    missing = [label for label in labels if label not in df.columns]

    # Nothing to add: return a copy instead of failing on an unassigned result
    if not missing:
        return df.copy()

    # Build the filler on df's own index so every row lines up and gets a 0
    filler = pd.DataFrame(0, index=df.index, columns=missing)
    return pd.concat([df, filler], axis=1)

In [308]:
# Encoded single-sample frame, passed through add_extra_cols to append the
# dummy columns it was missing
df_final = add_extra_cols(df_input)
df_final

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex_female,island_Biscoe,sex_male,island_Dream,island_Torgersen
0,42,13.5,210,4150,1,1,0,0,0


In [309]:
# Rearrange the columns and normalize the numeric ones
numeric_cols = ['bill_length_mm','bill_depth_mm','flipper_length_mm','body_mass_g']

# The raw sample values are strings; convert them once so the math is numeric
df_numbers = df_final.filter(numeric_cols).astype(float)

df_genders = df_final.filter(['sex_female','sex_male'])
df_islands = df_final.filter(['island_Biscoe','island_Dream','island_Torgersen'])

# Mean and std of each numeric column in the original dataset, listed in the
# same order as numeric_cols. They must match the statistics used to normalize
# the training inputs.
means = [43.992793, 17.164865, 200.966967, 4207.057057]
stds = [5.468668, 1.969235, 14.015765, 805.215802]

# Label the statistics by column so they are paired by name, not by position
mean_by_col = pd.Series(means, index=numeric_cols)
std_by_col = pd.Series(stds, index=numeric_cols)

# Standardize every numeric column at once and round to 6 decimals
present_cols = df_numbers.columns
df_numbers = ((df_numbers - mean_by_col[present_cols]) / std_by_col[present_cols]).round(6)

# Rejoin the groups in the same column order as the training input
formatted_df = pd.concat([df_numbers, df_genders], axis=1)
formatted_df = pd.concat([formatted_df, df_islands], axis=1)
    
# formatted_df

In [314]:
# Display the assembled single-sample frame (numeric, sex and island columns, in that order)
formatted_df

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex_female,sex_male,island_Biscoe,island_Dream,island_Torgersen
0,-0.364402,-1.86106,0.644491,-0.070859,1,0,1,0,0


In [313]:
# Column count of the single-sample frame
print(formatted_df.shape[1])

9


In [296]:
# Convert the single-sample frame into an independent float NumPy array for the network

X_input = formatted_df.to_numpy(dtype='float', copy=True)

### Forward pass through trained NN

In [297]:
# Run the single sample through the module-level forward_propagate to get the
# network's prediction
predicted_output = forward_propagate(X_input)

predicted_output

array([[1.30615065e-04, 3.06026091e-03, 9.95787144e-01]])

In [298]:
# Round the prediction to one decimal place (np.round returns a new array)
rounded_predicted_output = np.round(predicted_output, decimals=1)

print("Output is:")
print(rounded_predicted_output)
# Legend for reading the one-hot output
print("Adelie = [1, 0, 0]",'||', "Chinstrap = [0, 1, 0]",'||', "Gento = [0, 0, 1]")

Output is:
[[0. 0. 1.]]
Adelie = [1, 0, 0] || Chinstrap = [0, 1, 0] || Gento = [0, 0, 1]


In [299]:
# Adelie = [1, 0, 0] || Chinstrap = [0, 1, 0] || Gentoo = [0, 0, 1]

# Message for each exact one-hot pattern. The Gentoo pattern was previously
# written as [0.0, 0.0, 1-0], a typo that only matched because 1-0 equals 1.
MESSAGE_BY_ONE_HOT = {
    (1.0, 0.0, 0.0): "Penguin is Adelie",
    (0.0, 1.0, 0.0): "Penguin is Chinstrap",
    (0.0, 0.0, 1.0): "Penguin is Gentoo",
}

lst = rounded_predicted_output.tolist()

for elem in lst:
    # Any row that is not an exact one-hot pattern is reported as uncertain
    print(MESSAGE_BY_ONE_HOT.get(tuple(elem), "No hay certeza"))

Penguin is Gentoo
