In [22]:
import numpy as np
import pandas

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

# sigmoid for binary problem
def nonlin(x):
    return 1/(1+np.exp(-x))

# derivative of sigmoid
def drv_nonlin(x):
    return x*(1-x)

dataframe = pandas.read_csv("data/iris.csv", header=None)
dataset = dataframe.values
X = dataset[:,0:4].astype(float)
Y = dataset[:,4]

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)
y = dummy_y
print(y.shape)

# print(X.shape)
# print(encoded_Y)
# print(encoded_Y.shape)
# print(dummy_y.shape)
# y = np.array(encoded_Y).reshape(150,1)
# print(y.shape)
# print(y)

# print(X)

# seed random numbers to make calculation
# this is neccessary to observe the result under same random numbers in one session
np.random.seed(1)

# hidden layer configuration.
# 3 layear: You need to configure synapse 0 and synapse 1 here.
synapse_0 = 2* np.random.random((X.shape[1], X.shape[0])) - 1 # 3x150
synapse_1 = 2* np.random.random((y.shape[0], y.shape[1])) - 1 # 150x1

# start training iterations (10k times)
for i in range(100000):
    # iteration start
    num_iter = i
    
    # ---- training iteration step 1: Forward Propagation
    # Forward Propagation applies weights of synapses in each layer to produce output layer
    # Output layer is the layer that is the last layer as a final output for an iteration of training
    # the initial layer is the input
    l0 = X # 4x2
    # the hidden layer after application of the first synapse weights to layer 0 values, which is the input
    l0_weighted = np.dot(l0,synapse_0) # 4x2 2x4 -> 4x4
    # smooth each weighted value into new value ranging [0, 1)
    # now we generated the hidden layer, l1
    l1 = nonlin(l0_weighted) # -> 4x4
    
    # we finally compute the output layer by applying the second synapse weights to layer 1 values
    l1_weighted = np.dot(l1,synapse_1) # 4x4 4x1 -> 4x1
    # smooth each weighted value into new value ranging [0, 1)
    # now we generated the hidden layer, l1
    l2 = nonlin(l1_weighted) # -> 4x1
    # ---- Forward Propagation completed
    
    
    # ---- training iteration step 2: Back Propagation
    # Back Propagation adjust each weight of synapse layer by layer
    
    # First we figure out the initial error amount and direction on the output layer relative to the given output y
    l2_error = y - l2 # 4x1 - 4x1 -> 4x1
    # Incorporate the derivative of sigmoid on layer 2 (output layer)
    l2_delta = l2_error * drv_nonlin(l2) # 4x1 4x1 -> 4x1
    # Apply the delta computed to each synapses for the next iteration of training
    synapse_1 += l1.T.dot(l2_delta) # 4x4.T(=4x4) 4x1 -> 4x1
    
    # How much weights of synapse 1 contributed to the l2_delta
    l1_error = l2_delta.dot(synapse_1.T) # 4x1 4x1.T(=1x4) -> 4x4 
    # Incorporate the derivative of sigmoid on layer 1 (hidden layer)
    l1_delta = l1_error * drv_nonlin(l1) # 4x4 4x4 -> 4x4    
    # Apply the delta computed to each synapses for the next iteration of training
    synapse_0 += l0.T.dot(l1_delta) # 4x2.T(=2x4) 4x4 -> 2x4 
    # ---- Back Propagation completed
    
    # Checking error of the output layer respects to given output for each 10000 iterations
    # The number should decrease over the iterations
    if (num_iter% 10000) == 0:
        print("Error:" + str(np.mean(np.abs(l2_error))))
        

print('')
print("Output After Training:")
print(l2)


print("This is success!")

"""

# input dataset
# X = np.array([ [0,1],
#                [0,1],
#                [1,0],
#                [1,0] ])

# output dataset
# y = np.array([ [0],
#                [0],
#                [1],
#                [1] ])
# or simplify
# y = np.array([[0,0,1,1]]).T

# seed random numbers to make calculation
# this is neccessary to observe the result under same random numbers in one session
np.random.seed(1)

# hidden layer configuration.
# 3 layear: You need to configure synapse 0 and synapse 1 here.
synapse_0 = 2* np.random.random((2, 4)) - 1 # 2x4
synapse_1 = 2* np.random.random((4, 1)) - 1 # 4x1

# start training iterations (10k times)
for i in range(100000):
    # iteration start
    num_iter = i
    
    # ---- training iteration step 1: Forward Propagation
    # Forward Propagation applies weights of synapses in each layer to produce output layer
    # Output layer is the layer that is the last layer as a final output for an iteration of training
    # the initial layer is the input
    l0 = X # 4x2
    # the hidden layer after application of the first synapse weights to layer 0 values, which is the input
    l0_weighted = np.dot(l0,synapse_0) # 4x2 2x4 -> 4x4
    # smooth each weighted value into new value ranging [0, 1)
    # now we generated the hidden layer, l1
    l1 = nonlin(l0_weighted) # -> 4x4
    
    # we finally compute the output layer by applying the second synapse weights to layer 1 values
    l1_weighted = np.dot(l1,synapse_1) # 4x4 4x1 -> 4x1
    # smooth each weighted value into new value ranging [0, 1)
    # now we generated the hidden layer, l1
    l2 = nonlin(l1_weighted) # -> 4x1
    # ---- Forward Propagation completed
    
    
    # ---- training iteration step 2: Back Propagation
    # Back Propagation adjust each weight of synapse layer by layer
    
    # First we figure out the initial error amount and direction on the output layer relative to the given output y
    l2_error = y - l2 # 4x1 - 4x1 -> 4x1
    # Incorporate the derivative of sigmoid on layer 2 (output layer)
    l2_delta = l2_error * drv_nonlin(l2) # 4x1 4x1 -> 4x1
    # Apply the delta computed to each synapses for the next iteration of training
    synapse_1 += l1.T.dot(l2_delta) # 4x4.T(=4x4) 4x1 -> 4x1
    
    # How much weights of synapse 1 contributed to the l2_delta
    l1_error = l2_delta.dot(synapse_1.T) # 4x1 4x1.T(=1x4) -> 4x4 
    # Incorporate the derivative of sigmoid on layer 1 (hidden layer)
    l1_delta = l1_error * drv_nonlin(l1) # 4x4 4x4 -> 4x4    
    # Apply the delta computed to each synapses for the next iteration of training
    synapse_0 += l0.T.dot(l1_delta) # 4x2.T(=2x4) 4x4 -> 2x4 
    # ---- Back Propagation completed
    
    # Checking error of the output layer respects to given output for each 10000 iterations
    # The number should decrease over the iterations
    if (num_iter% 10000) == 0:
        print("Error:" + str(np.mean(np.abs(l2_error))))
        

print('')
print("Output After Training:")
print(l2)
"""

(150, 3)


ValueError: shapes (150,150) and (3,150) not aligned: 150 (dim 1) != 3 (dim 0)