In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from random import shuffle
from matplotlib import cm

%matplotlib inline

np.random.seed(22)
error_tab = []
lr_tab = []
acc_tab = []

dfu = pd.read_csv('flagdata.csv', header=None)

columns = ['Landmass', 'Zone', 'Area', 'Population', 'Language', 'Religion', 'Bars', 'Stripes', 'Colours', 'Red', 'Green', 'Blue', 'Gold', 'White', 'Black', 'Orange', 'MainHue', 'Circles', 'Crosses', 'Saltires', 'Quarters', 'Sunstars', 'Crescent', 'Triangle', 'Icon', 'Animate', 'Text', 'TopLeft', 'BotRight']
dfu.columns = columns

# dfu.info()

df=dfu
df=((df-df.min())/(df.max()-df.min()))
df["Religion"]=dfu["Religion"]


In [2]:
valid = df.sample(20)
train = df.drop(valid.index)

x = train.drop('Religion', axis=1).values
labels = train['Religion']
y = pd.get_dummies(train['Religion']).values

xt = valid.drop('Religion', axis=1).values
labels = valid['Religion']
yt = pd.get_dummies(valid['Religion']).values

In [3]:
y


array([[0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0]], dtype=uint8)

In [4]:
# valid.head(20)


In [5]:
def layer_sizes(X, Y):
    n_x = 28 # input layer
    n_y = 8 # output layer
    return (n_x, n_y)

def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

In [6]:
# def sigmoid(x):
#   # Sigmoid activation function: f(x) = 1 / (1 + e^(-x))
#   return 1 / (1 + np.exp(-x))

# def deriv_sigmoid(x):
#   # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
#   fx = sigmoid(x)
#   return fx * (1 - fx)

In [7]:
def initialize_parameters(n_x, n_h1, n_h2, n_y):
    W1 = np.random.randn(n_x,n_h1)
    W2 = np.random.randn(n_h1,n_h2)
    W3 = np.random.randn(n_h2,n_y)

    parameters = {"W1": W1,
                  "W2": W2,
                  "W3": W3}
    
    return parameters

In [8]:
def forward_propagation(X, parameters):
    W1 = parameters['W1']
    W2 = parameters['W2']
    W3 = parameters['W3']
    
    # Layer 1
    Z1 = np.dot(X, W1)
    A1 = sigmoid(Z1)

    # Layer 2
    Z2 = np.dot(A1, W2)
    A2 = sigmoid(Z2)
    
    # Layer 3
    Z3 = np.dot(A2, W3)
    A3 = sigmoid(Z3)
    
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2,
             "Z3": Z3,
             "A3": A3}
    
    return A3, cache

In [9]:
def compute_cost(A3, Y):
    cost = 0.5 * np.sum(np.power(A3-Y, 2))
    return cost

In [10]:
def accuracy(output_vec, test_vec):
    equals = np.equal(np.argmax(test_vec, axis=1), np.argmax(output_vec, axis=1))
    acc = np.mean(equals)
    return acc

In [11]:
def adapt_learning_rate(learning_rate, xi_d, xi_i, er_r, cost, prev_cost):
    if cost > er_r*prev_cost:
        learning_rate*=xi_d
    elif cost < prev_cost:
        learning_rate*=xi_i
    prev_cost = cost
    return learning_rate, prev_cost

In [12]:
def backward_propagation(parameters, cache, X, Y):
    """
    Implement the backward propagation using the instructions above.
    
    Arguments:
    parameters -- python dictionary containing our parameters 
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2".
    X -- input data of shape (2, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    
    Returns:
    grads -- python dictionary containing your gradients with respect to different parameters
    """
    # First, retrieve W1 and W2 from the dictionary "parameters".
    W1 = parameters['W1']
    W2 = parameters['W2']
    W3 = parameters['W3']
        
    # Retrieve also A1 and A2 from dictionary "cache".
    A1 = cache['A1']
    A2 = cache['A2']
    A3 = cache['A3']
    
    Z1 = cache['Z1']
    Z2 = cache['Z2']
    
    ########## Layer 3
    delta3 = A3-Y
    dW3 = np.dot(A2.T, delta3)
    
    ########## Layer 2
    delta2 = np.dot(delta3, W3.T) * sigmoid_prime(Z2)
    dW2 = np.dot(A1.T, delta2)
    
    ########## Layer 1
    delta1 = np.dot(delta2, W2.T) * sigmoid_prime(Z1)
    dW1 = np.dot(X.T, delta1)

    
    grads = {"dW1": dW1,
             "dW2": dW2,      
             "dW3": dW3}
    return grads

In [13]:
def update_parameters(parameters, grads, learning_rate):
    W1 = parameters['W1']
    W2 = parameters['W2']
    W3 = parameters['W3']

    dW1 = grads['dW1']
    dW2 = grads['dW2']
    dW3 = grads['dW3']

    W1 -= learning_rate * dW1
    W2 -= learning_rate * dW2
    W3 -= learning_rate * dW3
    
    parameters = {"W1": W1,
                  "W2": W2,
                  "W3": W3}
    
    return parameters

In [26]:
def nn_model(X, Y, n_h1, n_h2, er_r, xi_i, xi_d, num_iterations, learning_rate):
    prev_cost = 1
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[1]
    
    parameters = initialize_parameters(n_x, n_h1, n_h2, n_y)
 
    W1 = parameters['W1']
    W2 = parameters['W2']
    W3 = parameters['W3']
    
#     max_acc = 0;
    
    for epoch in range(0, num_iterations):
         
        # Forward propagation. Inputs: "X, parameters". Outputs: "A3, cache".
        A3, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A3, Y, parameters". Outputs: "cost".
        cost = compute_cost(A3, Y)
 
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)
        
        # ADAPTIVE LEARNING RATE:
        learning_rate, prev_cost = adapt_learning_rate(learning_rate, xi_d, xi_i, er_r, cost, prev_cost)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate)

#         loss = -np.sum(y * np.log(A3))
#         error_tab.append(cost)
#         lr_tab.append(learning_rate)
#         equals = np.equal(np.argmax(y, axis=1), np.argmax(A3, axis=1))
#         acc = np.mean(equals)
#         A3, cache = forward_propagation(xt, parameters)
#         equals = np.equal(np.argmax(yt, axis=1), np.argmax(A3, axis=1))
#         acc_t = np.mean(equals)
#         acc_tab.append(acc)
#         print("Cost after iteration %i: %f, accuracy: %f" %(epoch, acc, acc_t))

    return parameters, A3

In [15]:
# nn_model(x, y, 15, 20, 1.01, 1.01, 0.7, 3000, 1.01)

In [27]:
# LAYERS SIZE
n_number = list(range(40, 50, 1))
acc_tab = np.zeros(shape=(100,100))
for n_h1 in n_number:
    for n_h2 in n_number:
        parameters, A3 = nn_model(x, y, n_h1, n_h2, er_r = 1.04, xi_i = 1.05, xi_d = 0.7, num_iterations = 3000, learning_rate = 0.1)
        acc = accuracy(output_vec = A3, test_vec = y)
        A3, cache = forward_propagation(xt, parameters)
        acc_t = accuracy(output_vec = A3, test_vec = yt)
        print("{}, {}, {}, {}".format(n_h1, n_h2, acc, acc_t))
        acc_tab[n_h1-1, n_h2-1] = acc*acc_t

40, 40, 0.9137931034482759, 0.45
40, 41, 0.8505747126436781, 0.3
40, 42, 0.6724137931034483, 0.55
40, 43, 0.8793103448275862, 0.3
40, 44, 0.7298850574712644, 0.3
40, 45, 0.8103448275862069, 0.25


KeyboardInterrupt: 

In [None]:
from matplotlib.pyplot import figure
fig = plt.figure(num=None, figsize=(6, 5), dpi=150, facecolor='w', edgecolor='k')
X = np.linspace(1, 100, 100)
Y = np.linspace(1, 100, 100)
X, Y = np.meshgrid(X, Y)
ax = fig.gca(projection='3d')
ax.set_xlabel('2nd layer')
ax.set_ylabel('1st layer')
ax.set_zlabel('acc * acc_t')
ax.plot_surface(X, Y, acc_tab, rstride=1, cstride=1, cmap='viridis', antialiased=True)

In [None]:
# XI_I XI_D
xi_i_number = list(range(101, 131, 1))
xi_d_number = list(range(71, 99, 1))
acc_tab_xi = np.zeros(shape=(10,10))
for n_h1 in n_number:
    for n_h2 in n_number:
        parameters, A3 = nn_model(x, y, n_h1, n_h2, er_r = 1.016, xi_i = 1.06, xi_d = 0.77, num_iterations = 4800, learning_rate = 0.01)
        acc = accuracy(output_vec = A3, test_vec = y)
        A3, cache = forward_propagation(xt, parameters)
        acc_t = accuracy(output_vec = A3, test_vec = yt)
        print("{}, {}, {}, {}".format(n_h1, n_h2, acc, acc_t))
        acc_tab[n_h1-1, n_h2-1] = acc*acc_t