In [None]:
### Framework for implementing deep learning algorithms
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Normalization # Import three layers from keras
from tensorflow.keras import Sequential
from tensorflow.keras.losses import MeanSquaredError, BinaryCrossentropy
from tensorflow.keras.activations import sigmoid

import numpy as np

# Data in TensorFlow
#### Representing data as a matrix helps TensorFlow be more computationally efficient internally

In [None]:
# Template (illustration only, so it is kept as a comment rather than executable code):
# a 2-D NumPy array is a list of rows, and each row is a list of column values.
#   General_Matrix = np.array([[col1, col2],   # Row1
#                              [col1, col2],   # Row2
#                              [col1, col2],   # Row3
#                              [col1, col2]])  # Row4

# 4x2 matrix: 4 examples (rows) x 2 features (columns)
X = np.array([[200.0, 17.0],
              [120.0, 5.0],
              [425.0, 20.0],
              [212.0, 18.0]])

# 1-D array of shape (4,): a plain list of numbers, not a matrix
y = np.array([1, 0, 0, 1])


# Normalize Data

In [None]:
### Back-propagation fits the weights more quickly when the features are normalized (share a similar range)

# Stand-alone preprocessing layer; it is not one of the layers of the model itself
norm_layer = Normalization(axis=-1)

# adapt() learns the mean and variance of the data set and stores them inside the layer
norm_layer.adapt(X)

# Normalize the data (the same transform must be applied to any future data fed to the learned model)
Xn = norm_layer(X)

# Inference (Forward Propagation) in NN 

### Explicitly one layer of computation at a time 

In [None]:
# Forward propagation by hand: each Dense layer is called on the previous layer's output.
# The normalized features Xn are used, matching the normalization step applied to the data above.
layer_1 = tf.keras.layers.Dense(units=25, activation='sigmoid')
a1 = layer_1(Xn)  # a1 is a tensor of shape (4, 25): one row per example, one column per unit

layer_2 = tf.keras.layers.Dense(units=15, activation='sigmoid')
a2 = layer_2(a1)  # shape (4, 15)

# Output layer with 1 neuron
layer_3 = tf.keras.layers.Dense(units=1, activation='sigmoid')
a3 = layer_3(a2)  # 2-D tensor of shape (4, 1): one output per example

### Using Sequential to string the layers and make a NN

In [None]:
# Create the three layers first ...
layer_1 = tf.keras.layers.Dense(units=25, activation='sigmoid')
layer_2 = tf.keras.layers.Dense(units=15, activation='sigmoid')
layer_3 = tf.keras.layers.Dense(units=1, activation='sigmoid')

# ... then let Sequential chain them in order: layer_1 -> layer_2 -> layer_3
model = Sequential([layer_1, layer_2, layer_3])

### Formal way
#### TensorFlow models are built layer by layer, and each layer's input dimensions are calculated for you: you specify a layer's output dimensions, and this determines the next layer's input dimensions.

In [None]:
# Like random_state: fixes TensorFlow's random seed so the run is repeatable
tf.random.set_seed(1234)

# The Sequential model is a convenient means of constructing multi-layer models
model = Sequential(
    [
        # Input specifies the expected shape of ONE example (the batch dimension is left out),
        # i.e. a vector with one entry per feature column of X.
        # It lets TensorFlow size the weight and bias parameters at this point.
        # This statement can be omitted in practice; TensorFlow then sizes the network
        # parameters when the input data is specified in the "model.fit" statement.
        tf.keras.Input(shape=(X.shape[1],)),
        Dense(units=25, activation='sigmoid', name='first_layer'),
        Dense(units=15, activation='sigmoid', name='second_layer'),
        # No sigmoid activation on the final layer: it outputs a raw score (logit)
        # and the sigmoid is accounted for in the loss.
        Dense(units=1, name='output_layer'),
    ],
    name="my_model",
)

# Defines the loss function and the optimizer used for training.
# from_logits=True is required because the output layer has no activation: the model emits
# raw scores (logits) and the loss applies the sigmoid itself. With the default
# from_logits=False the loss would treat those raw scores as probabilities.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
)

# Runs gradient descent to fit the weights to the data
# For efficiency, the training data set is broken into 'batches' with a default size of 32
# epochs specifies how many times the NN will pass through the entire training dataset during training
# Each epoch represents one full cycle through all the training data.
# The normalized features Xn are used for training; new data is normalized the same way below.
model.fit(
    Xn, y,
    batch_size=32,
    epochs=10,
)

# New example to classify; it goes through the same normalization as the training data
X_new = np.array([[350.0, 122.0]])
X_new_n = norm_layer(X_new)

# Carries out forward propagation using the NN we compiled using Sequential.
# The output layer has no activation, so predict() returns a 1x1 matrix holding a logit;
# the sigmoid converts it into a probability.
logits = model.predict(X_new_n)
predictions = tf.nn.sigmoid(logits).numpy()

# Apply a threshold to convert the probabilities to a decision (1 if p >= 0.5, else 0)
yhat = (predictions >= 0.5).astype(predictions.dtype)

### Utilities on layers 
#### Number of parameters per layer = (number_of_units x number_of_input_features) + number_of_units  

#### -The weights 𝑊 should be a matrix with a size (number_of_input_features, number_of_units) 
#### -The bias 𝑏 size should be a 1d array with a size equal to the (number_of_units)

#### -Concatenate every parameter vector (column) of each neuron in the layer to create the 𝑊 matrix that has all the parameters
#### -Same approach with the bias vector, which can be represented as a 1-D (n,) or 2-D (1,n) array (TensorFlow utilizes the 1-D representation)

In [None]:
# Shows the layers and number of parameters in the model
# The parameter counts shown in the summary correspond to the number of elements in the weight and bias arrays
model.summary()

# Extract the layers
layer1, layer2, layer3 = model.layers

# Extract the weights of each layer: get_weights() returns [W, b] as NumPy arrays
W1, b1 = layer1.get_weights()
W2, b2 = layer2.get_weights()
W3, b3 = layer3.get_weights()

# A layer can also be looked up by the name given at construction
first_layer = model.get_layer('first_layer')
w, b = first_layer.get_weights()

# Set the weights: the replacement arrays must have exactly the shapes of the current
# weight matrix and bias vector, so they are built from w.shape and b.shape.
set_w = np.full(w.shape, 200.0)
set_b = np.full(b.shape, 100.0)
first_layer.set_weights([set_w, set_b])

### Utilities on tensors

In [None]:
# Convert the tensor to a numpy array
# (a1 is the tensor obtained earlier by calling layer_1 on the input data).
# NOTE: a notebook cell only displays its last expression, so this value is not shown here.
a1.numpy()

# Access a layer's weights directly in their tensor form
# (model.layers[2] is the third entry of the model's layer list)
model.layers[2].weights 

# Neuron without activation - Regression/Linear Model

In [None]:
X_train = np.array([[1.0], [2.0]], dtype=np.float32)           #(size in 1000 square feet)
Y_train = np.array([[300.0],
                    [500.0]], dtype=np.float32)                #(price in 1000s of dollars)

# A layer with one neuron or unit and no (i.e. linear) activation
linear_layer = tf.keras.layers.Dense(units=1, activation='linear')

# The weights of a layer are only created once it has seen an input,
# so call it on one example before setting them.
linear_layer(X_train[0].reshape(1, 1))

# Known parameters: kernel of shape (1, 1) and bias of shape (1,)
set_w = np.array([[200.0]])
set_b = np.array([100.0])
linear_layer.set_weights([set_w, set_b])

# Prediction: the layer and the plain NumPy formula x.w + b must agree
prediction_tf = linear_layer(X_train)
prediction_np = np.dot(X_train, set_w) + set_b

# Compare with a tolerance (float32 tensor vs. float64 array) -> True
np.allclose(prediction_tf.numpy(), prediction_np)

# Neuron with Sigmoid activation

In [None]:
X_train = np.array([0., 1, 2, 3, 4, 5], dtype=np.float32).reshape(-1,1)  # 2-D Matrix
Y_train = np.array([0,  0, 0, 1, 1, 1], dtype=np.float32).reshape(-1,1)  # 2-D Matrix


def sigmoidnp(z):
    """NumPy logistic function 1 / (1 + e^-z), used to cross-check the Keras output."""
    return 1.0 / (1.0 + np.exp(-z))


# predict() is a method of a model, not of a layer, so the single sigmoid neuron
# is wrapped in a Sequential model.
model = Sequential(
    [
        tf.keras.Input(shape=(1,)),
        Dense(1, activation='sigmoid', name='L1'),
    ]
)

# Known parameters: kernel of shape (1, 1) and bias of shape (1,)
set_w = np.array([[2.0]])
set_b = np.array([-4.5])
model.get_layer('L1').set_weights([set_w, set_b])

# Prediction for the first example, by the model and by the NumPy formula sigmoid(x.w + b)
x0 = X_train[0].reshape(1, 1)
prediction = model.predict(x0)
alog = sigmoidnp(np.dot(x0, set_w) + set_b)

# Compare with a tolerance -> True
np.allclose(prediction, alog)

# Build Dense and Sequential from Scratch

In [2]:
# Building dense with and without vectorisation
def my_dense(a_in, W, b, g):
    """
    Compute the activations of one dense layer, one unit at a time.

    Args:
      a_in : input vector; expected to be 1-D so that each unit's z is a scalar
      W    : 2-D weight matrix, one column of weights per unit
      b    : bias values, indexed by unit
      g    : activation function applied to each unit's z
    Returns:
      1-D array with one activation per unit (per column of W)
    """
    n_units = W.shape[1]
    activations = np.zeros(n_units)
    for unit in range(n_units):
        column = W[:, unit]                       # weights feeding this unit
        activations[unit] = g(np.dot(column, a_in) + b[unit])
    return activations

def my_dense_vectorized(A_in, W, B, g):
    """
    Compute a dense layer for a whole batch at once; every argument is a matrix.

    Args:
      A_in : 2-D input matrix, one example per row
      W    : 2-D weight matrix, one column of weights per unit
      B    : bias, broadcast against the rows of A_in @ W
      g    : activation function applied element-wise to Z
    Returns:
      A_out: matrix of activations, one row per example and one column per unit
    """
    # np.matmul is the matrix product (NumPy has no function named "matmult")
    Z = np.matmul(A_in, W) + B
    A_out = g(Z)
    return A_out

# Building the sequential (forward pass) and predict functions
def _sigmoid_np(z):
    """NumPy logistic function 1 / (1 + e^-z)."""
    z = np.clip(z, -500, 500)  # keeps np.exp from overflowing on extreme inputs
    return 1.0 / (1.0 + np.exp(-z))


def my_sequential(x, W1, b1, W2, b2):
    """
    Forward-propagate one example through a two-layer network with sigmoid activations.

    Args:
      x      : 1-D input vector (one example)
      W1, b1 : weight matrix and bias of the first layer
      W2, b2 : weight matrix and bias of the second (output) layer
    Returns:
      1-D array holding the activation(s) of the output layer
    """
    # Same computation as applying my_dense layer by layer, written with a matrix product
    a1 = _sigmoid_np(np.matmul(x, W1) + b1)
    a2 = _sigmoid_np(np.matmul(a1, W2) + b2)
    return a2


def my_predict(X, W1, b1, W2, b2):
    """
    Run the two-layer network on every row of X.

    Args:
      X      : 2-D matrix of examples, one per row
      W1, b1 : weight matrix and bias of the first layer
      W2, b2 : weight matrix and bias of the second layer (a single output unit)
    Returns:
      p : array of shape (m, 1) with one network output per example
    """
    m = X.shape[0]
    p = np.zeros((m, 1))
    for i in range(m):
        # Assign the whole row so the 1-element output array needs no array->scalar conversion
        p[i] = my_sequential(X[i], W1, b1, W2, b2)
    return p

# Hand-prepared parameters, used instead of building the gradient descent function
# First layer: weight matrix (2 rows x 3 columns) and its 3 biases
W1_tmp = np.array([
    [-8.93, 0.29, 12.9],
    [-0.1, -7.32, 10.81],
])
b1_tmp = np.array([-9.82, -9.28, 0.96])

# Second layer: weight matrix (3 rows x 1 column) and its single bias
W2_tmp = np.array([[-31.18], [-27.59], [-32.56]])
b2_tmp = np.array([15.41])


# Some test data: two examples with two features each
X_tst = np.array([
    [200, 13.9],  # positive example
    [200, 17],    # negative example
])

# Normalizing the test data with the normalization layer adapted earlier (norm_layer),
# then converting the result to a NumPy array because my_predict is NumPy-based
X_tstn = np.asarray(norm_layer(X_tst))

# Prediction (Between 0 and 1)
predictions = my_predict(X_tstn, W1_tmp, b1_tmp, W2_tmp, b2_tmp)

# Threshold application: 1 where the prediction is >= 0.5, otherwise 0
yhat = (predictions >= 0.5).astype(predictions.dtype)