In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Notebook-wide plotting defaults: seaborn's dark grid theme and an 18x10 figure size
sns.set_style('darkgrid')
mpl.rcParams.update({'figure.figsize': [18, 10]})

### The linear regression model 

- How do we get from a linear regression model to a neural network?
    - By adding a hidden layer
    - A Dense layer applies weights to all nodes from the previous layer

### A simple dense layer

In [None]:
import tensorflow as tf

# A single observation with two feature values (a 1x2 constant tensor)
inputs = tf.constant([[1.0, 35.0]])

# Trainable 2x1 weight matrix: one weight per input feature
weights = tf.Variable([[-0.05], [-0.01]])

# Trainable bias added to the weighted sum
bias = tf.Variable([0.5])

# Linear step: matrix-multiply the features by the weights
product = tf.matmul(inputs, weights)

# Nonlinear step: the dense layer output is sigmoid(inputs @ weights + bias)
dense = tf.keras.activations.sigmoid(product+bias)

### Defining a complete model

In [None]:
# Input (features) layer: re-wrap the existing `inputs` as a float32 constant tensor
inputs = tf.constant(inputs, dtype=tf.float32)

In [None]:
# First hidden layer: build a 10-unit sigmoid Dense layer, then apply it to the inputs
first_hidden_layer = tf.keras.layers.Dense(10, activation='sigmoid')
dense1 = first_hidden_layer(inputs)

In [None]:
# Second hidden layer: build a 5-unit sigmoid Dense layer, then feed it the first layer's output
second_hidden_layer = tf.keras.layers.Dense(5, activation='sigmoid')
dense2 = second_hidden_layer(dense1)

In [None]:
# Output (predictions) layer: a single sigmoid unit on top of the second hidden layer
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')
outputs = output_layer(dense2)

### High-level versus low-level approach
- High-level approach
    - High-level API operations
- Low-level approach 
    - Linear-algebraic operations

In [None]:
from tensorflow import Variable, ones, matmul, keras

In [None]:
# Read the credit-card CSV into a DataFrame and show which columns it contains
credit_csv_path = 'data/uci_credit_card.csv'
df = pd.read_csv(credit_csv_path)
df.columns

In [None]:
# Select three borrower columns and convert their values to a float32 tensor
selected_columns = ['EDUCATION', 'MARRIAGE', 'AGE']
borrower_features = tf.convert_to_tensor(df[selected_columns].values, dtype=np.float32)
borrower_features

In [None]:
# Parameters of the first layer: a 3x2 weight matrix of ones and a scalar bias of 1.0
weights1 = Variable(ones((3, 2)))
bias1 = Variable(1.0)

# Linear step (features @ weights) followed by the sigmoid nonlinearity
product1 = matmul(borrower_features, weights1)
dense1 = keras.activations.sigmoid(product1 + bias1)

# Report the shape of the layer output
shape_message = "\n dense1's output shape: {}"
print(shape_message.format(dense1.shape))

In [None]:
# Parameters of the output layer: a 2x1 weight matrix of ones and a scalar bias of 1.0
weights2 = Variable(ones((2, 1)))
bias2 = Variable(1.0)

# Linear step on the hidden-layer output, then sigmoid to obtain the prediction
product2 = matmul(dense1, weights2)
prediction = keras.activations.sigmoid(product2 + bias2)

# Show the prediction for the first row next to the hard-coded reference label
first_prediction = prediction.numpy()[0, 0]
print('\n prediction: {}'.format(first_prediction))
print('\n actual: 1')

In [None]:
# Keep only the rows at positions 0-4 of the feature tensor
idx = tf.constant([0, 1, 2, 3, 4])
borrower_features = tf.gather(borrower_features, indices=idx)
borrower_features

In [None]:
# Re-initialise the first-layer bias as a one-element variable and display it
bias1 = Variable(initial_value=[0.1])
bias1

In [None]:
# Re-initialise the first-layer weights as a 3x2 variable (one row per input feature)
initial_weights1 = [
    [-0.6, 0.6],
    [0.8, -0.3],
    [-0.09, -0.08],
]
weights1 = Variable(initial_weights1)
weights1

In [None]:
# Linear step on the borrower features, then the sigmoid activation
products1 = matmul(borrower_features, weights1)
dense1 = keras.activations.sigmoid(products1 + bias1)

# Print the shape of every tensor involved in the layer, in the same order as before
for shape_label, shaped_object in (
    ('\n shape of borrower_features: ', borrower_features),
    ('\n shape of weights1: ', weights1),
    ('\n shape of bias1: ', bias1),
    ('\n shape of dense1: ', dense1),
):
    print(shape_label, shaped_object.shape)

In [None]:
# Re-read the credit-card CSV into `df` (same path as the earlier load cell) and show its column names
df = pd.read_csv('data/uci_credit_card.csv')
df.columns

In [None]:
# Column names at positions 1 through 10 of the frame, as a plain Python list
features = list(df.columns[1:11])
features

In [None]:
# Convert the selected feature columns to a float32 tensor
feature_matrix = tf.convert_to_tensor(df[features].values, dtype=np.float32)

# Keep only the first 100 rows
idx = tf.constant(list(range(100)))
borrower_features = tf.gather(feature_matrix, idx)

In [None]:
from tensorflow import keras

In [None]:
# Stack three sigmoid Dense layers on the borrower features: 7 units, then 3, then 1
dense1 = keras.layers.Dense(7, activation='sigmoid')(borrower_features)
dense2 = keras.layers.Dense(3, activation='sigmoid')(dense1)
predictions = keras.layers.Dense(1, activation='sigmoid')(dense2)

# Print the output shape of each layer in order
for shape_label, layer_output in (
    ('\n shape of dense1: ', dense1),
    ('\n shape of dense2: ', dense2),
    ('\n shape of predictions: ', predictions),
):
    print(shape_label, layer_output.shape)

### What is an activation function?
- Components of a typical hidden layer
    - **Linear**: Matrix multiplication
    - **Nonlinear**: Activation function

- Why nonlinearities are important    

In [None]:
# Example borrower feature values
young, old = 0.3, 0.6
low_bill, high_bill = 0.1, 0.5


def _linear_step(age, bill):
    """Return the linear (pre-activation) value 1.0*age + 2.0*bill."""
    return 1.0*age + 2.0*bill


# Linear step for every age/bill combination
young_high = _linear_step(young, high_bill)
young_low = _linear_step(young, low_bill)
old_high = _linear_step(old, high_bill)
old_low = _linear_step(old, low_bill)

In [None]:
# Effect of a high versus low bill on the linear output for the young borrower
young_difference = young_high - young_low
print(young_difference)

In [None]:
# Effect of a high versus low bill on the linear output for the old borrower
old_difference = old_high - old_low
print(old_difference)

In [None]:
_sigmoid = tf.keras.activations.sigmoid

# Difference in default predictions for young, after the sigmoid activation
young_activated_difference = _sigmoid(young_high).numpy() - _sigmoid(young_low).numpy()
print(young_activated_difference)

# Difference in default predictions for old, after the sigmoid activation
old_activated_difference = _sigmoid(old_high).numpy() - _sigmoid(old_low).numpy()
print(old_activated_difference)

### The sigmoid activation function 

- Sigmoid activation function
    - Binary classification
    - Low-level: `tf.keras.activations.sigmoid()`
    - High-level: `sigmoid`

<img src="https://github.com/MikSm1th/datacamp_notes/blob/master/tensor_flow/data/sigmoid.png?raw=true" height="500" width="500">

### The relu activation function
- ReLU (rectified linear unit) activation function
    - Hidden layers
    - Low-level: `tf.keras.activations.relu()`
    - High-level: `relu`

<img src="https://github.com/MikSm1th/datacamp_notes/blob/master/tensor_flow/data/relu.png?raw=true" height="500" width="500">

### The softmax activation function
- Softmax activation function
    - Output layer (>2 classes)
    - Low-level: `tf.keras.activations.softmax()`
    - High-level: `softmax`
- Outputs are returned as predicted class probabilities in multiclass problems.


In [None]:
# Input layer: the borrower features as a float32 constant tensor
inputs = tf.constant(borrower_features, dtype=tf.float32)

In [None]:
# Dense layer 1: build a 16-unit ReLU layer, then apply it to the inputs
relu_layer = tf.keras.layers.Dense(16, activation='relu')
dense1 = relu_layer(inputs)

In [None]:
# Dense layer 2: build an 8-unit sigmoid layer, then apply it to the first layer's output
sigmoid_layer = tf.keras.layers.Dense(8, activation='sigmoid')
dense2 = sigmoid_layer(dense1)

In [None]:
# Output layer: build a 4-unit softmax layer, then apply it to the second layer's output
softmax_layer = tf.keras.layers.Dense(4, activation='softmax')
outputs = softmax_layer(dense2)

In [None]:
# Re-read the credit-card CSV into `df` (same path as the earlier load cells) and show its column names
df = pd.read_csv('data/uci_credit_card.csv')
df.columns

In [None]:
# Extract the first three bill-amount columns as a NumPy array and display it
bill_columns = ['BILL_AMT1','BILL_AMT2','BILL_AMT3']
bill_amounts = df[bill_columns].values
bill_amounts

In [None]:
# Target column as a 1-D NumPy array
target_column = 'default.payment.next.month'
default = df[target_column].values
default

In [None]:
from tensorflow import constant, float32, keras 

In [None]:
# Input layer: bill amounts as a float32 constant tensor
inputs = constant(bill_amounts, dtype=float32)

# Two ReLU hidden layers (3 units, then 2 units)
hidden_layer_1 = keras.layers.Dense(3, activation='relu')
dense1 = hidden_layer_1(inputs)
hidden_layer_2 = keras.layers.Dense(2, activation='relu')
dense2 = hidden_layer_2(dense1)

# Single-unit sigmoid output layer
prediction_layer = keras.layers.Dense(1, activation='sigmoid')
outputs = prediction_layer(dense2)

In [None]:
# Print error for first five examples.
# `default` is a 1-D array, while `outputs` comes from Dense(1) and therefore has a
# trailing axis of size 1. Flatten the predictions first so the subtraction is
# element-wise (five errors) instead of broadcasting into a 5x5 matrix.
first_five_predictions = outputs.numpy()[:5].reshape(-1)
error = default[:5] - first_five_predictions
print(error)
# NOTE(review): if the sigmoid outputs look stuck at a few values, the likely cause is
# that the bill amounts are fed in unscaled, so the pre-activations are very large or
# zeroed by the ReLU layers -- TODO confirm by standardising the inputs.

In [None]:
# Not the last expression in the cell, so this preview is evaluated but not displayed
df.head()

# Six bill-amount columns followed by six payment-amount columns, as a NumPy array
amount_columns = [
    'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6',
    'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
]
borrower_features = df[amount_columns].values
borrower_features

In [None]:
# Input layer: borrower features as a float32 constant tensor
inputs = constant(borrower_features, dtype=float32)

# Hidden layers: 10 sigmoid units followed by 8 ReLU units
sigmoid_hidden = keras.layers.Dense(10, activation='sigmoid')
dense1 = sigmoid_hidden(inputs)
relu_hidden = keras.layers.Dense(8, activation='relu')
dense2 = relu_hidden(dense1)

# Output layer: 6-unit softmax
class_layer = keras.layers.Dense(6, activation='softmax')
outputs = class_layer(dense2)

# Print first five predictions
first_five_rows = outputs.numpy()[:5]
print(first_five_rows)

### Optimizers

- Stochastic gradient descent or SGD is an improved version of gradient descent that is less likely to get stuck in local minima.

<img src="https://github.com/MikSm1th/datacamp_notes/blob/master/tensor_flow/data/SGC.png?raw=true" height="500" width="500">

- For simple problems, the SGD algorithm performs well.


### The gradient descent optimizer
- Stochastic gradient descent (SGD) optimizer
    - `tf.keras.optimizers.SGD()`
    - `learning_rate`
- Simple and easy to interpret
***

### The RMS prop optimizer
- Root mean squared (RMS) propagation optimizer
    - Applies different learning rates to each feature
    - `tf.keras.optimizers.RMSprop()`
    - `learning_rate`
    - `momentum`
    - `decay`
- Allows for momentum to both build and decay

### The adam optimizer
- Adaptive moment estimation (Adam) optimizer
    - `tf.keras.optimizers.Adam()`
    - `learning_rate`
    - `beta_1`
- Performs well with default parameter values

### A complete example

In [None]:
# Define the model function
def model(bias, weights, features=None):
    """Single dense layer: sigmoid(features @ weights + bias).

    Args:
        bias: Bias term added to the matrix product.
        weights: Weight matrix that `features` is multiplied by.
        features: Input feature matrix. When omitted, the notebook-level
            `borrower_features` is looked up at call time.

    Returns:
        The result of `tf.keras.activations.sigmoid` applied to
        `tf.matmul(features, weights) + bias`.
    """
    if features is None:
        # Resolve the global on every call. A `features=borrower_features` default is
        # bound once at definition time and would keep pointing at the old object
        # after a later cell rebinds `borrower_features` (e.g. to a float32 tensor).
        features = borrower_features
    product = tf.matmul(features, weights)
    return tf.keras.activations.sigmoid(product + bias)

In [None]:
# Compute the predicted values and loss
def loss_function(bias, weights, targets = default, features = None):
    """Binary cross-entropy between `targets` and the model's predictions.

    Args:
        bias: Bias term forwarded to `model`.
        weights: Weight matrix forwarded to `model`.
        targets: True labels; defaults to the notebook-level `default` array
            as it exists when this cell is run.
        features: Optional feature matrix forwarded to `model`. When omitted,
            `model` falls back to its own default features.

    Returns:
        The value of `tf.keras.losses.binary_crossentropy(targets, predictions)`.
    """
    # NOTE(review): `targets` looks 1-D while `model` returns a column of predictions;
    # confirm the shapes line up before training on this loss.
    if features is None:
        predictions = model(bias, weights)
    else:
        # Forward the caller's features; previously this argument was silently ignored.
        predictions = model(bias, weights, features)
    return tf.keras.losses.binary_crossentropy(targets, predictions)

In [None]:
# Convert the borrower features to a float32 constant tensor
borrower_features = constant(borrower_features, dtype=float32)

```python
# Minimize the loss function with RMS propagation
opt = tf.keras.optimizers.RMSprop(learning_rate=0.01, momentum=0.9)
opt.minimize(lambda: loss_function(bias, weights), var_list=[bias, weights])
```

In [None]:
from tensorflow.python.util.tf_export import tf_export
from tensorflow.python.util import dispatch
@tf_export("math.divide", "divide")
@dispatch.add_dispatch_support
def divide(x, y, name=None):
  """Computes Python style division of `x` by `y`.

  Args:
    x: Numerator (tensor, variable or Python number).
    y: Denominator (tensor, variable or Python number).
    name: Optional name for the operation. When given, the division is
      delegated to the public `tf.math.divide` so the name can be attached.

  Returns:
    The result of `x / y`.
  """
  if name is not None:
    # The `/` operator overload has no way to carry an op name. The helper class
    # this branch used before (`DivideDelegateWithName`) is neither defined nor
    # imported in this notebook, so use the public op instead.
    import tensorflow as tf
    return tf.math.divide(x, y, name=name)
  return x / y

In [None]:
import math
pi = math.pi
def loss_function(x):
    """Example loss 4*cos(x - 1) + cos(2*pi*x) / x used for the optimizer demo.

    Note: this reuses the name `loss_function` and therefore replaces the
    earlier definition with the same name in this notebook.

    Args:
        x: Scalar `tf.Variable` or tensor at which to evaluate the loss.

    Returns:
        The loss value as a tensor.
    """
    # Use TensorFlow's cosine rather than `math.cos`: the stdlib function converts the
    # tensor to a plain Python float, so the cosine terms would be treated as constants
    # by automatic differentiation and the gradients used by `opt.minimize` would be wrong.
    return 4.0*tf.math.cos(x-1)+divide(tf.math.cos(2.0*pi*x),x)

In [None]:
# Initialize x_1 and x_2.
# The dtype must be passed by keyword: the second positional parameter of
# `tf.Variable` is `trainable`, not `dtype`.
x_1 = Variable(6.0, dtype=float32)
x_2 = Variable(0.3, dtype=float32)

# Define the optimization operation
opt = keras.optimizers.SGD(learning_rate=0.01)

# Take 100 gradient-descent steps from each of the two starting points
for j in range(100):
    # Perform minimization using the loss function and x_1
    opt.minimize(lambda: loss_function(x_1), var_list=[x_1])
    # Perform minimization using the loss function and x_2
    opt.minimize(lambda: loss_function(x_2), var_list=[x_2])

# Print x_1 and x_2 as numpy arrays
print(x_1.numpy(), x_2.numpy())

In [None]:
# Run the helper script `../gitbsh` through the shell, discarding both stdout and stderr.
# NOTE(review): the script is not part of this notebook -- confirm what it does before re-running.
!../gitbsh > /dev/null 2>&1