In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
sns.set_style('darkgrid')
mpl.rcParams['figure.figsize'] = [18,10]

### The linear regression model 

- How do we get from a linear regression model to a neural network?
    - By adding a hidden layer
    - A Dense layer applies weights to all nodes from the previous layer

### A simple dense layer

In [3]:
import tensorflow as tf

# Define inputs (features)
inputs = tf.constant([[1.0, 35.0]])

# Define weights 
weights = tf.Variable([[-0.05], [-0.01]])

# Define the bias 
bias = tf.Variable([0.5])

# Multiply inputs (features) by the weights
product = tf.matmul(inputs, weights)

# Define dense layer
dense = tf.keras.activations.sigmoid(product+bias)

### Defining a complete model

In [4]:
# Define input (features) layer 
inputs = tf.constant(inputs, tf.float32)

In [5]:
# Define a first dense layer
dense1 = tf.keras.layers.Dense(10, activation='sigmoid')(inputs)

In [6]:
# Define a second dense layer
dense2 = tf.keras.layers.Dense(5, activation='sigmoid')(dense1)

In [7]:
# Define output (predictions) layer
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(dense2)

### High-level versus low-level approach
- High-level approach
    - High-level API operations
- Low-level approach 
    - Linear-algebraic operations

In [8]:
from tensorflow import Variable, ones, matmul, keras

In [9]:
df = pd.read_csv('data/uci_credit_card.csv')
df.columns

Index(['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0',
       'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
       'default.payment.next.month'],
      dtype='object')

In [10]:
borrower_features = df[['EDUCATION', 'MARRIAGE', 'AGE']].values
borrower_features = tf.convert_to_tensor(borrower_features, np.float32)
borrower_features

<tf.Tensor: id=102, shape=(30000, 3), dtype=float32, numpy=
array([[ 2.,  1., 24.],
       [ 2.,  2., 26.],
       [ 2.,  2., 34.],
       ...,
       [ 2.,  2., 37.],
       [ 3.,  1., 41.],
       [ 2.,  1., 46.]], dtype=float32)>

In [11]:
# Initialize bias1
bias1 = Variable(1.0)

# Initialize weights1 as 3x2 variable of ones
weights1 = Variable(ones((3, 2)))

# Perform matrix multiplication of borrower_features and weights1
product1 = matmul(borrower_features, weights1)

# Apply sigmoid activation function to product1 + bias1
dense1 = keras.activations.sigmoid(product1 + bias1)

# Print shape of dense1
print("\n dense1's output shape: {}".format(dense1.shape))


 dense1's output shape: (30000, 2)


In [12]:
# Initialize bias2 and weights2
bias2 = Variable(1.0)
weights2 = Variable(ones((2, 1)))

# Perform matrix multiplication of dense1 and weights2
product2 = matmul(dense1, weights2)

# Apply activation to product2 + bias2 and print the prediction
prediction = keras.activations.sigmoid(product2 + bias2)
print('\n prediction: {}'.format(prediction.numpy()[0,0]))
print('\n actual: 1')


 prediction: 0.9525741338729858

 actual: 1


In [13]:
idx = tf.constant([0,1,2,3,4])

borrower_features = tf.gather(borrower_features, idx)

borrower_features

<tf.Tensor: id=147, shape=(5, 3), dtype=float32, numpy=
array([[ 2.,  1., 24.],
       [ 2.,  2., 26.],
       [ 2.,  2., 34.],
       [ 2.,  1., 37.],
       [ 2.,  1., 57.]], dtype=float32)>

In [14]:
bias1 = Variable([0.1])
bias1

<tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.1], dtype=float32)>

In [15]:
weights1 = Variable([[-0.6 ,  0.6 ],
       [ 0.8 , -0.3 ],
       [-0.09, -0.08]])

weights1

<tf.Variable 'Variable:0' shape=(3, 2) dtype=float32, numpy=
array([[-0.6 ,  0.6 ],
       [ 0.8 , -0.3 ],
       [-0.09, -0.08]], dtype=float32)>

In [16]:
# Compute the product of borrower_features and weights1
products1 = matmul(borrower_features, weights1)

# Apply a sigmoid activation function to products1 + bias1
dense1 = keras.activations.sigmoid(products1+bias1)

# Print the shapes of borrower_features, weights1, bias1, and dense1
print('\n shape of borrower_features: ', borrower_features.shape)
print('\n shape of weights1: ', weights1.shape)
print('\n shape of bias1: ', bias1.shape)
print('\n shape of dense1: ', dense1.shape)


 shape of borrower_features:  (5, 3)

 shape of weights1:  (3, 2)

 shape of bias1:  (1,)

 shape of dense1:  (5, 2)


In [17]:
df = pd.read_csv('data/uci_credit_card.csv')
df.columns

Index(['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0',
       'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
       'default.payment.next.month'],
      dtype='object')

In [18]:
features = df.columns[1:11].tolist()
features

['LIMIT_BAL',
 'SEX',
 'EDUCATION',
 'MARRIAGE',
 'AGE',
 'PAY_0',
 'PAY_2',
 'PAY_3',
 'PAY_4',
 'PAY_5']

In [19]:
borrower_features = df[features].values
borrower_features = tf.convert_to_tensor(borrower_features, np.float32)
idx = tf.constant(list(range(0,100)))

borrower_features = tf.gather(borrower_features, idx)

#borrower_features

In [20]:
from tensorflow import keras

In [21]:
# Define the first dense layer
dense1 = keras.layers.Dense(7, activation='sigmoid')(borrower_features)

# Define a dense layer with 3 output nodes
dense2 = keras.layers.Dense(3, activation='sigmoid')(dense1)

# Define a dense layer with 1 output node
predictions = keras.layers.Dense(1, activation='sigmoid')(dense2)

# Print the shapes of dense1, dense2, and predictions
print('\n shape of dense1: ', dense1.shape)
print('\n shape of dense2: ', dense2.shape)
print('\n shape of predictions: ', predictions.shape)


 shape of dense1:  (100, 7)

 shape of dense2:  (100, 3)

 shape of predictions:  (100, 1)


### What is an activation function?
- Components of a typical hidden layer
    - **Linear**: Matrix multiplication
    - **Nonlinear**: Activation function

- Why nonlinearities are important    

In [22]:
# Define example borrower features
young, old = 0.3, 0.6
low_bill, high_bill = 0.1, 0.5

# Apply matrix multiplication step for all feature combinations
young_high = 1.0*young + 2.0*high_bill
young_low = 1.0*young + 2.0*low_bill
old_high = 1.0*old + 2.0*high_bill
old_low = 1.0*old + 2.0*low_bill

In [23]:
# Difference in default predictions for young
print(young_high - young_low)

0.8


In [24]:
# Difference in default predictions for old
print(old_high - old_low)

0.8


In [25]:
# Difference in default predictions for young
print(tf.keras.activations.sigmoid(young_high).numpy() -
tf.keras.activations.sigmoid(young_low).numpy())

# Difference in default predictions for old
print(tf.keras.activations.sigmoid(old_high).numpy() -
tf.keras.activations.sigmoid(old_low).numpy())

0.16337568
0.14204389


### The sigmoid activation function 

- Sigmoid activation function
    - Binary classification
    - Low-level: `tf.keras.activations.sigmoid()`
    - High-level: `sigmoid`

<img src="https://github.com/MikSm1th/datacamp_notes/blob/master/tensor_flow/data/sigmoid.png?raw=true" height="500" width="500">

### The relu activation function
- ReLu activation function
    - Hidden layers
    - Low-level: `tf.keras.activations.relu()`
    - High-level: `relu`

<img src="https://github.com/MikSm1th/datacamp_notes/blob/master/tensor_flow/data/relu.png?raw=true" height="500" width="500">

### The softmax activation function
- Softmax activation function
    - Output layer (>2 classes)
    - Low-level: `tf.keras.activations.softmax()`
    - High-level: `softmax`
- Returned as predicted class probabilites in multiclass problems. 


In [26]:
# Define input layer
inputs = tf.constant(borrower_features, tf.float32)

In [27]:
# Define dense layer 1 
dense1 = tf.keras.layers.Dense(16, activation='relu')(inputs)

In [28]:
# Define dense layer 2 
dense2 = tf.keras.layers.Dense(8, activation='sigmoid')(dense1)

In [29]:
# Define output layer  
outputs = tf.keras.layers.Dense(4, activation='softmax')(dense2)

In [30]:
df = pd.read_csv('data/uci_credit_card.csv')
df.columns

Index(['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0',
       'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
       'default.payment.next.month'],
      dtype='object')

In [31]:
bill_amounts = df[['BILL_AMT1','BILL_AMT2','BILL_AMT3']].values
bill_amounts

array([[ 3913.,  3102.,   689.],
       [ 2682.,  1725.,  2682.],
       [29239., 14027., 13559.],
       ...,
       [ 3565.,  3356.,  2758.],
       [-1645., 78379., 76304.],
       [47929., 48905., 49764.]])

In [32]:
default = df['default.payment.next.month'].values
default

array([1, 1, 0, ..., 1, 1, 1])

In [33]:
from tensorflow import constant, float32, keras 

In [34]:
# Construct input layer from features
inputs = constant(bill_amounts, float32)

# Define first dense layer
dense1 = keras.layers.Dense(3, activation='relu')(inputs)

# Define second dense layer
dense2 = keras.layers.Dense(2, activation='relu')(dense1)

# Define output layer
outputs = keras.layers.Dense(1, activation='sigmoid')(dense2)

In [35]:
# Print error for first five examples
error = default[:5] - outputs.numpy()[:5]
print(error)
# Don't know why sigmoid doesn't seem to be working here

[[ 1.   1.   0.   0.   0. ]
 [ 0.5  0.5 -0.5 -0.5 -0.5]
 [ 0.5  0.5 -0.5 -0.5 -0.5]
 [ 1.   1.   0.   0.   0. ]
 [ 0.5  0.5 -0.5 -0.5 -0.5]]


In [40]:
df.head()
borrower_features = df[['BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']].values
borrower_features

array([[ 3913.,  3102.,   689., ...,     0.,     0.,     0.],
       [ 2682.,  1725.,  2682., ...,  1000.,     0.,  2000.],
       [29239., 14027., 13559., ...,  1000.,  1000.,  5000.],
       ...,
       [ 3565.,  3356.,  2758., ...,  4200.,  2000.,  3100.],
       [-1645., 78379., 76304., ...,  1926., 52964.,  1804.],
       [47929., 48905., 49764., ...,  1000.,  1000.,  1000.]])

In [41]:
# Construct input layer from borrower features
inputs = constant(borrower_features, float32)

# Define first dense layer
dense1 = keras.layers.Dense(10, activation='sigmoid')(inputs)

# Define second dense layer
dense2 = keras.layers.Dense(8, activation='relu')(dense1)

# Define output layer
outputs = keras.layers.Dense(6, activation='softmax')(dense2)

# Print first five predictions
print(outputs.numpy()[:5])

[[0.17570493 0.16328397 0.13495122 0.17604467 0.15727484 0.19274037]
 [0.19629978 0.13583869 0.17792721 0.10455916 0.22216482 0.16321039]
 [0.19013937 0.19181807 0.16439721 0.18515061 0.14620133 0.12229344]
 [0.19013937 0.19181807 0.16439721 0.18515061 0.14620133 0.12229344]
 [0.22710752 0.13175103 0.12229656 0.07892621 0.25882366 0.18109499]]


### Optimizers

- Stochastic gradient descent or SGD is an improved version of gradient descent that is les


In [36]:
!../gitbsh > /dev/null 2>&1