## Tensorflow 

    import tensorflow as tf

- open source library for graph-based numerical computation
- developed by Google
- low and high level APIs

### Tensor
- generalization of vectors and matrices

### Constant
- simplest category of a tensor
- not trainable
- can have any dimension

        # Import constant from TensorFlow
        from tensorflow import constant

        # Convert the credit_numpy array into a tensorflow constant
        # (credit_numpy is assumed to be a NumPy array loaded earlier -- TODO confirm)
        credit_constant = constant(credit_numpy)

        # Print constant datatype
        print('The datatype is:', credit_constant.dtype)

        # Print constant shape
        print('The shape is:', credit_constant.shape)

### Variable

    # Import Variable from TensorFlow
    from tensorflow import Variable

    # Define the 1-dimensional variable A1
    A1 = Variable([1, 2, 3, 4])

    # Print the variable A1
    print(A1)

    # Convert A1 to a numpy array and assign it to B1
    B1 = A1.numpy()

    # Print B1
    print(B1)


## Operations

- graph based operations
    - tensors = edges
    - operations = nodes

### Addition

- tensor addition
    - the add() operation performs element-wise addition w/ 2 tensors
    - tensors to be added must be the same shape
    - can also use the + symbol

### Multiplication

- element-wise multiply() needs tensors of the same shape; matmul() instead needs the inner dimensions to match (e.g. 4x2 times 2x1)

#### multiply() for tensors
    
    # Define tensors A1 and A23 as constants
    # A1 is a 4-element vector, A23 is a 2x3 matrix
    A1 = constant([1, 2, 3, 4])
    A23 = constant([[1, 2, 3], [1, 6, 4]])

    # Define B1 and B23 to have the correct shape
    # (ones_like is given A1 / A23 so the results can be multiplied element-wise with them)
    B1 = ones_like(A1)
    B23 = ones_like(A23)

    # Perform element-wise multiplication
    C1 = multiply(B1,A1)
    C23 = multiply(B23,A23)

    # Print the tensors C1 and C23
    print('C1: {}'.format(C1.numpy()))
    print('C23: {}'.format(C23.numpy()))
    
#### matmul() for matrix multiplication

    # Define features, params, and bill as constants
    # (features is 4x2, params is 2x1, bill is 4x1)
    features = constant([[2, 24], [2, 26], [2, 57], [1, 37]])
    params = constant([[1000], [150]])
    bill = constant([[3913], [2682], [8617], [64400]])

    # Compute billpred using features and params
    # (a 4x2 matrix times a 2x1 matrix gives a 4x1 result, matching bill)
    billpred = matmul(features, params)

    # Compute and print the error
    error = bill - billpred
    print(error.numpy())

    
### Summation over Tensor

- sums over all dimensions by default, or over a chosen axis, to reduce tensor size

#### reduce_sum()

    # The second argument is the axis to sum over (omit it to sum over every axis)
    reduce_sum(data, axis).numpy()
    
#### gradient()
- computes the slope of a function at a point
- assists in finding optimums
    - minimum - lowest value of a  loss function (change in gradient > 0)
    - maximum - highest value of objective function (change in gradient < 0)
    - optimum - point where gradient = 0
   
            def compute_gradient(x0):
                """Return the slope of y = x * x at x = x0 as a numpy value."""
                # Define x as a variable with an initial value of x0
                x = Variable(x0)
                with GradientTape() as tape:
                    # Record operations on x so the tape can differentiate with respect to it
                    tape.watch(x)
                    # Define y using the multiply operation
                    y = multiply(x, x)
                # Return the gradient of y with respect to x
                return tape.gradient(y, x).numpy()

            # Compute and print gradients at x = -1, 1, and 0
            # (y = x * x has slope 2 * x, so the expected values are -2.0, 2.0 and 0.0)
            print(compute_gradient(-1.0))
            print(compute_gradient(1.0))
            print(compute_gradient(0.0))

#### reshape
- reshapes a tensor
- good for reshaping images

        # Reshape the grayscale image tensor into a vector
        # (784 x 1 column; presumably a 28x28 image -- TODO confirm)
        gray_vector = reshape(gray_tensor, (784, 1))

        # Reshape the color image tensor into a vector
        # (2352 x 1 column; presumably 28x28 with 3 color channels -- TODO confirm)
        color_vector = reshape(color_tensor, (2352, 1))

#### random()
- populates a tensor w/ entries drawn from a prob. distribution

## Linear Models

### Input Data

- data can be imported using tensorflow for pipeline management
- simpler option is to import with pandas and convert to numpy array

#### tf.cast()
- converts data to a tensor of the given datatype

        # Define waterfront as a Boolean using cast
        # (housing is presumably a pandas DataFrame loaded earlier -- TODO confirm)
        waterfront = tf.cast(housing['waterfront'], tf.bool)
        
### Loss Functions

- used to train models
- measure of model fit
- goal is to minimize loss
- MSE, MAE and Huber loss functions all available through tf.keras.losses.####()

        # Import the keras module from tensorflow
        from tensorflow import keras

        # Compute the mean squared error (mse)
        loss = keras.losses.mse(price, predictions)

        # Compute the mean absolute error (mae)
        # (rebinds loss, replacing the mse value computed just before)
        loss = keras.losses.mae(price, predictions)

### Linear Regression

#### univariate

    # Univariate linear model: prediction = intercept + slope * features
    def linear_regression(intercept, slope, features = size_log):
        """Return the model's predictions for the given features."""
        scaled = slope * features
        return intercept + scaled

    # Loss for the univariate model, taking intercept and slope as arguments
    def loss_function(intercept, slope, features = size_log, targets = price_log):
        """Return the mean squared error between targets and the model's predictions."""
        return keras.losses.mse(
            targets,
            linear_regression(intercept, slope, features),
        )

    # Compute the loss for different slope and intercept values
    print(loss_function(0.1, 0.1).numpy())
    print(loss_function(0.1, 0.5).numpy())

    # Initialize an adam optimizer
    # (0.5 is passed positionally; presumably the learning rate -- TODO confirm)
    opt = keras.optimizers.Adam(0.5)

    # intercept and slope are not defined in these notes; they are assumed to be
    # trainable Variables created earlier -- TODO confirm
    for j in range(100):
        # Apply minimize, pass the loss function, and supply the variables
        opt.minimize(lambda: loss_function(intercept,slope), var_list=[intercept, slope])

        # Print every 10th value of the loss
        if j % 10 == 0:
            print(loss_function(intercept, slope).numpy())

    # Plot data and regression line
    # (plot_results is not defined in these notes; presumably a course helper)
    plot_results(intercept, slope)
    
#### multivariate

    # Two-feature linear model; params holds [intercept, weight1, weight2]
    def linear_regression(params, feature1 = size_log, feature2 = bedrooms):
        """Return predictions from the intercept plus the two weighted features."""
        intercept, weight1, weight2 = params[0], params[1], params[2]
        return intercept + feature1*weight1 + feature2*weight2

    # Loss for the two-feature model
    def loss_function(params, targets = price_log, feature1 = size_log, feature2 = bedrooms):
        """Return the mean absolute error between targets and the model's predictions."""
        return keras.losses.mae(
            targets,
            linear_regression(params, feature1, feature2),
        )

    # Define the optimize operation
    opt = keras.optimizers.Adam()

    # Perform minimization and print trainable variables
    # (params is not defined in these notes; assumed to be a 3-element trainable
    # Variable created earlier -- TODO confirm. print_results is presumably a course helper.)
    for j in range(10):
        opt.minimize(lambda: loss_function(params), var_list=[params])
        print_results(params)
        
### Batch Training
- divides large datasets into batches; one full pass over all the batches is an epoch
- performed in pandas using chunksize argument and implementing to linear regression model using a loop

        # Example assumes intercept, slope, opt and loss_function are already set up
        # Load data in batches
        for batch in pd.read_csv('kc_house_data.csv', chunksize=100):
            # Extract the lot size (feature) values for the current batch
            size_batch = np.array(batch['sqft_lot'], np.float32)

            # Extract the price values for the current batch
            price_batch = np.array(batch['price'], np.float32)

            # Minimize the loss on this batch; targets and features are passed by
            # keyword so they cannot be swapped by the positional parameter order
            opt.minimize(lambda: loss_function(intercept, slope, targets=price_batch, features=size_batch), var_list=[intercept, slope])

        # Print trained parameters
        print(intercept.numpy(), slope.numpy())

## Neural Networks (TF)

### Dense Layers

- applies weights to all nodes from the previous layer

#### low level approach

    # Initialize bias1
    bias1 = Variable(1.0)

    # Initialize weights1 as 3x2 variable of ones
    weights1 = Variable(ones((3, 2)))

    # Perform matrix multiplication of borrower_features and weights1
    # (weights1 is 3x2, so borrower_features needs 3 columns and the product has 2)
    product1 = matmul(borrower_features, weights1)

    # Apply sigmoid activation function to product1 + bias1
    dense1 = keras.activations.sigmoid(product1 + bias1)

    # Print shape of dense1
    print("\n dense1's output shape: {}".format(dense1.shape))

    # Initialize bias2 and weights2
    # (weights2 is 2x1, mapping the 2 outputs of dense1 to a single value)
    bias2 = Variable(1.0)
    weights2 = Variable(ones((2, 1)))

    # Perform matrix multiplication of dense1 and weights2
    product2 = matmul(dense1, weights2)

    # Apply activation to product2 + bias2 and print the prediction
    # ([0,0] selects the single output value of the first row)
    prediction = keras.activations.sigmoid(product2 + bias2)
    print('\n prediction: {}'.format(prediction.numpy()[0,0]))
    print('\n actual: 1')
    
### Activation Function

- nonlinear operation
- sigmoid
    - binary classification
- relu
    - hidden layers
- softmax
    - multiclass classification (>2 classes)
    
            # Construct input layer from borrower features
            inputs = constant(borrower_features, float32)

            # Define first dense layer
            # (10 units, sigmoid activation)
            dense1 = keras.layers.Dense(10, activation='sigmoid')(inputs)

            # Define second dense layer
            # (8 units, relu activation)
            dense2 = keras.layers.Dense(8, activation='relu')(dense1)

            # Define output layer
            # (6 units with softmax; presumably one output per class -- TODO confirm)
            outputs = keras.layers.Dense(6, activation='softmax')(dense2)

            # Print first five predictions
            print(outputs.numpy()[:5])
            
### Optimizers

- stochastic gradient descent (SGD)
    - learning rate
    
            # Initialize x_1 and x_2
            # (dtype is passed by keyword: Variable's second positional parameter
            # is trainable, not dtype)
            x_1 = Variable(6.0, dtype=float32)
            x_2 = Variable(0.3, dtype=float32)

            # Define the optimization operation
            opt = keras.optimizers.SGD(learning_rate=0.01)

            # loss_function here takes a single variable; it is assumed to be
            # defined earlier -- TODO confirm
            for j in range(100):
                # Perform minimization using the loss function and x_1
                opt.minimize(lambda: loss_function(x_1), var_list=[x_1])
                # Perform minimization using the loss function and x_2
                opt.minimize(lambda: loss_function(x_2), var_list=[x_2])

            # Print x_1 and x_2 as numpy arrays
            print(x_1.numpy(), x_2.numpy())
            
- root mean squared propagation (RMSprop)
    - learning rate, momentum, decay
    
            # Initialize x_1 and x_2
            # (dtype is passed by keyword: Variable's second positional parameter
            # is trainable, not dtype)
            x_1 = Variable(0.05, dtype=float32)
            x_2 = Variable(0.05, dtype=float32)

            # Define the optimization operation for opt_1 and opt_2
            # (same learning rate; only the momentum differs between the two)
            opt_1 = keras.optimizers.RMSprop(learning_rate=0.01, momentum=0.99)
            opt_2 = keras.optimizers.RMSprop(learning_rate=0.01, momentum=0.00)

            for j in range(100):
                # Define the minimization operation for opt_1
                opt_1.minimize(lambda: loss_function(x_1), var_list=[x_1])
                # Define the minimization operation for opt_2
                opt_2.minimize(lambda: loss_function(x_2), var_list=[x_2])

            # Print x_1 and x_2 as numpy arrays
            print(x_1.numpy(), x_2.numpy())
            
- adaptive moment estimation (Adam)
    - learning rate, beta1
    
### Training Networks

- use dropout to prevent overfitting
- use random draws for functions without values


### Neural Networks (Keras)

#### building models

- sequential API Sequential()
    - input layer
    - hidden layers
    - output layers
    
            # Define a Keras sequential model
            model = keras.Sequential()

            # Define the first dense layer
            # (16 units; input_shape=(784,) declares 784 input features per example)
            model.add(keras.layers.Dense(16, activation='relu', input_shape=(784,)))

            # Define the second dense layer
            model.add(keras.layers.Dense(8, activation='relu'))

            # Define the output layer
            # (4 units with softmax)
            model.add(keras.layers.Dense(4, activation='softmax'))

            # Compile the model
            model.compile('adam', loss='categorical_crossentropy')

            # Print the model architecture
            # (summary() prints the table itself and returns None, so it is not wrapped in print)
            model.summary()
            
- functional API Input()
    - good for merging models
    
            # m1_inputs and m2_inputs are not defined in these notes; they are assumed
            # to be keras.Input tensors created earlier -- TODO confirm

            # For model 1, pass the input layer to layer 1 and layer 1 to layer 2
            m1_layer1 = keras.layers.Dense(12, activation='sigmoid')(m1_inputs)
            m1_layer2 = keras.layers.Dense(4, activation='softmax')(m1_layer1)

            # For model 2, pass the input layer to layer 1 and layer 1 to layer 2
            m2_layer1 = keras.layers.Dense(12, activation='relu')(m2_inputs)
            m2_layer2 = keras.layers.Dense(4, activation='softmax')(m2_layer1)

            # Merge model outputs and define a functional model
            # (add() sums the two 4-unit outputs element-wise)
            merged = keras.layers.add([m1_layer2, m2_layer2])
            model = keras.Model(inputs=[m1_inputs, m2_inputs], outputs=merged)

            # Print a model summary
            # (summary() prints the table itself and returns None, so it is not wrapped in print)
            model.summary()
            
#### training and evaluation models
- fit(features, labels, batch_size, epochs, validation_split) arguments
- add metrics='accuracy' to the compile step
- perform model.evaluate(test_features, test_labels)

        # Set the optimizer, loss function, and metrics
        model.compile(optimizer='RMSprop', loss='categorical_crossentropy', metrics=['accuracy'])

        # Add the number of epochs and the validation split
        # (validation_split=0.1 is passed to fit together with the full feature/label set)
        model.fit(sign_language_features, sign_language_labels, epochs=10, validation_split=0.1)

        # NOTE: the lines below use small_model, not the model compiled and fitted
        # above; presumably a separate, already-trained model -- TODO confirm

        # Evaluate the small model using the train data
        small_train = small_model.evaluate(train_features, train_labels)

        # Evaluate the small model using the test data
        small_test = small_model.evaluate(test_features, test_labels)
        
#### Estimators API model training

- less flexible
- high level module
- enforces best practices
- faster deployment
- many models to select from
- steps
    - define feature columns
    - load and transform data
    - define estimator
    - apply train operation
    
            # Define feature columns for bedrooms and bathrooms
            # (the column names match the keys of the features dict returned by input_fn)
            bedrooms = feature_column.numeric_column("bedrooms")
            bathrooms = feature_column.numeric_column("bathrooms")

            # Define the list of feature columns
            feature_list = [bedrooms, bathrooms]

            def input_fn():
                """Return the (features, labels) pair built from the housing data."""
                # Feature arrays keyed by column name
                features = {name: np.array(housing[name]) for name in ('bedrooms', 'bathrooms')}
                # Price is the label
                labels = np.array(housing.price)
                return features, labels
                
            # Define the model and set the number of steps
            # (DNNRegressor with hidden_units=[2,2], trained for 1 step)
            model = estimator.DNNRegressor(feature_columns=feature_list, hidden_units=[2,2])
            model.train(input_fn, steps=1)

            # Define the model and set the number of steps
            # (rebinds model to a LinearRegressor on the same feature columns, trained for 2 steps)
            model = estimator.LinearRegressor(feature_columns=feature_list)
            model.train(input_fn, steps=2)