---

Load libraries

---

In [39]:
## Load libraries
import numpy as np
import sympy as sp
import sys
import matplotlib.pyplot as plt
import matplotlib.cm as cm
plt.style.use('dark_background')
from keras.datasets import mnist
%matplotlib inline

---

Set printing precision

---

In [40]:
# Show NumPy arrays with two digits after the decimal point
np.set_printoptions(precision=2)

---

Import tensorflow and check version

---

In [41]:
import tensorflow as tf

In [42]:
# Display the version string of the imported TensorFlow package
tf.__version__

'2.15.0'

In [43]:
## Pick the data directory: a local folder by default, or a Google Drive
## folder (mounted first) when the notebook is running in Colab
if 'google.colab' not in sys.modules:
    DATA_DIR = 'Data/'
else:
    from google.colab import drive
    drive.mount('/content/drive', force_remount = True)
    DIR = '/content/drive/MyDrive/Colab Notebooks/MAHE/MSIS Coursework/OddSem2023MAHE'
    DATA_DIR = DIR + '/Data/'

Mounted at /content/drive


---

Calculating softmax loss and gradient for a toy dataset

----

In [50]:
# Generate artificial data with 5 samples, 4 features per sample
# and 3 output classes
SEED = 0 # fixed seed so that Restart & Run All reproduces the same toy data
rng = np.random.default_rng(SEED)
num_samples = 5 # number of samples
num_features = 4 # number of features (a.k.a. dimensionality)
num_labels = 3 # number of output labels
# Data matrix (each column = single sample), integer entries in {0, ..., 4}
X = rng.integers(0, 5, size = (num_features, num_samples))
# Class labels, drawn from {0, ..., num_labels - 1} so they always match num_labels
y = rng.integers(0, num_labels, size = num_samples)
# Randomly assign integer entries in {-4, ..., 3} to the weights matrix
W = rng.integers(-4, 4, size = (num_labels, num_features))
print('X = ')
print(X)
print('y = ')
print(y)
print('W = ')
print(W)

X = 
[[1 0 0 1 0]
 [3 0 1 3 2]
 [2 0 3 4 2]
 [3 3 4 2 4]]
y = 
[0 1 0 2 0]
W = 
[[ 1 -2  2 -1]
 [-1 -1  3 -2]
 [ 1 -3  3  3]]


---

Add the bias feature to the data matrix (run this cell only once!)

---

In [52]:
# Add the bias feature (a row of ones) to the data matrix.
# The shape check makes this cell safe to re-run: the row is appended only
# while X still has exactly num_features rows, so it is never added twice.
print('X = ')
print(X)
print('X with bias feature = ')
if X.shape[0] == num_features:
    X = np.vstack([X, np.ones((1, num_samples))])
print(X)

X = 
[[1 0 0 1 0]
 [3 0 1 3 2]
 [2 0 3 4 2]
 [3 3 4 2 4]]
X with bias feature = 
[[1. 0. 0. 1. 0.]
 [3. 0. 1. 3. 2.]
 [2. 0. 3. 4. 2.]
 [3. 3. 4. 2. 4.]
 [1. 1. 1. 1. 1.]]


---

Augment the weight matrix with a bias column as its last column. The bias
weights are initialized to ones here; random values would work as well (run this cell only once!)

---

In [53]:
# Augment the weight matrix with a bias column of ones as the last column.
# The shape check makes this cell safe to re-run: the column is appended only
# while W still has exactly num_features columns, so it is never added twice.
if W.shape[1] == num_features:
    W = np.hstack([W, np.ones((num_labels, 1))])
print(W)

[[ 1. -2.  2. -1.  1.]
 [-1. -1.  3. -2.  1.]
 [ 1. -3.  3.  3.  1.]]


---

Calculate the raw scores matrix $Z = WX$

---

In [57]:
# Raw scores: matrix product of the weights and the data matrix
Z = W @ X
print('Z = ', Z, sep = '\n')

Z = 
[[-3. -2.  1.  2. -3.]
 [-3. -5.  1.  5. -3.]
 [ 8. 10. 19. 11. 13.]]


In [58]:
# Show the class labels again for comparison with the scores above
print(y)

[0 1 0 2 0]


---

Define softmax function

---


In [70]:
# Define softmax function
def softmax(Z, axis = 0):
  """
  Convert raw scores into probabilities with a numerically stable softmax.

  Parameters
  ----------
  Z : array_like
      Raw scores. With the default axis = 0, each column holds the scores
      of one sample (one row per class).
  axis : int, optional
      Axis that runs over the classes and along which the probabilities are
      normalized. Defaults to 0, i.e. every column sums to 1.

  Returns
  -------
  numpy.ndarray
      Array with the same shape as Z whose entries along `axis` are
      non-negative and sum to 1.
  """
  Z = np.asarray(Z)
  # Subtract the largest score along the class axis before exponentiating.
  # This leaves the result unchanged but keeps every exponent <= 0, so the
  # non-normalized values lie in (0, 1] and np.exp cannot overflow.
  Z_exp = np.exp(Z - np.max(Z, axis = axis, keepdims = True))
  # Normalize so that the values along the class axis add up to 1.
  # keepdims = True keeps the reduced axis so the division broadcasts for any axis.
  return Z_exp / np.sum(Z_exp, axis = axis, keepdims = True)

---

Calculate the probability matrix

---

In [72]:
# Turn the raw scores into per-sample class probabilities
P = softmax(Z)
print(Z, P, sep = '\n')
# Column sums of P
print(P.sum(axis = 0))
# Correct label of each sample
print(y)

[[-3. -2.  1.  2. -3.]
 [-3. -5.  1.  5. -3.]
 [ 8. 10. 19. 11. 13.]]
[[1.67e-05 6.14e-06 1.52e-08 1.23e-04 1.13e-07]
 [1.67e-05 3.06e-07 1.52e-08 2.47e-03 1.13e-07]
 [1.00e+00 1.00e+00 1.00e+00 9.97e-01 1.00e+00]]
[1. 1. 1. 1. 1.]
[0 1 0 2 0]


---

Calculate training loss for all samples.

---

In [80]:
# Loss of each sample: negative log of the probability that P assigns
# to the sample's correct class
loss = -np.log(P[y, np.arange(num_samples)])
print('Loss = ')
print(loss)
# Calculate average training loss over the samples. This is the data loss
# only (no regularization term), so it is labelled accordingly.
loss_data = np.mean(loss)
print('Average data loss = %f'%(loss_data))

Loss = 
[1.1e+01 1.5e+01 1.8e+01 2.6e-03 1.6e+01]
Total loss = 12.000528


---

Calculate regularization loss

---


In [84]:
# Inspect the pieces of the regularization term: the full weight matrix,
# the weights without the last (bias) column, and their elementwise squares
print(W, W[:, :-1], np.square(W[:, :-1]), sep = '\n')

[[ 1. -2.  2. -1.  1.]
 [-1. -1.  3. -2.  1.]
 [ 1. -3.  3.  3.  1.]]
[[ 1. -2.  2. -1.]
 [-1. -1.  3. -2.]
 [ 1. -3.  3.  3.]]
[[1. 4. 4. 1.]
 [1. 1. 9. 4.]
 [1. 9. 9. 9.]]


In [85]:
# Regularization loss
reg = 0.1 # strength of regularization = 10%
loss_reg = np.sum(W[:, :-1] * W[:, :-1])
print('Total loss = %f'%(loss_data + reg * loss_reg))

Total loss = 17.300528


---

Calculate the gradient of total loss w.r.t. the weights W

---


In [87]:
# Build the adjusted probability matrix: 1 is subtracted from each sample's
# correct-category probability. Work on a copy so that P keeps holding the
# probabilities and re-running this cell does not subtract 1 a second time.
dZ = P.copy()
dZ[y, np.arange(num_samples)] -= 1

# Calculate the gradient of total loss w.r.t. the weights W.
# The regularization term gets a zero column in place of the bias column,
# matching the regularization loss, which leaves the bias column out.
dW = (1/num_samples)*np.dot(dZ, X.T) + reg * 2 * np.hstack([W[:, :-1], np.zeros((num_labels, 1))])
print(dW)

[[-2.00e-01 -2.80e+00 -2.40e+00 -4.60e+00 -1.20e+00]
 [-2.00e-01 -1.99e-01  6.02e-01 -1.60e+00 -4.00e-01]
 [ 1.99e-01 -1.58e-03  1.20e+00  3.00e+00  5.99e-01]]


---

Apply gradient descent to the toy dataset

---


In [None]:
alpha = 1e-02 # learning rate
tol = 1e-05 # stopping tolerance
maxiter = 1000 # upper bound on the number of updates
print_every = 100 # report progress every this many iterations
num_iter = 0 # iteration counter (named so it does not shadow the builtin iter)

while num_iter < maxiter:
  # Recompute the gradient at the CURRENT weights. Without this step every
  # update would reuse the same stale gradient and the stopping test on
  # the gradient norm could never trigger.
  dZ = softmax(np.dot(W, X))
  dZ[y, np.arange(num_samples)] -= 1
  dW = (1/num_samples)*np.dot(dZ, X.T) + reg * 2 * np.hstack([W[:, :-1], np.zeros((num_labels, 1))])
  grad_norm = np.linalg.norm(dW)
  # Stop once the gradient is (numerically) zero
  if grad_norm <= tol:
    break
  # Gradient descent step: move against the gradient
  W = W - alpha * dW
  num_iter = num_iter + 1
  if num_iter == 1 or num_iter % print_every == 0:
    print('Iteration = %d, ||gradL(W)|| = %f'%(num_iter, grad_norm))

---

TensorFlow includes a low-level API known as TensorFlow core and many high-level APIs, including Keras (tf.keras).

Now we will focus on the TensorFlow low-level API, starting with *TensorFlow constants* (https://www.tensorflow.org/guide/tensor), which have the following properties:

1. Values are stored at the time of defining the tensor
2. Immutable

---

In [None]:
# Three scalar constants stored as 16-bit floats
T1 = tf.constant(5.0, dtype = tf.float16, name = 't1')
T2 = tf.constant(8.0, dtype = tf.float16, name = 't2')
T3 = tf.constant(10.0, dtype = tf.float16, name = 't3')
# A 3 x 4 matrix constant stored as 32-bit floats
T4 = tf.constant(
    [[2, 1, 4, 3],
     [1, 2, 3, 4],
     [4, 3, 2, 1]],
    dtype = tf.float32,
    name = 't4',
)

print(T1, T2, T3, T4, sep = '\n')

---

Elementwise operations on constant tensors

---

In [None]:
# Sum and difference of two scalar constants via the overloaded operators
print(T1 + T2, T1 - T2, sep = '\n')

---

Built-in operations

---

In [None]:
# Same sum as T1 + T2, written with the built-in function
op1 = tf.add(T1, T2)
# Exponential applied to every entry of T4
op2 = tf.exp(T4)
print(op1, op2, sep = '\n')

---

TensorFlow session is applicable only for **TensorFlow version 1** which allows for defining a computation (data flow) graph such that the nodes are the operations and edges are the tensors followed by an execution of the graph.

**TensorFlow version 2** has eager execution (operations execute immediately, without creating a session).

If version 1 is to be used then, we import TensorFlow as follows:

$$\begin{align*}&\texttt{import tensorflow.compat.v1 as tf}\\&\texttt{
tf.disable_v2_behavior()}\end{align*}$$

or eager execution in version 2 can be disabled using $$\begin{align*}&\texttt{import tensorflow as tf}\\&\texttt{tf.compat.v1.disable_eager_execution()}\end{align*}$$

----

In [None]:
# Rebind the name `tf` to the TensorFlow 1.x compatibility API
import tensorflow.compat.v1 as tf
# Switch off TensorFlow 2 behavior so that the session-based cells below work.
# NOTE(review): this looks like a process-wide switch that is meant to be
# called before any tensors are created, while earlier cells already ran
# TF2 ops - confirm, or restart the runtime before running this section.
tf.disable_v2_behavior()
tf.__version__

In [None]:
# Define the tensors and the operations on them
T1 = tf.constant(5.0, dtype = tf.float16, name = 't1')
T2 = tf.constant(8.0, dtype = tf.float16, name = 't2')
T4 = tf.constant(
    [[2, 1, 4, 3],
     [1, 2, 3, 4],
     [4, 3, 2, 1]],
    dtype = tf.float32,
    name = 't4',
)
op1 = tf.add(T1, T2)
op2 = tf.exp(T4)
# Print the operation objects themselves (before any session is run)
print(op1, op2, sep = '\n')
# Run the operations inside a session and print what sess.run returns
with tf.Session() as sess:
  print(sess.run(op1), sess.run(op2), sep = '\n')

---

Placeholders (applicable only for **TensorFlow version 1**): a TensorFlow computation graph can be parameterized to accept external inputs (such as input data for a machine learning algorithm) at runtime using placeholders. That is, placeholders are empty tensors whose values are provided at runtime (through `feed_dict`).

---



In [None]:
# Placeholders: tensors without a value until one is fed in at run time
T1 = tf.placeholder(tf.float32)
T2 = tf.placeholder(tf.float32)
# Define some operations
op1 = T1 + T2 # sum
op2 = T1 * T2 # product
with tf.Session() as sess:
  # Values for the placeholders are supplied through feed_dict.
  # The labels match the operations: op1 is the sum, op2 is the product.
  print('add: ', sess.run(op1, feed_dict = {T1: 2, T2: 3}))
  print('multiply: ', sess.run(op2, feed_dict = {T1: 2, T2: 3}))

---

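Re-import TensorFlow under its version 2 name. Note that this only rebinds the name `tf` to the top-level package; it does not by itself undo the earlier `tf.disable_v2_behavior()` call, so restart the runtime if eager execution is needed in the cells below.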

---

In [None]:
# Rebind the name `tf` to the top-level tensorflow package.
# NOTE(review): importing again only changes what `tf` refers to; it presumably
# does not re-enable the behavior switched off by the earlier
# disable_v2_behavior() call - confirm, or restart the runtime before this cell.
import tensorflow as tf
tf.__version__

---

Tensors can also be created from other Python objects, such as lists, NumPy arrays, and pandas DataFrames, using `tf.convert_to_tensor()`.

---

In [None]:
# Build a tensor from a NumPy array holding the integers 1..4, requesting float64
T = tf.convert_to_tensor(np.arange(1, 5), dtype = tf.float64)
print(T)

---

Variables (https://www.tensorflow.org/guide/variable)

---

---

Automatic differentiation using TF (https://www.tensorflow.org/guide/autodiff)

Example: calculate the sensitivity of $L(w) = 4w+w^3$ w.r.t. the input $w$ at $w=1.$

Sensitivity $\nabla_wL = 4+3w^2,$ which at $w=1$ is equal to $4+3\times1^2=7.$

---