In [None]:
pip install tensorflow

In [None]:
pip install keras

In [None]:
from tensorflow.keras.datasets import mnist

- The MNIST dataset contains four numpy arrays: 
1. `train_images` and `train_labels` are the training set 
2. `test_images` and `test_labels` form the test set
3. Images are NumPy arrays and labels are arrays of digits (range: 0-9)

In [4]:
# mnist.load_data() returns two (images, labels) pairs: the training set first, then the test set
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

- Review the training data

In [5]:
# Dimensions of the training image array: sample count first, then the size of each image
train_images.shape

(60000, 28, 28)

In [6]:
# Number of training labels; should match the number of training images
len(train_labels)

60000

In [7]:
# Peek at the training labels (one digit per training image)
train_labels

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [8]:
# Dimensions of the test image array: sample count first, then the size of each image
test_images.shape

(10000, 28, 28)

In [9]:
# Number of test labels; should match the number of test images
len(test_labels)

10000

In [10]:
# Peek at the test labels (one digit per test image)
test_labels

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

- Build the network

In [11]:
from tensorflow import keras
from tensorflow.keras import layers

# Assemble the network one layer at a time:
#   - a fully connected hidden layer of 512 units with ReLU activation
#   - a fully connected 10-unit softmax layer, one probability score per digit class
model = keras.Sequential()
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(10, activation="softmax"))

### Notes on Neural Networks:
- **layer** : core building block of neural networks; serves as a filter for data; each layer extracts representations from the data fed into it; layers can be chained together to perform progressive data distillation

- **Dense layer** : densely connected (_fully connected_) neural layer 
 (e.g. a dense layer with a softmax activation, called a **softmax classification layer**, returns an array of x probability scores that sum to 1; each score represents the probability that the current sample belongs to one of the x classes)

 #### Preparing Neural Network for Training : A Checklist

 - What is the **optimizer**? : The optimizer is the mechanism the model uses to _update itself based on training data it consumes_ with the ultimate goal being to improve its performance

 - What is the **loss function**? This function enables the model to measure its performance on the training data 

 - What **metrics** will be monitored during training and testing? This may vary but typically _accuracy_ is the metric most often monitored 


In [12]:
# Compilation step: choose the three items from the checklist above.
#   optimizer - how the model updates itself from the training data
#   loss      - how the model measures its performance on the training data;
#               the "sparse" variant is used because the labels are integer digits
#   metrics   - what is reported during training and testing (accuracy here)
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

In [13]:
# Prepare the image data - put it into the shape the model expects and scale it:
#   1. flatten every image from a 28 x 28 grid into one row of 28 * 28 values
#   2. convert uint8 -> float32 and divide by 255 so values move from [0, 255] to [0, 1]
# Refer to the .shape cells above to see how the data has changed.

def flatten_and_scale(images):
    """Return `images` as a float32 matrix of shape (n_images, 28 * 28) scaled to [0, 1].

    The sample count is read from the array itself instead of being hard-coded.
    Arrays that are no longer uint8 are returned untouched, so re-running this
    cell cannot divide the pixel values by 255 a second time.
    """
    if images.dtype != "uint8":  # already prepared by an earlier run of this cell
        return images
    flat = images.reshape((len(images), 28 * 28))
    return flat.astype("float32") / 255


train_images = flatten_and_scale(train_images)
test_images = flatten_and_scale(test_images)

In [14]:
# Fit the model to the prepared training data (reshaped and retyped above so the
# network can consume it): 5 passes over the data in mini-batches of 128 samples.
# The output reports the model's loss and accuracy on the training data for each epoch.
model.fit(x=train_images, y=train_labels, batch_size=128, epochs=5)

Epoch 1/5


2024-10-24 15:26:44.369007: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.8674 - loss: 0.4676
Epoch 2/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9656 - loss: 0.1183
Epoch 3/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9791 - loss: 0.0735
Epoch 4/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9857 - loss: 0.0501
Epoch 5/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9895 - loss: 0.0371


<keras.src.callbacks.history.History at 0x75c124581e80>

In [15]:
# Ask the trained model to classify the first 10 test images
test_digits = test_images[:10]
predictions = model.predict(test_digits)

# Show the prediction for the first image: the number at index i is the
# probability (between 0 and 1) that the sample belongs to class i
predictions[0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


array([1.6096466e-07, 4.5687354e-08, 1.6766706e-05, 2.5510314e-04,
       8.7381269e-10, 3.0916195e-07, 3.0422298e-10, 9.9971509e-01,
       3.3935130e-06, 9.0241165e-06], dtype=float32)

In [16]:
# Identify the prediction the model makes, i.e. the digit it believes matches the image
# (the index holding the highest probability score)
predictions[0].argmax()

7

In [17]:
# Probability the model assigns to class 7 for the image at index 0
predictions[0, 7]

0.9997151

In [18]:
# Compare the prediction with the true label of the first test image
test_labels[0]

7

In [19]:
# Measure loss and accuracy on the test set, which was not used by model.fit
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f"test_acc: {test_acc}")
# If this accuracy is lower than the training accuracy reported by fit(),
# the gap may be a symptom of overfitting

[1m 84/313[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 1ms/step - accuracy: 0.9796 - loss: 0.0704

2024-10-24 15:28:31.453237: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 31360000 exceeds 10% of free system memory.


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9764 - loss: 0.0781
test_acc: 0.9789999723434448


### Data Representations for Neural Networks

Recall:

- NumPy arrays can also be considered **tensors**.
- **tensors**: a container for data; typically numerical data
- Tensors have various ranks:
    - Scalars (rank-0 tensors)
    - Vectors (rank-1 tensors)
    - Matrices (rank-2 tensors)

#### Scalars(rank-0/0D tensor)
- `float32` or `float64` numbers are scalar tensors (scalar arrays) in NumPy
- `ndim` attribute shows the number of axes in the tensor; for scalar tensors, `ndim == 0`
- **rank**: number of axes in tensor

In [20]:
import numpy as np
# Wrapping a single number in np.array gives a scalar (rank-0) tensor
x = np.array(12)

In [21]:
x  # Display the scalar array created above

array(12)

In [22]:
x.ndim  # Rank (number of axes) of the scalar array

0

#### Vectors (rank-1 tensors)
- **vector**: array of numbers; rank-1 tensor/1D tensor

In [23]:
# Build a NumPy vector (rank-1 tensor) from a list of five values
x = np.array([12, 3, 6, 14, 7])

Remember: 
- The array below is a 5 dimensional (5D) vector, with 5 entries but only ONE axis. 
- This is distinct from a 5D tensor (or _tensor of rank 5_), which has 5 axes and may have any number of dimensions along each axis


In [24]:
x  # View the initialized array; NOTE: five entries in this array make it a 5-dimensional vector

array([12,  3,  6, 14,  7])

In [25]:
x.ndim  # Rank of the vector: one axis, regardless of how many entries it holds

1