In [1]:
import numpy as np
#import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.keras.losses import SparseCategoricalCrossentropy
from tensorflow.python.keras.layers import Dense, Input
from tensorflow.python.keras import Sequential
from win32gui import DestroyCaret

### coffee roasting example

In [8]:
# A single training example wrapped in an outer list so the array is 2-D:
# always pass a 2D matrix to tensorflow.
X_train = np.array([[200.0, 17.0]])

# Two stand-alone sigmoid layers: 3 units feeding into 1 unit.
layer_1 = Dense(activation='sigmoid', units=3)
layer_2 = Dense(activation='sigmoid', units=1)

# Manual forward pass: the output of the first layer is the input of the second.
a1 = layer_1(X_train)
a2 = layer_2(a1)
print(a2)

(1, 2)
tf.Tensor([[0.50011915]], shape=(1, 1), dtype=float32)


### different approach

In [10]:
# One target label for the one example held in X_train.
y_train = np.array([1])

# Same idea as the manual forward pass, but letting Sequential chain the
# layers: three sigmoid layers with 25, 15 and 1 units.
model = Sequential([
    Dense(units=n_units, activation='sigmoid')
    for n_units in (25, 15, 1)
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(X_train, y_train)
model.summary()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 906ms/step - accuracy: 1.0000 - loss: 0.5612


### Normalize Data
Fitting the weights to the data (back-propagation, covered in next week's lectures) will proceed more quickly if the data is normalized. This is the same procedure you used in Course 1 where features in the data are each normalized to have a similar range. 
The procedure below uses a Keras [normalization layer](https://keras.io/api/layers/preprocessing_layers/numerical/normalization/). It has the following steps:
- create a "Normalization Layer". Note, as applied here, this is not a layer in your model.
- 'adapt' the data. This learns the mean and variance of the data set and saves the values internally.
- normalize the data.  
It is important to apply normalization to any future data that utilizes the learned model.

In [None]:
# Stand-alone normalization layer (used as a preprocessing step, not as a
# layer inside `model`).
norm_l = tf.keras.layers.Normalization(axis=-1)
norm_l.adapt(X_train)  # learns mean, variance
Xn = norm_l(X_train)

# Keras expects a 2-D (samples, features) batch. A 1-D array such as
# np.array([1, 1]) is read as two samples with one feature each, which does
# not match the two-feature X_train the model was fit on.
# NOTE: `model` above was fit on the raw X_train, so the new example is passed
# un-normalized here; if the model is re-trained on Xn, feed norm_l(...) of the
# new data instead.
predictions = model.predict(np.array([[1.0, 1.0]]))

### python implementation for forward propagation
We build forward propagation from three pieces. First, we define the activation function, which is the sigmoid here. Second, we define the `dense` function, which computes the activations of one layer from the activations of the previous layer. Finally, we define the `sequential` function, which chains the dense layers together to perform the full forward propagation.

In [None]:
def sigmoid(z):
    """Logistic sigmoid, 1 / (1 + e^(-z)).

    Works on scalars and, element-wise, on NumPy arrays.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def dense(a_in,W,b,g=None):
    """Compute the activations of one fully connected layer.

    Args:
        a_in: 1-D array of activations coming from the previous layer.
        W: 2-D weight matrix; column ``i`` holds the weights of unit ``i``,
            so ``W.shape[1]`` is the number of units in this layer.
        b: 1-D array with one bias per unit.
        g: activation function applied element-wise to the pre-activations.
            Defaults to ``sigmoid`` when omitted.

    Returns:
        1-D array with one activation per unit.
    """
    if g is None:
        g = sigmoid
    units = W.shape[1]
    z = np.zeros(units)
    for i in range(units):
        z[i] = np.dot(a_in,W[:,i])+b[i]
    # The activation is applied once, after every z[i] is known. Applying it
    # inside the loop would squash the already-computed units again on each
    # pass and turn the not-yet-computed zeros into g(0).
    a_out = g(z)
    #shorter code
    #a_out = g(np.matmul(a_in,W)+b)
    return a_out


# w = np.array([
#     [1,2,4],
#     [4,5,6]
#          ])
#for this w array, you can notice that
# the number of rows represents the number of neurons in the previous layer
# the number of columns represents the number of neurons in the current layer


def sequential(x,layers,rng=None):
    """Forward-propagate ``x`` through a stack of randomly initialised dense layers.

    Args:
        x: 1-D input feature vector.
        layers: sequence of ints; ``layers[i]`` is the number of units in layer ``i``.
        rng: optional ``numpy.random.Generator`` used to draw the weights and
            biases, for reproducible results. When omitted, the global
            ``np.random`` state is used.

    Returns:
        The activations of the last layer (``x`` itself, as an array, when
        ``layers`` is empty).
    """
    draw = np.random.randint if rng is None else rng.integers
    # A list, not a flat array: every layer's activation vector can have a
    # different length.
    activations = [np.asarray(x)]
    for units in layers:
        n_in = len(activations[-1])
        # One bias per unit of the current layer; W has one row per incoming
        # activation and one column per unit.
        b = draw(10,size = units)
        W = draw(10,size = (n_in,units))
        activations.append(dense(activations[-1],W,b))
    return activations[-1]

### code for multiclass classification
Let's write code for multiclass classification. I will write two versions of it.
In the first version, the last layer uses a softmax activation and outputs the class probabilities directly. In the second version, we make the last layer linear, so the trained model outputs the raw values $z$ (the logits); we then compute the class probabilities by applying softmax to those logits.

In [None]:
#this is the first version, and it's not very accurate
# Two relu hidden layers (25 and 15 units) followed by a 10-unit softmax
# output, so the model itself emits class probabilities.
model2 = Sequential([
    Dense(units=n_units, activation=act)
    for n_units, act in ((25, 'relu'), (15, 'relu'), (10, 'softmax'))
])

model2.compile(
    loss=SparseCategoricalCrossentropy(),
)
model2.fit(X_train, y_train, epochs=100)

## this is the updated version of the code

# Same hidden layers, but the 10-unit output layer is linear, so the model
# emits logits; the loss is told so through from_logits=True.
model3 = Sequential([
    Dense(units=n_units, activation=act)
    for n_units, act in ((25, 'relu'), (15, 'relu'), (10, 'linear'))
])

model3.compile(
    loss=SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)
model3.fit(X_train, y_train, epochs=100)

# The model output is logits; softmax turns them into class probabilities.
logits = model3(X_train)
f_x = tf.nn.softmax(logits)
