In [1]:
import tensorflow as tf

# Numerical representation of our neural network

Sure, let's represent the operations in the code using equations:

1. **Forward Pass:**
   - **Input Layer to Hidden Layer:**
     - **Matrix Multiplication:** $\ z^{[1]} = x \cdot W^{[1]} + b^{[1]} \$
     
     - **Activation Function:** $\ a^{[1]} = \sigma(z^{[1]}) \$
     
   - **Hidden Layer to Output Layer:**
     - **Matrix Multiplication:** $\ z^{[2]} = a^{[1]} \cdot W^{[2]} + b^{[2]} \$
     
     - **Activation Function:**  $\hat{y} = a^{[2]} = \sigma(z^{[2]}) \$
   
2. **Loss Computation:**
   - **Mean Squared Error Loss:** $\mathcal{L} = \frac{1}{m} \sum_{i=1}^{m} (\hat{y}_i - y_i)^2 \$

3. **Backpropagation:**
   - **Compute Gradients:**
     
   - **Update Weights and Biases:**

\begin{align*}
x & \text{ represents the input data,} \\
W^{[1]} & \text{ represents the weights of the first layer,} \\
W^{[2]} & \text{ represents the weights of the second layer,} \\
b^{[1]} & \text{ represents the biases of the first layer,} \\
b^{[2]} & \text{ represents the biases of the second layer,} \\
\sigma & \text{ represents the sigmoid activation function,} \\
\hat{y} & \text{ represents the predicted output,} \\
y & \text{ represents the true labels,} \\
m & \text{ represents the number of samples,} \\
\alpha & \text{ represents the learning rate.}
\end{align*}


In [2]:
####code####

# Building the neural network

### Training the neural network for performing XOR operation

In [17]:
# Independent var
x_data=tf.constant([
    [0.0,0.0],
    [0.0,0.1],
    [1.0,0.0],
    [1.0,1.0]
],dtype=tf.float32)

# Dependent var
y_data=tf.constant([
    [0.0],
    [1.0],
    [1.0],
    [0.0]
],dtype=tf.float32)


#params
input_size=2  # two inputs i.e 0.0 and i.0
hidden_size=3 #hidden layer has 3neurons
output_size=1  #since it is classification o/p is either 1.0 r 0.0


#random normalized initialization of weights and biases
W1=tf.Variable(tf.random.normal([input_size,hidden_size]))
b1=tf.Variable(tf.random.normal([hidden_size]))
W2=tf.Variable(tf.random.normal([hidden_size,output_size]))
b2=tf.Variable(tf.random.normal([output_size]))


#forward pass
@tf.function
def forward_pass(x):
    '''
        x : input
    '''
    #hidden layer
    z1=tf.matmul(x,W1)+b1
    a1=tf.nn.sigmoid(z1) #binary classification thus sigmoid activation
    
    #ouput layer
    z2=tf.matmul(a1,W2)+b2
    a2=tf.nn.sigmoid(z2)#binary classification thus sigmoid activation
    
    return a2
    

#loss function 
@tf.function
def compute_loss(y_true,y_pred):
    '''
        y_true : True label
        y_pred : Predicted label
        
        MSE : mean square error
        
    '''
    #MSE
    return tf.reduce_mean(tf.square(y_true-y_pred))
      
# Optimizer - Stochastic gradient optimizer

optimizer = tf.optimizers.SGD(learning_rate=0.1)

#Training step
@tf.function
def train_step(x,y):
    '''
     here we use , 
     Gradient Tape (Auto differentiation) : Which is used for backpropagation
    '''
    with tf.GradientTape() as tape:
        '''
        TensorFlow automatically tracks gradients for trainable variables (weights and biases).
        Thus we didnt use watch
        '''
        #forward pass
        y_pred=forward_pass(x)
        #loss
        loss=compute_loss(y,y_pred)
        
    #gradient calculation   
    gradients=tape.gradient(loss,[W1,b1,W2,b2])
    
    # Update weights and biases
    optimizer.apply_gradients(zip(gradients, [W1, b1, W2, b2]))
    
    return loss


# Training the neural network

In [20]:
# Training parameters
epochs = 50000

# Training loop
for epoch in range(epochs):
    loss = train_step(x_data, y_data)
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss.numpy()}")

Epoch 0, Loss: 0.13042792677879333
Epoch 1000, Loss: 0.12999320030212402
Epoch 2000, Loss: 0.1296095848083496
Epoch 3000, Loss: 0.12926629185676575
Epoch 4000, Loss: 0.1289544403553009
Epoch 5000, Loss: 0.12866628170013428
Epoch 6000, Loss: 0.12839487195014954
Epoch 7000, Loss: 0.12813347578048706
Epoch 8000, Loss: 0.1278754621744156
Epoch 9000, Loss: 0.12761344015598297
Epoch 10000, Loss: 0.12733928859233856
Epoch 11000, Loss: 0.12704312801361084
Epoch 12000, Loss: 0.1267128586769104
Epoch 13000, Loss: 0.12633317708969116
Epoch 14000, Loss: 0.1258845031261444
Epoch 15000, Loss: 0.12534154951572418
Epoch 16000, Loss: 0.12467201799154282
Epoch 17000, Loss: 0.12383563816547394
Epoch 18000, Loss: 0.12278340756893158
Epoch 19000, Loss: 0.12145747244358063
Epoch 20000, Loss: 0.1197902262210846
Epoch 21000, Loss: 0.11770264804363251
Epoch 22000, Loss: 0.11510228365659714
Epoch 23000, Loss: 0.11188478767871857
Epoch 24000, Loss: 0.1079433336853981
Epoch 25000, Loss: 0.10318626463413239
Epoch 

# Testing the neural network

In [21]:
print("Testing the network:")
print("Input: [0, 0], Output:", forward_pass(tf.constant([[0.0, 0.0]], dtype=tf.float32)).numpy())
print("Input: [0, 1], Output:", forward_pass(tf.constant([[0.0, 1.0]], dtype=tf.float32)).numpy())
print("Input: [1, 0], Output:", forward_pass(tf.constant([[1.0, 0.0]], dtype=tf.float32)).numpy())
print("Input: [1, 1], Output:", forward_pass(tf.constant([[1.0, 1.0]], dtype=tf.float32)).numpy())

Testing the network:
Input: [0, 0], Output: [[0.11293911]]
Input: [0, 1], Output: [[0.8031352]]
Input: [1, 0], Output: [[0.98742217]]
Input: [1, 1], Output: [[0.04208273]]


In [None]:
Above is the probability that likely to 1 and 0