## Custom Neural Network

In [30]:

import warnings
warnings.filterwarnings('ignore')

In [40]:
import pandas as pd
from sklearn import datasets
iris = datasets.load_iris()

print(iris.keys())
print()
print(iris.data[:5])
print()
print(iris.target[:5])
print()
print(iris.feature_names)
print()
print(iris.target_names)
print()
print(iris.DESCR)


dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]

[0 0 0 0 0]

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

['setosa' 'versicolor' 'virginica']

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
            - Iris-Setosa
            - Iris-Versicolour
            - Iris-Virginica

:Summary Statistics:

                Min  Max   Mean    SD   Class Correlation
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    

In [41]:
# first 5 rows of dataframe
df1 = pd.DataFrame(data=iris.data , columns = iris.feature_names)
df1.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [42]:
df1.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)'],
      dtype='object')

In [43]:
df1['species'] = iris.target

In [44]:
df1.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [45]:
print(df1.shape)

(150, 5)


In [46]:
print(df1.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   species            150 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 6.0 KB
None


In [47]:
import numpy as np
print("Any missing values in features:", np.any(np.isnan(iris.data)))
print("Any missing values in target:", np.any(np.isnan(iris.target)))


Any missing values in features: False
Any missing values in target: False


In [49]:
X = df1.drop(['species' ], axis=1 )#Feature Matrix
y = df1['species']

**Train Test Split**

In [50]:
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split(X , y , test_size = 0.2 , random_state=42)

**Scaling the feature columns of numeric data**

In [51]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [54]:
import tensorflow as tf
from tensorflow import keras

In [57]:
model = keras.Sequential([
    keras.layers.Dense(3 , input_dim = (4) , activation='softmax' , kernel_initializer = 'ones' , bias_initializer = 'zeros' )
])

In [58]:
model.compile(loss = 'sparse_categorical_crossentropy' ,
              optimizer = 'adam' ,
              metrics = ['accuracy'] )

In [59]:
model.fit(X_train , y_train , epochs=1000)

Epoch 1/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.4787 - loss: 1.0961
Epoch 2/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6769 - loss: 1.0847 
Epoch 3/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7223 - loss: 1.0746 
Epoch 4/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7944 - loss: 1.0616 
Epoch 5/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7750 - loss: 1.0528  
Epoch 6/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7763 - loss: 1.0438 
Epoch 7/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7877 - loss: 1.0329 
Epoch 8/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8085 - loss: 1.0235 
Epoch 9/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7ab2bbecdf30>

In [60]:
model.evaluate(X_test , y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step - accuracy: 1.0000 - loss: 0.1316


[0.1315850168466568, 1.0]

In [69]:
y_pred = np.round(model.predict(X_test) , 2)
(y_pred[:10])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


array([[0.01, 0.85, 0.15],
       [0.98, 0.02, 0.  ],
       [0.  , 0.01, 0.99],
       [0.01, 0.68, 0.31],
       [0.  , 0.71, 0.29],
       [0.95, 0.05, 0.  ],
       [0.09, 0.84, 0.08],
       [0.  , 0.06, 0.94],
       [0.  , 0.61, 0.39],
       [0.02, 0.9 , 0.08]], dtype=float32)

In [72]:
predicted= np.array(y_pred[:10])

In [73]:
np.argmax(predicted , axis=1)

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1])

In [74]:
y_test[:10]

Unnamed: 0,species
73,1
18,0
118,2
78,1
76,1
31,0
64,1
141,2
68,1
82,1


**Now get the value of weights and bias from the model**

In [75]:
coef , intercept = model.get_weights()

In [82]:
print(f'weights are {coef}   \n \n bias is  {intercept}')

weights are [[-0.34487584  1.8302851   1.7751279 ]
 [ 2.233692    0.33811066  0.11751558]
 [-0.5736547   1.0791566   2.7214537 ]
 [-0.56128746 -0.10680774  2.8746126 ]]   
 
 bias is  [-0.6779795  1.5027684 -1.3269361]


In [85]:
X_test

array([[ 0.35451684, -0.58505976,  0.55777524,  0.02224751],
       [-0.13307079,  1.65083742, -1.16139502, -1.17911778],
       [ 2.30486738, -1.0322392 ,  1.8185001 ,  1.49058286],
       [ 0.23261993, -0.36147005,  0.44316389,  0.4227026 ],
       [ 1.2077952 , -0.58505976,  0.61508092,  0.28921757],
       [-0.49876152,  0.75647855, -1.27600637, -1.04563275],
       [-0.2549677 , -0.36147005, -0.07258719,  0.15573254],
       [ 1.32969211,  0.08570939,  0.78699794,  1.49058286],
       [ 0.47641375, -1.92659808,  0.44316389,  0.4227026 ],
       [-0.01117388, -0.80864948,  0.09932984,  0.02224751],
       [ 0.84210448,  0.30929911,  0.78699794,  1.09012776],
       [-1.23014297, -0.13788033, -1.33331205, -1.44608785],
       [-0.37686461,  0.98006827, -1.39061772, -1.31260282],
       [-1.10824606,  0.08570939, -1.27600637, -1.44608785],
       [-0.86445224,  1.65083742, -1.27600637, -1.17911778],
       [ 0.59831066,  0.53288883,  0.55777524,  0.55618763],
       [ 0.84210448, -0.

In [88]:
def softmax(z):
  return np.exp(z) / np.sum(np.exp(z))

softmax([4.6,	3.1 , 	1.5 ,	0.2	])

array([0.78097135, 0.17425826, 0.03518214, 0.00958825])

In [89]:
X_test

array([[ 0.35451684, -0.58505976,  0.55777524,  0.02224751],
       [-0.13307079,  1.65083742, -1.16139502, -1.17911778],
       [ 2.30486738, -1.0322392 ,  1.8185001 ,  1.49058286],
       [ 0.23261993, -0.36147005,  0.44316389,  0.4227026 ],
       [ 1.2077952 , -0.58505976,  0.61508092,  0.28921757],
       [-0.49876152,  0.75647855, -1.27600637, -1.04563275],
       [-0.2549677 , -0.36147005, -0.07258719,  0.15573254],
       [ 1.32969211,  0.08570939,  0.78699794,  1.49058286],
       [ 0.47641375, -1.92659808,  0.44316389,  0.4227026 ],
       [-0.01117388, -0.80864948,  0.09932984,  0.02224751],
       [ 0.84210448,  0.30929911,  0.78699794,  1.09012776],
       [-1.23014297, -0.13788033, -1.33331205, -1.44608785],
       [-0.37686461,  0.98006827, -1.39061772, -1.31260282],
       [-1.10824606,  0.08570939, -1.27600637, -1.44608785],
       [-0.86445224,  1.65083742, -1.27600637, -1.17911778],
       [ 0.59831066,  0.53288883,  0.55777524,  0.55618763],
       [ 0.84210448, -0.

**Instead of model.predict, write our own prediction function that uses w1,w2 , w3 , w4 and bias**

In [90]:
df1.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'species'],
      dtype='object')

In [97]:
def prediction_function(sepal_length , sepal_width , petal_length , petal_width):
  weighted_sum = coef[0]*sepal_length + coef[1]*sepal_width + coef[2]*petal_length + coef[3]*petal_width + intercept
  return softmax(weighted_sum)

In [98]:
prediction_function(5.1 , 3.5 , 1.4 , 0.2)

array([8.6008280e-05, 7.2854894e-01, 2.7136505e-01], dtype=float32)

Now we start implementing gradient descent in plain python. Again the goal is to come up with same w1, w2 and bias that keras model calculated. We want to show how keras/tensorflow would have computed these values internally using gradient descent

First write couple of helper routines such as softmax and log_loss

In [99]:
def softmax(z):
  return np.exp(z) / np.sum(np.exp(z))

softmax([4.6,	3.1 , 	1.5 ,	0.2	])

array([0.78097135, 0.17425826, 0.03518214, 0.00958825])

In [120]:
def log_loss(y_true , y_predicted):
  epsilon = 1e-15
  y_predicted_new = [max(i , epsilon) for i in y_predicted]
  y_predicted_new = [min(i , 1-epsilon) for i in y_predicted_new]
  y_predicted_new = np.array(y_predicted_new)
  return -np.mean(y_true*np.log(y_predicted_new)+(1-y_true)*np.log(1-y_predicted_new))

## Custom Neural Network

In [121]:
df1.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'species'],
      dtype='object')

In [135]:
class MyNN:
  def __init__(self):
    self.w1 = 1
    self.w2 = 1
    self.w3 = 1
    self.w4 = 1
    self.bias = 0

  def fit(self , X , y , epochs , loss_thresold):

    sepal_length = X['sepal length (cm)']
    sepal_width = X['sepal width (cm)']
    petal_length = X['petal length (cm)']
    petal_width = X['petal width (cm)']

    self.w1 , self.w2 , self.w3 , self.w4 , self.bias = self.gradient_descent(sepal_length, sepal_width, petal_length, petal_width, y ,epochs , loss_thresold)
    print(f"final weights and bias : w1: {self.w1}  , w2: {self.w2} , w3: {self.w3} , w4: {self.w4} , bias: {self.bias}")

  def predict(self , X_test):
    weighted_sum = self.w1*X_test['sepal length (cm)'] + self.w2*X_test['sepal width (cm)'] + self.w3*X_test['petal length (cm)']
    + self.w4*X_test['petal width (cm)'] + self.bias
    return softmax(weighted_sum)

  def gradient_descent(self,sepal_length , sepal_width , petal_length , petal_width , y_true , epochs , loss_thresold):
    w1=w2=w3=w4 = 1
    bias = 0
    rate = 0.5
    n = len(sepal_length)
    for i in range(epochs):
      weighted_sum = w1*sepal_length + w2*sepal_width + w3*petal_length + w4*petal_width + bias
      y_predicted = softmax(weighted_sum)
      loss = log_loss(y_true , y_predicted)

      w1d = (1/n) * np.dot(np.transpose(sepal_length) , (y_predicted - y_true))
      w2d = (1/n) * np.dot(np.transpose(sepal_width) , (y_predicted - y_true))
      w3d = (1/n) * np.dot(np.transpose(petal_length) , (y_predicted - y_true))
      w4d = (1/n) * np.dot(np.transpose(petal_width) , (y_predicted - y_true))
      bias_d = np.mean(y_predicted - y_true)

      w1 = w1 - rate * w1d
      w2 = w2 - rate * w2d
      w3 = w3 - rate * w3d
      w4 = w4 - rate * w4d
      bias = bias - rate * bias_d

      if i%50 == 0:
        print(f'Epoch:{i} , w1: {w1} , w2: {w2} , w3: {w4} , w4: {w4} , bias: {bias} , loss: {loss}')

      if loss <= loss_thresold:
        print(f'Epoch:{i} , w1: {w1} , w2: {w2} , w3: {w4} , w4: {w4} , bias: {bias} , loss: {loss}')
        break
    return w1 , w2 , w3 , w4 , bias


In [136]:
customModel = MyNN()
customModel.fit(X_train , y_train , epochs=1000 , loss_thresold=0.1691)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [124]:
X_train.shape , y_train.shape

((120, 4), (120,))

In [132]:
X_test.shape , y_test.shape

((30, 4), (30,))