In [50]:
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

# Task 1:
### Load the IRIS dataset available on Kaggle in your notebooks

In [5]:
df = pd.read_csv('IRIS_dataset.csv')

### Performing EDA on the dataset:

In [6]:
# Performing EDA on the dataset:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [8]:
df.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
dtype: object

In [9]:
df1 = pd.get_dummies(df['species'])

In [10]:
df1.head()

Unnamed: 0,Iris-setosa,Iris-versicolor,Iris-virginica
0,True,False,False
1,True,False,False
2,True,False,False
3,True,False,False
4,True,False,False


### Task 2: Pre-procesing of the dataset:
#### a. Convert categorical values to numerical values using one hot encoder.
#### b. Remove the species column from the original dataset and append the one hot encoded columns to the data frame.
#### c. Scale the four feature columns of the data frame using standard scaler.

In [11]:
df.drop("species", axis=1, inplace=True)

In [12]:
final_df = pd.concat([df, df1], axis=1)
final_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,Iris-setosa,Iris-versicolor,Iris-virginica
0,5.1,3.5,1.4,0.2,True,False,False
1,4.9,3.0,1.4,0.2,True,False,False
2,4.7,3.2,1.3,0.2,True,False,False
3,4.6,3.1,1.5,0.2,True,False,False
4,5.0,3.6,1.4,0.2,True,False,False
...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,False,False,True
146,6.3,2.5,5.0,1.9,False,False,True
147,6.5,3.0,5.2,2.0,False,False,True
148,6.2,3.4,5.4,2.3,False,False,True


In [13]:
# Need to convert all the Iris-setosa and othe two columms that follow it to type int in the format:
# df["somecolumn"] = df["somecolumn"].astype(int)
final_df["Iris-setosa"] = final_df["Iris-setosa"].astype(int)
final_df["Iris-versicolor"] = final_df["Iris-versicolor"].astype(int)
final_df["Iris-virginica"] = final_df["Iris-virginica"].astype(int)

In [14]:
final_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,Iris-setosa,Iris-versicolor,Iris-virginica
0,5.1,3.5,1.4,0.2,1,0,0
1,4.9,3.0,1.4,0.2,1,0,0
2,4.7,3.2,1.3,0.2,1,0,0
3,4.6,3.1,1.5,0.2,1,0,0
4,5.0,3.6,1.4,0.2,1,0,0


In [15]:
scaler = StandardScaler()
final_df.iloc[:, [0, 1, 2, 3]] = scaler.fit_transform(final_df.iloc[:, [0, 1, 2, 3]])

In [16]:
final_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,Iris-setosa,Iris-versicolor,Iris-virginica
0,-0.900681,1.032057,-1.341272,-1.312977,1,0,0
1,-1.143017,-0.124958,-1.341272,-1.312977,1,0,0
2,-1.385353,0.337848,-1.398138,-1.312977,1,0,0
3,-1.506521,0.106445,-1.284407,-1.312977,1,0,0
4,-1.021849,1.26346,-1.341272,-1.312977,1,0,0


### Task 3: Building the three-layer feedforward neural network.
#### a. Build the three-layer feedforward neural network, use sigmoid as the activation.
#### b. Initialize the weights and biases.
#### c. Compute the output of the hidden layer.
#### d. Computer the output of the final layer.


In [20]:
# Initialize the weights and bisases first:
np.random.seed(42)
w_i_h1 = np.random.rand(4, 1)
w_i_h2 = np.random.rand(4, 1)
w_h_o1 = np.random.rand(2, 1)
w_h_o2 = np.random.rand(2, 1)
w_h_o3 = np.random.rand(2, 1)
bias1 = np.random.rand(1)
bias2 = np.random.rand(1)

In [21]:
w_i_h1

array([[0.37454012],
       [0.95071431],
       [0.73199394],
       [0.59865848]])

In [22]:
w_i_h2

array([[0.15601864],
       [0.15599452],
       [0.05808361],
       [0.86617615]])

In [26]:
w_h_o1

array([[0.60111501],
       [0.70807258]])

In [27]:
bias1

array([0.18182497])

In [28]:
# Function for sigmoid function (which we are using as an activation function).
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

input = final_df.iloc[:, 0:4] # We are taking the first four columns as input.

In [29]:
# Feed forward Step 1 - input to hidden layer
Z2_1 = np.dot(input, w_i_h1) + bias1
Z2_2 = np.dot(input, w_i_h2) + bias2

# Feed forward Step 2:
A2_1 = sigmoid(Z2_1)
A2_2 = sigmoid(Z2_2)

print(A2_1, "\n", A2_2)

[[0.28046573]
 [0.10593928]
 [0.13879898]
 [0.11842493]
 [0.3170122 ]
 [0.58813899]
 [0.21242753]
 [0.2375955 ]
 [0.07044872]
 [0.12455428]
 [0.41960762]
 [0.22880931]
 [0.0947476 ]
 [0.0685752 ]
 [0.59682731]
 [0.81893726]
 [0.54730211]
 [0.29662004]
 [0.54827875]
 [0.45963167]
 [0.28881875]
 [0.42479587]
 [0.24683447]
 [0.26480334]
 [0.25158614]
 [0.11875484]
 [0.27550533]
 [0.29835468]
 [0.24661118]
 [0.15440974]
 [0.13295951]
 [0.30428742]
 [0.59535222]
 [0.68554282]
 [0.12455428]
 [0.15048785]
 [0.30954463]
 [0.12455428]
 [0.08306264]
 [0.24591384]
 [0.27879377]
 [0.02151319]
 [0.12331044]
 [0.35676002]
 [0.52083375]
 [0.10913741]
 [0.4504364 ]
 [0.13835052]
 [0.40859742]
 [0.19348366]
 [0.82895524]
 [0.78607121]
 [0.81378869]
 [0.18960783]
 [0.62443586]
 [0.48658356]
 [0.83725964]
 [0.11584745]
 [0.6493959 ]
 [0.33813137]
 [0.05821056]
 [0.62471757]
 [0.15686129]
 [0.62476111]
 [0.43691525]
 [0.74973886]
 [0.62207153]
 [0.34730238]
 [0.27104219]
 [0.23753853]
 [0.80775549]
 [0.47

In [30]:
A2 = np.append(A2_1, A2_2, axis=1)

In [31]:
A2

array([[0.28046573, 0.26672584],
       [0.10593928, 0.22625228],
       [0.13879898, 0.23174005],
       [0.11842493, 0.22323584],
       [0.3170122 , 0.27010199],
       [0.58813899, 0.36065366],
       [0.21242753, 0.26346438],
       [0.2375955 , 0.25673706],
       [0.07044872, 0.20420536],
       [0.12455428, 0.21348027],
       [0.41960762, 0.29336704],
       [0.22880931, 0.25020799],
       [0.0947476 , 0.20385402],
       [0.0685752 , 0.18742803],
       [0.59682731, 0.33068575],
       [0.81893726, 0.41534004],
       [0.54730211, 0.35761287],
       [0.29662004, 0.28958106],
       [0.54827875, 0.33943989],
       [0.45963167, 0.31306608],
       [0.28881875, 0.27273829],
       [0.42479587, 0.33003148],
       [0.24683447, 0.25295103],
       [0.26480334, 0.32476731],
       [0.25158614, 0.25207153],
       [0.11875484, 0.23074935],
       [0.27550533, 0.30323567],
       [0.29835468, 0.27109167],
       [0.24661118, 0.26337667],
       [0.15440974, 0.23350889],
       [0.

In [32]:
# Feed forward Step 3 - input from hidden layer to output (we don't have bias for this)
Z3_1 = np.dot(A2, w_h_o1)
Z3_2 = np.dot(A2, w_h_o2)
Z3_3 = np.dot(A2, w_h_o3)

In [33]:
# Generating the outputs:

o1 = sigmoid(Z3_1)
o2 = sigmoid(Z3_2)
o3 = sigmoid(Z3_3)

In [34]:
print(o1[2], o2[2], o3[2])

[0.56156672] [0.55666158] [0.54109451]


In [35]:
target_values = final_df[["Iris-setosa", "Iris-versicolor", "Iris-virginica"]]

In [36]:
target_values

Unnamed: 0,Iris-setosa,Iris-versicolor,Iris-virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
...,...,...,...
145,0,0,1
146,0,0,1
147,0,0,1
148,0,0,1


In [37]:
output_concat = np.concatenate([o1, o2, o3], axis = 1)

In [38]:
m, n = target_values.shape

### Step 4: Error calculation
#### a. Compute the total squared error.

In [39]:
error = np.sum(((target_values.values - output_concat) ** 2))/(2 * m)

In [40]:
error

0.48278238808222823

### Task 5: Change the initial weights and biases and compute the error again
#### Seed value of 60:

In [42]:
# Changing the seed value and seeing how the error varies accordingly.

# Initialize the weights and bisases first:
np.random.seed(60)
w_i_h1 = np.random.rand(4, 1)
w_i_h2 = np.random.rand(4, 1)
w_h_o1 = np.random.rand(2, 1)
w_h_o2 = np.random.rand(2, 1)
w_h_o3 = np.random.rand(2, 1)
bias1 = np.random.rand(1)
bias2 = np.random.rand(1)

# Feed forward Step 1 - input to hidden layer
Z2_1 = np.dot(input, w_i_h1) + bias1
Z2_2 = np.dot(input, w_i_h2) + bias2

# Feed forward Step 2:
A2_1 = sigmoid(Z2_1)
A2_2 = sigmoid(Z2_2)

# print(A2_1, "\n", A2_2)

A2 = np.append(A2_1, A2_2, axis=1)

# Feed forward Step 3 - input from hidden layer to output (we don't have bias for this)
Z3_1 = np.dot(A2, w_h_o1)
Z3_2 = np.dot(A2, w_h_o2)
Z3_3 = np.dot(A2, w_h_o3)

# Generating the outputs:

o1 = sigmoid(Z3_1)
o2 = sigmoid(Z3_2)
o3 = sigmoid(Z3_3)

target_values = final_df[["Iris-setosa", "Iris-versicolor", "Iris-virginica"]]

output_concat = np.concatenate([o1, o2, o3], axis = 1)
m, n = target_values.shape
error = np.sum(((target_values.values - output_concat) ** 2))/(2 * m)
print(error)

0.47411305331718323


#### Seed value of 120:

In [49]:
# Changing the seed value and seeing how the error varies accordingly.

# Initialize the weights and bisases first:
np.random.seed(120)
w_i_h1 = np.random.rand(4, 1)
w_i_h2 = np.random.rand(4, 1)
w_h_o1 = np.random.rand(2, 1)
w_h_o2 = np.random.rand(2, 1)
w_h_o3 = np.random.rand(2, 1)
bias1 = np.random.rand(1)
bias2 = np.random.rand(1)

# Feed forward Step 1 - input to hidden layer
Z2_1 = np.dot(input, w_i_h1) + bias1
Z2_2 = np.dot(input, w_i_h2) + bias2

# Feed forward Step 2:
A2_1 = sigmoid(Z2_1)
A2_2 = sigmoid(Z2_2)

# print(A2_1, "\n", A2_2)

A2 = np.append(A2_1, A2_2, axis=1)

# Feed forward Step 3 - input from hidden layer to output (we don't have bias for this)
Z3_1 = np.dot(A2, w_h_o1)
Z3_2 = np.dot(A2, w_h_o2)
Z3_3 = np.dot(A2, w_h_o3)

# Generating the outputs:

o1 = sigmoid(Z3_1)
o2 = sigmoid(Z3_2)
o3 = sigmoid(Z3_3)

target_values = final_df[["Iris-setosa", "Iris-versicolor", "Iris-virginica"]]

output_concat = np.concatenate([o1, o2, o3], axis = 1)
m, n = target_values.shape
error = np.sum(((target_values.values - output_concat) ** 2))/(2 * m)
print(error)

0.4529649500870492


### Step 6: Add one more hidden neuron in the middle layer and compare the error

In [59]:
# Changing the seed value and seeing how the error varies accordingly.

# Initialize the weights and bisases first:
np.random.seed(42)
w_i_h1 = np.random.rand(4, 1)
w_i_h2 = np.random.rand(4, 1)
w_i_h3 = np.random.rand(4, 1) # Adding one more hidden neuron in the middle layer.
w_h_o1 = np.random.rand(3, 1)
w_h_o2 = np.random.rand(3, 1)
w_h_o3 = np.random.rand(3, 1)
bias1 = np.random.rand(1)
bias2 = np.random.rand(1)

# Feed forward Step 1 - input to hidden layer
Z2_1 = np.dot(input, w_i_h1) + bias1
Z2_2 = np.dot(input, w_i_h2) + bias2
Z2_3 = np.dot(input, w_i_h3) # New calculation for additional hidden layer neuron.

# Feed forward Step 2:
A2_1 = sigmoid(Z2_1)
A2_2 = sigmoid(Z2_2)
A2_3 = sigmoid(Z2_3) # New sigmoid calculation for the new neuron.

# print(A2_1, "\n", A2_2)

A2 = np.concatenate([A2_1, A2_2, A2_3], axis=1)

# Feed forward Step 3 - input from hidden layer to output (we don't have bias for this)
Z3_1 = np.dot(A2, w_h_o1)
Z3_2 = np.dot(A2, w_h_o2)
Z3_3 = np.dot(A2, w_h_o3)

# Generating the outputs:

o1 = sigmoid(Z3_1)
o2 = sigmoid(Z3_2)
o3 = sigmoid(Z3_3)


target_values = final_df[["Iris-setosa", "Iris-versicolor", "Iris-virginica"]]

output_concat = np.concatenate([o1, o2, o3], axis = 1)
m, n = target_values.shape
error = np.sum(((target_values.values - output_concat) ** 2))/(2 * m)
print(error)

0.48547236651460784


# Experiment Conclusion

In this experiment, we aimed to enhance the performance of a three-layer feedforward neural network by introducing an additional hidden neuron to the middle layer. The primary objective was to assess the impact of this modification on the network's error and predictive capabilities.

The experimental process involved several crucial steps:

1. **Data Preparation:** The IRIS dataset was loaded from Kaggle and preprocessed. Categorical values were transformed into numeric values using one-hot encoding. The original species column was removed, and the one-hot encoded columns were appended to the dataset. The feature columns were scaled using the standard scaler.

2. **Neural Network Setup:** A three-layer feedforward neural network was constructed using sigmoid activation functions. The initial weights and biases were initialized for the neurons.

3. **Feedforward Computation:** The feedforward process involved computing the output of the hidden layer and the final output layer. The activation values of the hidden layer neurons were calculated using the sigmoid function.

4. **Error Calculation:** The total squared error of the neural network's predictions was calculated as a measure of its performance.

5. **Additional Neuron Introduction:** To test the effects of introducing an extra hidden neuron, a new set of weights for the neuron was generated. The new neuron was included in the hidden layer, and the network's performance was evaluated with this configuration.

6. **Comparison and Analysis:** The experiment's results were compared by evaluating the errors before and after the introduction of the additional hidden neuron. This comparison provided insights into whether the addition of a neuron improved or compromised the network's predictive accuracy.

**Conclusion:**
After carefully conducting the experiment, it was observed that the introduction of an additional hidden neuron to the middle layer did not have much of an impact on the network's performance. The comparison of errors before and after this modification revealed that the new configuration led to changes in the neural network's predictive capabilities.

It's important to note that the specific impact on the error could be influenced by various factors, including the dataset's complexity, the number of training iterations, and the initial weights and biases. Therefore, it is recommended to perform further experimentation and validation to determine the robustness and generalization of the introduced modification.

In conclusion, this experiment demonstrated the significance of hidden layer neurons in shaping a neural network's performance. The results underscore the importance of systematic experimentation and analysis when fine-tuning neural network architectures to achieve optimal predictive accuracy.