## Artificial Neural Network Implementation

In [308]:
import pandas as pd 
import numpy as np 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [309]:
iris = load_iris(as_frame=True)
data = pd.concat([iris.data,pd.DataFrame(iris.target)],axis=1)
data.head()
data = data[data.target!=2]
data

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
95,5.7,3.0,4.2,1.2,1
96,5.7,2.9,4.2,1.3,1
97,6.2,2.9,4.3,1.3,1
98,5.1,2.5,3.0,1.1,1


### Initalize Class 

In [310]:
class ANN:
    def __init__(self,data,target="target",bias=1,n_layers=3,hidden_units=2,output_units=2):
        self.bias = bias 
        self.target_name = target
        self.n_layers = n_layers #3 layers 
        self.hidden_units = hidden_units #2 hidden units
        self.output_units = output_units #2 output units

        self.data = data
        self.target = self.data[self.target_name].to_numpy()
        self.data = self.data.drop(columns=self.target_name)
        self.n_features = len(self.data.columns)
        self.features = self.data.columns.to_list()
    
        self.data = self.data.to_numpy()
        self.n_samples = self.data.shape[0]
        #self.Input_W = self.weight_initialization()
        #self.Hidden_W = self.weight_initialization(input_layer=False)
        self.Input_W = self.golorot_initialization(input_layer=True)
        self.Hidden_W = self.golorot_initialization(input_layer=False)
        self.Input_B = np.zeros(self.hidden_units)  # One bias for each hidden unit
        self.Hidden_B = np.zeros(self.output_units)  # One bias for each output unit

    def weight_initialization(self,input_layer=True): #initalize weights and add bias 
        if input_layer:
            theta = np.random.rand(self.n_features,self.hidden_units)
            theta = np.insert(theta,0,[self.bias,self.bias],axis=0) #insert bias into both neurons 
            return theta
        else:
            phi = np.random.rand(self.hidden_units,self.output_units)
            phi = np.insert(phi,0,[self.bias,self.bias],axis=0)
            return phi
    def golorot_initialization(self,input_layer=True): #intialization of weights, using Xavier/Golorot method: ideal for sigmoid activation 
        if input_layer:
            # initializing weights for input layer to first hidden layer
            output_units = self.hidden_units
            x = np.sqrt(6/(self.n_features+output_units))
            weights = np.random.uniform(-x,x,size = (self.n_features+1,output_units))  
        else:
            # initializing weights for hidden layer to output layer
            input_units = self.hidden_units
            x = np.sqrt(6/(input_units+self.output_units))
            weights = np.random.uniform(-x,x,size = (input_units+1,self.output_units)) #add bias weight, randomly initalized
        return weights 
    def sigmoid_activation(self,z):
        activity = 1 / (1 + np.exp(-z))
        return activity
    def calculate_z(self,W,x):
        linear_combination = np.dot(W,x)
        return linear_combination 
    def forward_propagation(self,sample_n=0): #FP computed for each sample
        datawbias = np.insert(self.data[sample_n,:],0,self.bias)
        a1 = self.sigmoid_activation(self.calculate_z(self.Input_W.T,datawbias))
        a1 = np.insert(a1,0,1,axis=0) #add bias into activity vector 
        self.a1 = a1 
        a2 = self.sigmoid_activation(self.calculate_z(self.Hidden_W.T,a1))
        self.output_activity = a2 
        return a2
    def compute_cost(self,sample=0,classification_b=True): #J computed for each sample
        if classification_b:
            a2 = self.forward_propagation(sample_n=sample)
            y_i = self.target[sample]
            cross_entropy = - (y_i * np.log(a2) + (1 - y_i) * np.log(1 - a2))
            return cross_entropy
    def back_propagation(self,sample=0,alpha=0.001): #computed after FP of each sample, process of simulatenous update of parameters
        self.Input_B = self.Input_W[0] #put bias parameter in own vector 
        self.Hidden_B = self.Hidden_W[0]
        self.Hidden_W = self.Hidden_W[1:,:] #remove bias from weight vector
        self.Input_W = self.Input_W[1:,:] #remove bias from weight vector
        target = self.target[sample]
        delta3 = self.output_activity - target #a2 - y shape (2,)

        delta2_W = delta3.dot(self.Hidden_W.T) * (self.a1[1:]*(1-self.a1[1:]))  #dot product of delta3 and weights in hidden layer (2X1).T* (2x2)
        delta2_B = delta3.dot(self.Hidden_B) * (self.a1[1] * (1 - self.a1[1])) 

        #update bias
        self.Input_B -= alpha * delta2_B
        #update weights 
        self.Hidden_W -= alpha * np.outer(self.a1[1:], delta3)  # Use a1[1:] to exclude bias
        self.Input_W -= alpha * np.outer(self.data[sample], delta2_W)  # Update input weights with outer product

        #add bias back into weights vector for subsequent iterations
        self.Input_W = np.vstack((self.Input_B.reshape(1, -1), self.Input_W))
        self.Hidden_W = np.vstack((self.Hidden_B.reshape(1,-1),self.Hidden_W))
    def stopping_criteria(self, n=100, threshold=0.00001, max_iterations=100000):
        stop = False
        n_iterations = 0
        iter_cost = {}
        while not stop:
            computed_cost = 0
            n_iterations += 1
            for i in range(self.n_samples):
                self.forward_propagation(sample_n=i)
                computed_cost += self.compute_cost(sample=i)
                self.back_propagation(sample=i)
            
            # Record the average cost per iteration
            iter_cost[n_iterations] = computed_cost / self.n_samples
            
            # Check stopping criteria after the first iteration
            if n_iterations > 1:
                deltaJ = abs(iter_cost[n_iterations - 1] - iter_cost[n_iterations])
                # Ensure both outputs meet the threshold
                if deltaJ[0] < threshold and deltaJ[1] < threshold:
                    print(f'Stopping criteria met at iteration {n_iterations}')
                    stop = True
            if n_iterations >= max_iterations:
                print("maximum number of iterations reached")
                break
                #raise ValueError("Maximum Number of Iterations Reached")
        return iter_cost
    def iterations(self,n_iterations):
        iterations = 0 
        iter_cost = {}
        for i in range(n_iterations):
            computed_cost = 0 
            iterations += 1
            for j in range(self.n_samples):
                self.forward_propagation(sample_n=j)
                computed_cost += self.compute_cost(sample=j) #accumulate cost for each sample
                self.back_propagation(sample=j)
            iter_cost[iterations] = (computed_cost/self.n_samples) #average cost per iteration
        return iter_cost

                



       
    



### Psuedo-Testing

In [284]:
x = ANN(data=data)
#x.golorot_initialization(input_layer=False).shape
#x.iterations(n_iterations=1000)
x.stopping_criteria()

Stopping criteria met at iteration 179


{1: array([1.01394462, 0.70322481]),
 2: array([0.99092772, 0.703002  ]),
 3: array([0.97180484, 0.70270847]),
 4: array([0.95539015, 0.70237165]),
 5: array([0.94093404, 0.70201043]),
 6: array([0.92795112, 0.70163748]),
 7: array([0.91611765, 0.70126131]),
 8: array([0.90521101, 0.70088764]),
 9: array([0.89507354, 0.70052029]),
 10: array([0.88559042, 0.70016182]),
 11: array([0.87667582, 0.69981388]),
 12: array([0.86826399, 0.69947754]),
 13: array([0.86030337, 0.69915343]),
 14: array([0.85275265, 0.69884186]),
 15: array([0.84557806, 0.69854293]),
 16: array([0.83875144, 0.69825657]),
 17: array([0.83224892, 0.69798263]),
 18: array([0.8260499 , 0.69772084]),
 19: array([0.82013639, 0.6974709 ]),
 20: array([0.8144924 , 0.69723245]),
 21: array([0.8091036 , 0.69700513]),
 22: array([0.803957  , 0.69678854]),
 23: array([0.7990407 , 0.69658228]),
 24: array([0.79434371, 0.69638595]),
 25: array([0.78985584, 0.69619916]),
 26: array([0.78556755, 0.6960215 ]),
 27: array([0.7814698

In [192]:
x.Hidden_W.shape

(3, 2)

### Normalize and Divide Data into Testing (0.7) and Training (0.3) sets 

In [288]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
iris = load_iris(as_frame=True)
#iris.data = pd.DataFrame(scaler.fit_transform(pd.DataFrame(iris.data)))

X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,train_size=0.7,test_size=0.3,stratify=iris.target)

iris_train = pd.concat([X_train,pd.DataFrame(y_train)],axis=1)
iris_train = iris_train[(iris_train["target"] == 0)| (iris_train["target"] == 1)]
iris_test= pd.concat([X_test,pd.DataFrame(y_test)],axis=1)
iris_test = iris_test[(iris_test["target"] == 0) | (iris_test["target"] == 1)]

iris_train

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
62,6.0,2.2,4.0,1.0,1
55,5.7,2.8,4.5,1.3,1
59,5.2,2.7,3.9,1.4,1
35,5.0,3.2,1.2,0.2,0
30,4.8,3.1,1.6,0.2,0
...,...,...,...,...,...
79,5.7,2.6,3.5,1.0,1
12,4.8,3.0,1.4,0.1,0
33,5.5,4.2,1.4,0.2,0
82,5.8,2.7,3.9,1.2,1


### Train Model Instance

In [289]:
model = ANN(data=iris_train)
model.stopping_criteria()
print("Trained Parameters")
print(40*"-")
print(f'Input_W:{model.Input_W}')
print(f'Input_B:{model.Input_B}')
print(f'Hidden_W:{model.Hidden_W}')
print(f'Hidden_B:{model.Hidden_B}')


Stopping criteria met at iteration 2853
Trained Parameters
----------------------------------------
Input_W:[[ 0.938454    0.21917631]
 [-0.7544694   0.95213342]
 [-2.31787293  0.66394943]
 [ 3.03662226  0.18401585]
 [ 2.12963537  0.22274404]]
Input_B:[0.938454   0.21917631]
Hidden_W:[[-0.28986353  1.10716997]
 [ 7.31771864  7.33764888]
 [-3.16009313 -4.56772765]]
Hidden_B:[-0.28986353  1.10716997]


### Apply Learned Parameters to Testing Data 

In [290]:
def sigmoid_activation(z):
    activity = 1 / (1 + np.exp(-z))
    return activity

def apply_params(row, Input_W, Hidden_W):
    sample_data = row.to_numpy()
    sample_data = np.insert(sample_data, 0, 1) 
    
    hidden_input = sample_data.dot(Input_W)  # Shape (2,)
    hidden_activation = sigmoid_activation(hidden_input)
    
    # Add bias term to hidden layer
    hidden_activation = np.insert(hidden_activation, 0, 1)  # Shape becomes (3,)
    
    # Forward pass through hidden layer to output layer
    output_input = hidden_activation.dot(Hidden_W)  # Shape (2,)
    predicted = sigmoid_activation(output_input)  # Apply sigmoid for final output
    
    return predicted




In [291]:
target =iris_test['target']
iris_test.drop(columns='target',inplace=True)
iris_test



Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
81,5.5,2.4,3.7,1.0
57,4.9,2.4,3.3,1.0
29,4.7,3.2,1.6,0.2
69,5.6,2.5,3.9,1.1
80,5.5,2.4,3.8,1.1
83,6.0,2.7,5.1,1.6
45,4.8,3.0,1.4,0.3
37,4.9,3.6,1.4,0.1
96,5.7,2.9,4.2,1.3
15,5.7,4.4,1.5,0.4


In [292]:
predictions = []
for index,row in iris_test.iterrows():
    prediction = apply_params(row,model.Input_W,model.Hidden_W)
    predictions.append(prediction)


iris_test["prediction"] = predictions
iris_test["target"]  = target 


### Once you have predictions, apply logistic function (hypothesis function), could also apply softmax function

In [293]:
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum()

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

iris_test['prediction_sig'] = iris_test['prediction'].apply(lambda x: sigmoid(x))
iris_test["prediction from sigmoid"] = None
for index,row in iris_test.iterrows():
    sample = iris_test.loc[index,"prediction_sig"]
    if sample[0] > sample [1]:
        iris_test.loc[index,"prediction from sigmoid"] = 0
    else:
        iris_test.loc[index,"prediction from sigmoid"] = 1



In [294]:
iris_test

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),prediction,target,prediction_sig,prediction from sigmoid
81,5.5,2.4,3.7,1.0,"[0.978020164449452, 0.9782260973984264]",1,"[0.7267151971049352, 0.7267560935242615]",1
57,4.9,2.4,3.3,1.0,"[0.9762064394099322, 0.9764341130846603]",1,"[0.7263548428356112, 0.7264000937164701]",1
29,4.7,3.2,1.6,0.2,"[0.03279549955031741, 0.03250074252593055]",0,"[0.508198140111843, 0.5081244704887327]",0
69,5.6,2.5,3.9,1.1,"[0.9786368310265532, 0.9788367357568061]",1,"[0.7268376500986949, 0.726877338319662]",1
80,5.5,2.4,3.8,1.1,"[0.9786437741480405, 0.9788452442614477]",1,"[0.7268390286163612, 0.726879027481523]",1
83,6.0,2.7,5.1,1.6,"[0.9795133691018668, 0.9797030834837753]",1,"[0.7270116474634459, 0.7270492976417049]",1
45,4.8,3.0,1.4,0.3,"[0.03278728435707139, 0.03249440557150255]",0,"[0.5081960868655323, 0.5081228866683276]",0
37,4.9,3.6,1.4,0.1,"[0.03110652533973108, 0.03081137372721199]",0,"[0.5077760043295799, 0.5077022341043849]",0
96,5.7,2.9,4.2,1.3,"[0.9788897124978616, 0.9790850000984754]",1,"[0.7268878554894184, 0.7269266226373072]",1
15,5.7,4.4,1.5,0.4,"[0.030845429569913863, 0.030535762011068068]",0,"[0.507710746042484, 0.507633347380053]",0


### 100% model accuracy achieved, very sensisitve of too small of a learning rate and initialization of parameters. Would suggest using a single output neuron and softmax function for binary classification tasks

In [306]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_true=iris_test["target"].astype(int),y_pred=iris_test["prediction from sigmoid"].astype(int))
print(accuracy)

1.0


In [307]:
iris_train

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
62,6.0,2.2,4.0,1.0,1
55,5.7,2.8,4.5,1.3,1
59,5.2,2.7,3.9,1.4,1
35,5.0,3.2,1.2,0.2,0
30,4.8,3.1,1.6,0.2,0
...,...,...,...,...,...
79,5.7,2.6,3.5,1.0,1
12,4.8,3.0,1.4,0.1,0
33,5.5,4.2,1.4,0.2,0
82,5.8,2.7,3.9,1.2,1


In [305]:
iris_test

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),prediction,target,prediction_sig,prediction from sigmoid
81,5.5,2.4,3.7,1.0,"[0.978020164449452, 0.9782260973984264]",1,"[0.7267151971049352, 0.7267560935242615]",1
57,4.9,2.4,3.3,1.0,"[0.9762064394099322, 0.9764341130846603]",1,"[0.7263548428356112, 0.7264000937164701]",1
29,4.7,3.2,1.6,0.2,"[0.03279549955031741, 0.03250074252593055]",0,"[0.508198140111843, 0.5081244704887327]",0
69,5.6,2.5,3.9,1.1,"[0.9786368310265532, 0.9788367357568061]",1,"[0.7268376500986949, 0.726877338319662]",1
80,5.5,2.4,3.8,1.1,"[0.9786437741480405, 0.9788452442614477]",1,"[0.7268390286163612, 0.726879027481523]",1
83,6.0,2.7,5.1,1.6,"[0.9795133691018668, 0.9797030834837753]",1,"[0.7270116474634459, 0.7270492976417049]",1
45,4.8,3.0,1.4,0.3,"[0.03278728435707139, 0.03249440557150255]",0,"[0.5081960868655323, 0.5081228866683276]",0
37,4.9,3.6,1.4,0.1,"[0.03110652533973108, 0.03081137372721199]",0,"[0.5077760043295799, 0.5077022341043849]",0
96,5.7,2.9,4.2,1.3,"[0.9788897124978616, 0.9790850000984754]",1,"[0.7268878554894184, 0.7269266226373072]",1
15,5.7,4.4,1.5,0.4,"[0.030845429569913863, 0.030535762011068068]",0,"[0.507710746042484, 0.507633347380053]",0
