In [371]:
#importing deps
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import cohen_kappa_score
from random import seed
from random import randrange
from random import random
from math import exp
import numpy as np

In [372]:
# Load the Iris dataset
iris = load_iris()

# Convert the target names to numbers using LabelEncoder
label_encoder = LabelEncoder()
iris_target_encoded = label_encoder.fit_transform(iris.target)

# Create a DataFrame containing the feature columns
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Add the encoded target column to the DataFrame
iris_df['target'] = iris_target_encoded


In [373]:
# Calculate the minimum and maximum values for each column
def minmax(dataset): 
    stats = dataset.describe().loc[['min', 'max']].values.tolist()
    return stats

In [374]:
# Rescale dataset columns to the range 0-1
def normalize(data, minmax_set):
    df= data.copy()
    for i in range(len(df.columns) - 1):
        df.iloc[:, i] = (df.iloc[:, i] - minmax_set[0][i]) / (minmax_set[1][i] - minmax_set[0][i])
    return df

In [375]:
# Split a dataset into k folds
def cross_validation_split(df, n_folds):
    df_split = list()
    df_copy = df.copy()
    fold_size = int(len(df) / n_folds)
    for i in range(n_folds):
        fold = df_copy.sample(n=fold_size, replace=False)
        df_copy = df_copy.drop(fold.index)
        df_split.append(fold)
    return df_split

In [376]:
# Calculate accuracy percentage
def accuracy_met(actual, predicted):
    # Convert input lists to Pandas Series for easy comparison
    actual_series = pd.Series(actual)
    predicted_series = pd.Series(predicted)

    # Count the number of correct predictions
    correct = (actual_series == predicted_series).sum()

    # Calculate and return the accuracy as a percentage
    accuracy = (correct / len(actual)) * 100.0
    return accuracy


In [458]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, cohen_kappa_score

def run_algorithm(dataset, algorithm, n_folds, *args):
    # Define a function to calculate accuracy percentage

    # Split the dataset into cross-validation folds
    folds = cross_validation_split(dataset, n_folds)

    # Initialize a list to store accuracy scores for each fold
    scores = list()

    for i in range(n_folds):
        # Extract the test set for this fold
        test_set = folds[i]

        # Combine all other folds to form the training set
        train_folds = [f for j, f in enumerate(folds) if j != i]
        train_set = pd.concat(train_folds)

        # Extract the actual target values for the test set
        actual = test_set['target'].tolist()

        # Make a copy of the test set and remove the target column for predictions
        test_set_copy = test_set.copy()
        test_set_copy['target'] = None
        
        print(f"For Fold {i+1}:")

        # Make predictions using the algorithm (function) with given arguments
        predicted = algorithm(train_set, test_set_copy, *args)
        
        accuracy = accuracy_met(actual, predicted)
        cm = confusion_matrix(actual, predicted)
        
        print("Confusion Matrix:")
        print(cm)

        FP = cm.sum(axis=0) - np.diag(cm)
        FN = cm.sum(axis=1) - np.diag(cm)
        TP = np.diag(cm)
        TN = cm.sum() - (FP + FN + TP)

        TPR = TP / (TP + FN)
        TNR = TN / (TN + FP)
        Precision = TP / (TP + FP)
        Recall = TP / (TP + FN)
        Acc = (TP + TN) / (TP + TN + FP + FN)
        Fscore = 2 * (Precision * Recall) / (Precision + Recall)
        k = cohen_kappa_score(actual, predicted)

        print(f"False Positives: {FP}")
        print(f"False Negatives: {FN}")
        print(f"True Positives: {TP}")
        print(f"True Negatives: {TN}")
        print(f"Sensitivity (True Positive Rate): {TPR}")
        print(f"Specificity (True Negative Rate): {TNR}")
        print(f"Precision: {Precision}")
        print(f"Recall: {Recall}")
        print(f"Accuracy: {Acc}")
        print(f"F1 Score: {Fscore}")
        print(f"Cohen's Kappa: {k}")

        scores.append(accuracy)
        print('\n\n')

    return scores



In [459]:
# Calculate neuron activation for an input
def activate(weights, inputs):
    # Add bias weight to the inputs
    inputs_with_bias = inputs + [weights[-1]]

    # Convert the weights and inputs to Pandas Series for element-wise multiplication
    weights_series = pd.Series(weights[:-1])  # Exclude the bias weight
    inputs_series = pd.Series(inputs_with_bias)

    # Calculate the activation using element-wise multiplication and sum
    activation = (weights_series * inputs_series).sum()

    return activation


In [460]:
# Transfer neuron activation using sigmoid function
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

In [461]:
# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row

    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

In [462]:
# Calculate the derivative of an neuron output
def transfer_derivative(output):
        return output * (1.0 - output)

In [463]:
# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
        for i in reversed(range(len(network))):
                layer = network[i]
                errors = list()
                if i != len(network)-1: #if not output layer
                        for j in range(len(layer)):
                                error = 0.0
                                for neuron in network[i + 1]:
                                        error += (neuron['weights'][j] * neuron['delta'])
                                errors.append(error)
                else: #for output layer
                        for j in range(len(layer)):
                                neuron = layer[j]
                                errors.append(expected[j] - neuron['output'])
                for j in range(len(layer)):
                        neuron = layer[j]
                        neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

In [464]:
# Update network weights with error
def update_weights(network, row, l_rate):
        for i in range(len(network)):
                inputs = row[:-1]                
                if i != 0: #if not input layer
                        inputs = [neuron['output'] for neuron in network[i - 1]]
                for neuron in network[i]:
                        for j in range(len(inputs)):
                                temp = l_rate * neuron['delta'] * inputs[j] + mu * neuron['prev'][j]
                                
                                neuron['weights'][j] += temp
                                neuron['prev'][j] = temp
                        temp = l_rate * neuron['delta'] + mu * neuron['prev'][-1]
                        neuron['weights'][-1] += temp
                        neuron['prev'][-1] = temp

In [465]:
# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        for _, row in train.iterrows():
            outputs = forward_propagate(network, row.iloc[:4].values.tolist())
            expected = [0 for i in range(n_outputs)]
            expected[int(row['target'])] = 1
            backward_propagate_error(network, expected)
            update_weights(network, row.values.tolist(), l_rate)   


In [466]:
# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
        network = list()
        hidden_layer = [{'weights':[random() for i in range(n_inputs + 1)], 'prev':[0 for i in range(n_inputs+1)]} for i in range(n_hidden)]        
        network.append(hidden_layer)
        output_layer = [{'weights':[random() for i in range(n_hidden + 1)],'prev':[0 for i in range(n_hidden+1)]} for i in range(n_outputs)]
        network.append(output_layer)
        return network

In [467]:
# Make a prediction with a network
def predict(network, row):
        outputs = forward_propagate(network, row)
        return outputs.index(max(outputs))
 


In [475]:
# Backpropagation Algorithm With Stochastic Gradient Descent
def back_propagation(train, test, l_rate, n_epoch, n_hidden):
        n_inputs = train.shape[1] - 1
        n_outputs = len(set([value for value in train.target]))
        network = initialize_network(n_inputs, n_hidden, n_outputs)
        train_network(network, train, l_rate, n_epoch, n_outputs)
        predictions = list()
        for _, row in test.iterrows():
                prediction = predict(network, row.iloc[:4].values.tolist())
                predictions.append(prediction)
        return(predictions)

In [476]:
# Test Backprop on Seeds dataset
seed(1)
# normalize input variables
minmax_set= minmax(iris_df)
dataset= normalize(iris_df, minmax_set)
# evaluate algorithm
n_folds = 5
l_rate = 0.1
mu=0.001
n_epoch = 150
n_hidden = 5
scores = run_algorithm(dataset, back_propagation, n_folds, l_rate, n_epoch, n_hidden)

For Fold 1:
Confusion Matrix:
[[10  0  0]
 [ 0  7  1]
 [ 0  2 10]]
False Positives: [0 2 1]
False Negatives: [0 1 2]
True Positives: [10  7 10]
True Negatives: [20 20 17]
Sensitivity (True Positive Rate): [1.         0.875      0.83333333]
Specificity (True Negative Rate): [1.         0.90909091 0.94444444]
Precision: [1.         0.77777778 0.90909091]
Recall: [1.         0.875      0.83333333]
Accuracy: [1.  0.9 0.9]
F1 Score: [1.         0.82352941 0.86956522]
Cohen's Kappa: 0.848993288590604



For Fold 2:
Confusion Matrix:
[[10  0  0]
 [ 0  6  2]
 [ 0  1 11]]
False Positives: [0 1 2]
False Negatives: [0 2 1]
True Positives: [10  6 11]
True Negatives: [20 21 16]
Sensitivity (True Positive Rate): [1.         0.75       0.91666667]
Specificity (True Negative Rate): [1.         0.95454545 0.88888889]
Precision: [1.         0.85714286 0.84615385]
Recall: [1.         0.75       0.91666667]
Accuracy: [1.  0.9 0.9]
F1 Score: [1.   0.8  0.88]
Cohen's Kappa: 0.8469387755102041



For Fold 3:

In [477]:
print(f"scores: {scores}")
print(f"average score: {sum(scores)/len(scores)}")

scores: [90.0, 90.0, 93.33333333333333, 96.66666666666667, 100.0]
average score: 94.0
