# Back Propagation - Boston Housing Dataset 
---

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:
# Load the dataset
# The bare file name is resolved relative to the kernel's working directory.
HousingData = 'HousingData.csv'
# Read the whole CSV into a DataFrame; later cells select columns from it.
dataset = pd.read_csv(HousingData)

In [4]:
# Separate the features (X) and target variable (y)
# 'CRIM' is the target column; every other column is kept as a feature.
# Later cells map each distinct value of this column to a class index.
y = dataset['CRIM']
X = dataset.drop(columns='CRIM')

In [5]:
# Handle missing values, then convert to NumPy.
# Each missing feature value is replaced by the mean of its column, and the
# filled frame is handed on as a plain array.
X = X.fillna(X.mean()).to_numpy()

# The target is converted unchanged; missing target values are dealt with
# when the labels are encoded in a later cell.
y = y.to_numpy()

In [6]:
# Split the dataset into training and testing sets.
# The split happens BEFORE scaling so that the scaler never sees the
# held-out rows (fitting it on the full matrix would leak test-set
# min/max values into the training features).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Normalize input variables: learn each column's range from the training
# rows only, then apply that same mapping to the test rows. Test values
# may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Define the functions and algorithms
# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    """Build a two-layer network with uniformly random weights.

    Args:
        n_inputs: number of input features fed to each hidden neuron.
        n_hidden: number of neurons in the hidden layer.
        n_outputs: number of neurons in the output layer.

    Returns:
        A list ``[hidden_layer, output_layer]``. Each layer is a list of
        dicts with a single ``'weights'`` array drawn from
        ``np.random.uniform``; the array has one more entry than the
        neuron has inputs (the trailing entry is used as the bias term
        during forward propagation).
    """
    def _make_layer(n_neurons, fan_in):
        # fan_in regular weights plus one trailing bias weight per neuron.
        return [{'weights': np.random.uniform(size=fan_in + 1)} for _ in range(n_neurons)]

    # Hidden layer is drawn first, then the output layer.
    return [_make_layer(n_hidden, n_inputs), _make_layer(n_outputs, n_hidden)]

In [8]:
# Make a prediction with a network
def predict(network, row):
    """Return the index of the output neuron with the largest activation.

    Runs a forward pass of ``row`` through ``network`` (which also refreshes
    each neuron's stored ``'output'``) and takes the arg-max of the final
    layer's outputs.
    """
    return np.argmax(forward_propagate(network, row))

In [9]:
# Forward propagate input to a network output
def forward_propagate(network, row):
    """Push one input row through every layer and return the final outputs.

    Args:
        network: list of layers, each a list of neuron dicts with 'weights'.
        row: indexable sequence of input values; only the first
            ``len(weights) - 1`` entries are read, so trailing entries
            (such as a label column) are ignored.

    Returns:
        List with the sigmoid activation of each neuron in the last layer.

    Side effect: every neuron's ``'output'`` entry is overwritten with its
    activation for this row.
    """
    signal = row
    for layer in network:
        for neuron in layer:
            weights = neuron['weights']
            # The last weight acts as the bias; the rest pair up with inputs.
            total = weights[-1]
            for idx, weight in enumerate(weights[:-1]):
                total += weight * signal[idx]
            neuron['output'] = sigmoid(total)
        # This layer's outputs become the next layer's inputs.
        signal = [neuron['output'] for neuron in layer]
    return signal

In [10]:
# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    """Compute and store the error signal ('delta') of every neuron.

    Layers are visited from last to first. The output layer's error is
    ``expected[j] - output``; a hidden neuron's error is the sum, over the
    neurons of the following layer, of that neuron's weight for this
    connection times its delta. Each error is then scaled by
    ``sigmoid_derivative`` of the neuron's stored output.

    Args:
        network: list of layers whose neurons already carry an 'output'
            value from a forward pass.
        expected: target values, indexed like the output layer.

    Side effect: sets ``neuron['delta']`` on every neuron.
    """
    last = len(network) - 1
    for depth in range(last, -1, -1):
        layer = network[depth]
        if depth == last:
            errors = [expected[j] - neuron['output'] for j, neuron in enumerate(layer)]
        else:
            downstream = network[depth + 1]
            errors = []
            for j in range(len(layer)):
                # Accumulate in the same left-to-right order as the layer list.
                total = 0.0
                for following in downstream:
                    total += (following['weights'][j] * following['delta'])
                errors.append(total)
        for neuron, err in zip(layer, errors):
            neuron['delta'] = err * sigmoid_derivative(neuron['output'])

In [11]:
# Update network weights with error
def update_weights(network, row, learning_rate):
    """Apply one gradient step to every weight, in place.

    Args:
        network: list of layers whose neurons carry 'weights', 'delta' and
            (for every layer that feeds another) 'output'.
        row: the training row; its last element is dropped and the rest is
            used as the input to the first layer.
        learning_rate: step size multiplied into each neuron's delta.

    Side effect: mutates each neuron's 'weights' container; returns None.
    """
    for depth, layer in enumerate(network):
        if depth == 0:
            # First layer is fed by the raw features (label column removed).
            inputs = row[:-1]
        else:
            inputs = [neuron['output'] for neuron in network[depth - 1]]
        for neuron in layer:
            weights = neuron['weights']
            step = learning_rate * neuron['delta']
            for j, value in enumerate(inputs):
                weights[j] += step * value
            # Trailing weight is the bias: its input is implicitly 1.
            weights[-1] += step

In [12]:
# Sigmoid function
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)); works on scalars or NumPy arrays."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

# Derivative of sigmoid function
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This is the slope of the sigmoid expressed in terms of the sigmoid's
    *output*, which is how backward_propagate_error calls it (it passes
    each neuron's stored 'output').
    """
    complement = 1.0 - x
    return x * complement

# Calculate accuracy
def calculate_accuracy(actual, predicted):
    """Percentage of positions where ``actual`` and ``predicted`` agree.

    Args:
        actual: sequence of true labels.
        predicted: sequence of predicted labels, at least as long as
            ``actual``; it is read by position.

    Returns:
        A float in [0, 100]. An empty ``actual`` yields 0.0 rather than
        dividing by zero. NaN labels never compare equal, so they always
        count as misses.
    """
    total = len(actual)
    if total == 0:
        return 0.0
    correct = sum(1 for i in range(total) if actual[i] == predicted[i])
    return correct / float(total) * 100.0

In [13]:
# Backpropagation Algorithm With Stochastic Gradient Descent
def back_propagation(train, test, l_rate, n_epoch, n_hidden):
    """Train a one-hidden-layer network with SGD and predict the test rows.

    Args:
        train: 2-D array; the last column holds the class index of each row
            (NaN for an unknown label), the other columns are features.
        test: 2-D array with the same column layout as ``train``.
        l_rate: learning rate passed to update_weights.
        n_epoch: number of full passes over ``train``.
        n_hidden: number of hidden neurons.

    Returns:
        List with one predicted class index per row of ``test``.

    Raises:
        ValueError: if neither ``train`` nor ``test`` contains a non-missing
            label, so the output layer cannot be sized.
    """
    n_inputs = train.shape[1] - 1

    # Labels are class *indices*, so the output layer needs max_index + 1
    # neurons. Counting distinct values instead under-sizes the layer
    # whenever a subset skips some indices, and also counts every NaN as a
    # separate value.
    known_labels = [
        int(value)
        for value in list(train[:, -1]) + list(test[:, -1])
        if not np.isnan(value)
    ]
    if not known_labels:
        raise ValueError('back_propagation requires at least one non-missing class label')
    n_outputs = max(known_labels) + 1

    network = initialize_network(n_inputs, n_hidden, n_outputs)
    for epoch in range(n_epoch):
        for row in train:
            # A row without a label carries no training signal; skip it
            # rather than pushing every output towards zero.
            if np.isnan(row[-1]):
                continue
            # Forward pass is needed for its side effect of storing each
            # neuron's 'output'; the returned values themselves are unused.
            forward_propagate(network, row)
            expected = np.zeros(n_outputs)
            expected[int(row[-1])] = 1
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
    return [predict(network, row) for row in test]

In [14]:
# Convert the target variable to integers
# Collect the distinct non-missing target values in sorted order.
# Missing values are left out so they do not take up class indices, and
# sorting makes the value -> index mapping independent of set iteration
# order (re-running this cell on already-encoded labels is then a no-op).
class_values = sorted({value for value in y_train if not pd.isnull(value)})
lookup = {value: i for i, value in enumerate(class_values)}
# Missing targets stay NaN; everything else becomes its class index.
y_train = np.array([lookup[value] if not pd.isnull(value) else np.nan for value in y_train])

In [15]:
# Convert the target variable to integers for testing.
# The lookup built from the training targets is reused; a missing value, or
# a value that has no entry in the lookup, becomes NaN.
y_test = np.array([np.nan if pd.isnull(value) else lookup.get(value, np.nan) for value in y_test])

# Combine the features and target variable: the target is appended as the
# last column of each feature matrix.
train_data = np.column_stack((X_train, y_train))
test_data = np.column_stack((X_test, y_test))

In [16]:
# Evaluate algorithm
def evaluate_algorithm(data, algorithm, n_folds, *args):
    """Score ``algorithm`` with k-fold cross-validation.

    Args:
        data: 2-D array whose last column is the label.
        algorithm: callable ``algorithm(train_set, test_set, *args)`` that
            returns one prediction per row of ``test_set``.
        n_folds: number of folds produced by cross_validation_split.
        *args: extra positional arguments forwarded to ``algorithm``.

    Returns:
        List with one accuracy percentage per fold.
    """
    folds = cross_validation_split(data, n_folds)
    scores = []
    for held_out, test_set in enumerate(folds):
        # Pick the training folds by position. list.remove() would compare
        # the ndarrays with ==, which raises "truth value of an array is
        # ambiguous" for every fold after the first.
        train_set = np.concatenate(
            [fold for position, fold in enumerate(folds) if position != held_out]
        )
        predicted = algorithm(train_set, test_set, *args)
        actual = test_set[:, -1]
        scores.append(calculate_accuracy(actual, predicted))
    return scores

def cross_validation_split(data, n_folds):
    """Randomly partition the rows of ``data`` into ``n_folds`` equal folds.

    Each fold receives ``len(data) // n_folds`` rows drawn without
    replacement via ``np.random.choice``; any remainder rows are left out.
    ``data`` itself is not modified.

    Returns:
        List of ``n_folds`` arrays, one per fold.
    """
    rows_per_fold = len(data) // n_folds
    remaining = list(data)
    folds = []
    for _ in range(n_folds):
        # Draw a random position among the rows still available and move
        # that row into the current fold.
        picked = [
            remaining.pop(np.random.choice(len(remaining)))
            for _ in range(rows_per_fold)
        ]
        folds.append(np.array(picked))
    return folds

def calculate_accuracy(actual, predicted):
    """Percentage of positions where ``actual`` and ``predicted`` agree.

    NOTE(review): a function with this name is also defined in an earlier
    cell; when cells run top to bottom this later definition is the one in
    effect. Consider keeping a single copy.

    Args:
        actual: sequence of true labels.
        predicted: sequence of predicted labels, at least as long as
            ``actual``; it is read by position.

    Returns:
        A float in [0, 100]. An empty ``actual`` yields 0.0 rather than
        dividing by zero. NaN labels never compare equal, so they always
        count as misses.
    """
    total = len(actual)
    if total == 0:
        return 0.0
    correct = sum(1 for i in range(total) if actual[i] == predicted[i])
    return correct / float(total) * 100.0

# Seed NumPy's global RNG: both the initial weights (np.random.uniform) and
# the fold assignment (np.random.choice) draw from it, so without a seed the
# reported scores change on every run.
np.random.seed(1)

# Cross-validation and training hyper-parameters
n_folds = 5
l_rate = 0.3
n_epoch = 500
n_hidden = 5
scores = evaluate_algorithm(train_data, back_propagation, n_folds, l_rate, n_epoch, n_hidden)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores) / float(len(scores))))