In [39]:
!pip install numpy
!pip install pandas
!pip install openpyxl
!pip install scipy
!pip install scikit-learn
!pip install matplotlib

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [5]:
import numpy as np
import pandas as pd
from scipy.special import expit
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [6]:
# Load and preprocess data from an Excel file
def loadDataFromExcel(filePath, inputColNames, targetColName):
    """Read a spreadsheet and return its numeric feature and target arrays.

    Only the requested columns are kept. Rows that contain a missing value
    are discarded, every remaining cell is passed through ``pd.to_numeric``
    (cells that cannot be parsed become NaN), and rows that picked up a NaN
    during that conversion are discarded as well.

    Args:
        filePath: Location of the workbook, handed to ``pd.read_excel``.
        inputColNames: List of column names to use as input features.
        targetColName: Name of the column holding the target value.

    Returns:
        A pair ``(inputs, targets)``. ``inputs`` is a 2-D array with one
        column per entry of ``inputColNames`` and ``targets`` is a 2-D
        single-column array. When no row survives the cleaning, two empty
        1-D arrays are returned instead.
    """
    print(f"Loading data from file: {filePath}")

    sheet = pd.read_excel(filePath)
    wantedColumns = inputColNames + [targetColName]

    # Pass 1: keep only the columns of interest and drop incomplete rows.
    completeRows = sheet[wantedColumns].dropna()
    # Pass 2: force everything to numbers; unparseable cells turn into NaN
    # and the rows holding them are dropped too.
    numericRows = completeRows.apply(pd.to_numeric, errors='coerce').dropna()

    featureMatrix = numericRows[inputColNames].values
    targetColumn = numericRows[[targetColName]].values

    # Nothing usable left after cleaning.
    if len(featureMatrix) == 0 or len(targetColumn) == 0:
        print("No valid data found.")
        return np.array([]), np.array([])

    print(f"Loaded {len(featureMatrix)} rows of data.")
    return featureMatrix, targetColumn

# Replaces negatives with 0, keeping non-negatives unchanged
def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``.

    Args:
        x: Scalar or array-like of numbers.

    Returns:
        ``x`` with every negative entry replaced by 0; non-negative entries
        are returned unchanged.
    """
    lowerBound = 0
    rectified = np.maximum(lowerBound, x)
    return rectified

# Return 1 for positive values, 0 otherwise
def reluDerivative(x):
    """Element-wise derivative of the rectified linear unit.

    Args:
        x: Scalar or array-like of numbers.

    Returns:
        Integer array shaped like ``x`` holding 1 where the entry is strictly
        positive and 0 everywhere else (including at exactly 0).
    """
    isPositive = np.asarray(x) > 0
    return np.where(isPositive, 1, 0)

In [7]:
# Define the Multi-Layer Perceptron (MLP) class
class MLP:
    """Single-hidden-layer perceptron for regression.

    The hidden layer uses a ReLU activation and the output layer is linear.
    Weights are updated one sample at a time by gradient descent on the
    squared error. The instance also carries two ``StandardScaler`` objects
    (one for inputs, one for targets) together with small helpers to fit and
    apply them.
    """

    def __init__(self, inputSize, hiddenSize, outputSize):
        """Create the weight matrices, biases and (unfitted) scalers.

        Args:
            inputSize: Number of input features.
            hiddenSize: Number of hidden units.
            outputSize: Number of output values.
        """
        # Glorot/Xavier-style initialisation: N(0, 1) draws scaled by
        # sqrt(2 / (fan_in + fan_out)).
        self.weightsInputHidden = np.random.randn(inputSize, hiddenSize) * np.sqrt(2 / (inputSize + hiddenSize))
        self.biasHidden = np.zeros((1, hiddenSize))
        self.weightsHiddenOutput = np.random.randn(hiddenSize, outputSize) * np.sqrt(2 / (hiddenSize + outputSize))
        self.biasOutput = np.zeros((1, outputSize))

        # Scalers for input and target data; they must be fitted before the
        # normalise*/denormalise* helpers are used.
        self.scalerInput = StandardScaler()
        self.scalerTarget = StandardScaler()

    def forward(self, inputs):
        """Run a forward pass and cache the intermediate activations.

        Args:
            inputs: 2-D array with one row per sample.

        Returns:
            The network output, one row per sample. The same array is stored
            in ``self.predictedOutput`` for use by ``backward``.
        """
        # Hidden layer: affine transform followed by ReLU.
        self.hiddenLayerInput = np.dot(inputs, self.weightsInputHidden) + self.biasHidden
        self.hiddenLayerOutput = relu(self.hiddenLayerInput)
        # Output layer: affine transform only (linear output, no activation).
        self.outputLayerInput = np.dot(self.hiddenLayerOutput, self.weightsHiddenOutput) + self.biasOutput
        self.predictedOutput = self.outputLayerInput

        return self.predictedOutput

    def backward(self, inputs, targets, learningRate):
        """Apply one gradient-descent update using the last forward pass.

        Must be called after ``forward`` on the same ``inputs`` because it
        reads the activations cached there.

        Args:
            inputs: The array that was passed to ``forward``.
            targets: Desired outputs, same shape as the network output.
            learningRate: Step size of the update.
        """
        # With a linear output and loss 0.5 * (target - output)^2 the
        # negative gradient w.r.t. the output is simply the error.
        error = targets - self.predictedOutput
        outputDelta = error
        # Propagate the error back through the (not yet updated) output weights.
        hiddenLayerError = outputDelta.dot(self.weightsHiddenOutput.T)
        # ReLU output is positive exactly where its input is positive, so the
        # derivative mask can be taken from the cached hidden output.
        hiddenLayerDelta = hiddenLayerError * reluDerivative(self.hiddenLayerOutput)
        # "+=" because the deltas already carry the negative-gradient sign.
        self.weightsHiddenOutput += self.hiddenLayerOutput.T.dot(outputDelta) * learningRate
        self.biasOutput += np.sum(outputDelta, axis=0, keepdims=True) * learningRate
        self.weightsInputHidden += inputs.T.dot(hiddenLayerDelta) * learningRate
        self.biasHidden += np.sum(hiddenLayerDelta, axis=0, keepdims=True) * learningRate

    def train(self, inputs, targets, epochs, learningRate, preNormalised=False):
        """Train the network sample by sample for a number of epochs.

        By default both scalers are (re-)fitted on the arrays passed in and
        the inputs are normalised with the freshly fitted input scaler.
        ``targets`` are always used exactly as given: the target scaler is
        fitted but not applied here.

        Because of that re-fit, any scalers the caller fitted beforehand are
        overwritten. A caller that normalises the data itself and wants to
        keep its scalers should pass ``preNormalised=True``.

        Args:
            inputs: 2-D array of input samples, one row per sample.
            targets: 2-D array of target values, one row per sample.
            epochs: Number of passes over the data.
            learningRate: Step size handed to ``backward``.
            preNormalised: When True, leave the scalers untouched and feed
                ``inputs`` to the network as they are. Defaults to False,
                which preserves the historic behaviour.
        """
        if preNormalised:
            inputsNormalised = np.asarray(inputs, dtype=float)
        else:
            # Fit the scalers on the supplied data, then normalise the inputs.
            self.fitScalers(inputs, targets)
            inputsNormalised = self.normaliseInput(inputs)

        sampleCount = len(inputsNormalised)

        for epoch in range(epochs):
            totalLoss = 0
            for inputData, target in zip(inputsNormalised, targets):
                # Re-wrap each sample as a single-row batch.
                inputData = np.array([inputData], dtype=float)
                target = np.array([target], dtype=float)

                self.forward(inputData)
                self.backward(inputData, target, learningRate)

                # Loss of the prediction made before this sample's update.
                loss = np.mean(0.5 * (target - self.predictedOutput)**2)
                totalLoss += loss

            # Report the mean per-sample loss every 100 epochs.
            if (epoch + 1) % 100 == 0 and sampleCount > 0:
                averageLoss = totalLoss / sampleCount
                print(f"Epoch: {epoch + 1}, Loss: {averageLoss}")

    def normaliseInput(self, inputs):
        """Scale ``inputs`` with the fitted input scaler."""
        return self.scalerInput.transform(inputs)

    def normaliseTarget(self, targets):
        """Scale ``targets`` with the fitted target scaler."""
        return self.scalerTarget.transform(targets)

    def denormaliseTarget(self, normalisedTargets):
        """Map values from the target scaler's space back to original units."""
        return self.scalerTarget.inverse_transform(normalisedTargets)

    def fitScalers(self, inputs, targets):
        """Fit the input scaler on ``inputs`` and the target scaler on ``targets``."""
        self.scalerInput.fit(inputs)
        self.scalerTarget.fit(targets)

    def predict(self, inputs, decimals=2):
        """Return the network output for ``inputs``.

        The inputs are fed to the network as given; no scaling is applied
        here, in either direction.

        Args:
            inputs: 2-D array with one row per sample.
            decimals: Number of decimal places to round the output to.
                Defaults to 2. Pass ``None`` to get the unrounded output,
                which avoids amplifying the rounding error when the result
                is rescaled afterwards.

        Returns:
            A new array holding the (optionally rounded) network output.
        """
        rawOutput = self.forward(inputs)
        if decimals is None:
            # Copy so the caller cannot mutate the cached activation.
            return rawOutput.copy()
        return np.round(rawOutput, decimals)

In [8]:
# Main code
if __name__ == "__main__":
    # ---- Configuration -------------------------------------------------
    numberOfEpochs = 2500
    learning_rate = 0.00001
    numberOfHiddenNodes = 50
    randomSeed = 42
    # File path and column names of the data set
    filePath = 'FEHDataStudent.xlsx'
    inputColumnNames = ['AREA', 'BFIHOST', 'FARL', 'FPEXT', 'LDP', 'PROPWET', 'RMED-1D', 'SAAR']
    targetColumnName = 'Index flood'

    # MLP draws its initial weights from NumPy's global generator; seeding
    # it makes the run reproducible.
    np.random.seed(randomSeed)

    # ---- Load data -----------------------------------------------------
    inputs, targets = loadDataFromExcel(filePath, inputColumnNames, targetColumnName)
    if inputs.size == 0:
        # loadDataFromExcel returns empty arrays when no row is usable.
        raise ValueError(f"No usable rows were loaded from {filePath}; nothing to train on.")

    # ---- Train ---------------------------------------------------------
    mlp = MLP(inputSize=len(inputColumnNames), hiddenSize=numberOfHiddenNodes, outputSize=1)

    # Fit both scalers on the raw data and move the data into normalised space.
    mlp.fitScalers(inputs, targets)
    inputsNormalised = mlp.normaliseInput(inputs)
    targetsNormalised = mlp.normaliseTarget(targets)

    mlp.train(inputsNormalised, targetsNormalised, epochs=numberOfEpochs, learningRate=learning_rate)

    # MLP.train() calls fitScalers() on the arrays it receives, i.e. on the
    # already-normalised data above. Fit again on the raw data so the scalers
    # used below really map between raw units and normalised space; without
    # this the evaluation would feed unscaled inputs to the network and leave
    # the predictions in normalised units.
    mlp.fitScalers(inputs, targets)

    # ---- Evaluate on the training data ---------------------------------
    predictionsNormalised = mlp.predict(mlp.normaliseInput(inputs))
    predictions = mlp.denormaliseTarget(predictionsNormalised)

    mse = float(np.mean((predictions - targets) ** 2))
    targetVariance = float(np.var(targets))
    accuracy = 1 - mse / targetVariance  # coefficient of determination (R-squared)

    print(f"Mean Squared Error: {mse}")
    print(f"Variance of Targets: {targetVariance}")
    print(f"Accuracy (R-squared): {accuracy}")

    # ---- Plot predictions against targets ------------------------------
    lowest, highest = targets.min(), targets.max()
    plt.figure(figsize=(10, 6))
    plt.scatter(targets.ravel(), predictions.ravel(), label='Predictions')
    plt.plot([lowest, highest], [lowest, highest], '--', color='red', label='Perfect Prediction')
    plt.xlabel('Target Values')
    plt.ylabel('Predicted Values')
    plt.title(f'Model Predictions vs Actual Targets - (Multi-Layer Perceptron | Epoch:{numberOfEpochs}, LR:{learning_rate}, NumHidden:{numberOfHiddenNodes})')
    plt.legend()
    plt.show()

Loading data from file: FEHDataStudent.xlsx
Loaded 592 rows of data.
Epoch: 100, Loss: 0.24096100922136926
Epoch: 200, Loss: 0.18336056223356953
Epoch: 300, Loss: 0.16439915070016894


KeyboardInterrupt: 