22/11/2019

The code here is close to Nielsen. Each activation is treated as a column vector, even the last one, which for XOR is just a single number and is enclosed in a shape (1,1) column vector of just one row, i.e. if the activation value of the output neuron is a, then it is computed as np.array([[a]]).

The code here can easily be adapted for the MLP exercises and the Iris classification problem.
But you may need to use more than 2 hidden neurons and more than 1 output neuron.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def sigm(z):
    """Logistic sigmoid 1 / (1 + exp(-z)); works element-wise on NumPy arrays."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def sigm_deriv(z):
    """Derivative of the sigmoid at pre-activation z, i.e. sigm(z) * (1 - sigm(z))."""
    s = sigm(z)  # evaluate the sigmoid once and reuse it
    return s * (1 - s)

# Cross-Entropy Cost function
def cross_entropy(y_true, y_pred):
    """Mean binary cross-entropy of a batch.

    Both arguments are 2-D (the sum runs over axis 1); the per-row sums of
    the element-wise cross-entropy terms are averaged over the rows.
    """
    eps = 1e-15
    # Keep predictions strictly inside (0, 1) so neither logarithm sees 0.
    clipped = np.clip(y_pred, eps, 1 - eps)
    terms = y_true * np.log(clipped) + (1 - y_true) * np.log(1 - clipped)
    per_sample = np.sum(terms, axis=1)
    return -np.mean(per_sample)

In [3]:
class XOR_MLP:
    """2-2-1 sigmoid network for XOR, trained with full-batch gradient descent.

    Every activation is a column vector; the single output activation is a
    (1,1) array. The four XOR patterns and their targets are stored on the
    instance and used by train().
    """

    def __init__(self):
        # One training pattern per row, with the matching targets.
        self.train_inputs = np.array([[0,0], [0,1], [1,0], [1,1]])
        self.train_outputs = np.array([0,1,1,0])

        # Fixed seed so every instance starts from the same parameters.
        np.random.seed(23)
        # Hidden layer: 2 neurons fed by 2 inputs.
        self.w2 = np.random.randn(2,2)
        self.b2 = np.random.randn(2,1)
        # Output layer: 1 neuron fed by the 2 hidden neurons.
        self.w3 = np.random.randn(1,2)
        self.b3 = np.random.randn(1,1)

    def feedforward(self, xs):
        """Return the output activations for xs, whose COLUMNS are input vectors.

        The weight matrices are applied to all columns at once.
        """
        hidden = sigm(np.dot(self.w2, xs) + self.b2)
        return sigm(np.dot(self.w3, hidden) + self.b3)

    def backprop(self, xs, ys):
        """Average gradients and cost over the samples in xs / ys.

        Here the input vectors are the ROWS of xs (so that zip pairs each one
        with its target in ys).

        Returns (d_b2, d_w2, d_b3, d_w3, cost): the per-sample average of the
        bias/weight gradients of each layer and of the squared error.
        """
        grad_w2 = np.zeros(self.w2.shape, dtype=float)
        grad_b2 = np.zeros(self.b2.shape, dtype=float)
        grad_w3 = np.zeros(self.w3.shape, dtype=float)
        grad_b3 = np.zeros(self.b3.shape, dtype=float)
        total_cost = 0.0

        for sample, target in zip(xs, ys):
            # Forward pass; the row is turned into a (2,1) column first.
            a1 = sample.reshape(2,1)
            z2 = np.dot(self.w2, a1) + self.b2      # shape (2,1)
            a2 = sigm(z2)
            z3 = np.dot(self.w3, a2) + self.b3      # shape (1,1)
            a3 = sigm(z3)

            # Output error, then the error propagated back through w3.
            err3 = (a3 - target) * sigm_deriv(z3)               # shape (1,1)
            err2 = sigm_deriv(z2) * np.dot(self.w3.T, err3)     # shape (2,1)

            grad_b3 += err3
            grad_w3 += np.dot(err3, a2.T)   # (1,1) by (1,2) gives (1,2)
            grad_b2 += err2
            grad_w2 += np.dot(err2, a1.T)   # (2,1) by (1,2) gives (2,2)

            total_cost += ((a3 - target)**2).sum()

        count = len(ys)  # number of training vectors
        return grad_b2/count, grad_w2/count, grad_b3/count, grad_w3/count, total_cost/count

    def train(self, epochs, eta):
        """Run `epochs` full-batch gradient steps with learning rate `eta`.

        The per-epoch cost is plotted on the current matplotlib axes and
        returned as an array of length `epochs`.
        """
        history = np.zeros((epochs,))

        for epoch in range(epochs):
            d_b2, d_w2, d_b3, d_w3, history[epoch] = self.backprop(
                self.train_inputs, self.train_outputs)

            # The parameters are ndarrays, so `-=` updates them in place.
            for param, grad in ((self.b2, d_b2), (self.w2, d_w2),
                                (self.b3, d_b3), (self.w3, d_w3)):
                param -= eta * grad

        plt.plot(history)
        return history

In [None]:
# Build the network; feedforward wants the XOR patterns as columns.
xor = XOR_MLP()
xs = xor.train_inputs.T

# Outputs of the untrained network
print(xor.feedforward(xs))

epochs = 1000
c = xor.train(epochs, 3.0)

# Outputs after training
print(xor.feedforward(xs))

# Cost curve in three views: whole run, first 61 epochs, epoch 901 onwards.
x_axis = np.linspace(1, epochs, epochs, dtype=int)
fig, axs = plt.subplots(3,1,figsize=(10,15))
views = (slice(None), slice(None, 61), slice(900, None))
for panel, window in enumerate(views, start=1):
    plt.subplot(3,1,panel)
    plt.plot(x_axis[window], c[window])

Exercise 1: copy and adapt the above XOR_MLP code so that it uses 3 neurons in the hidden layer. Train such an MLP and see if it learns faster than the previous one.

In [5]:
import numpy as np
import matplotlib.pyplot as plt

class GeneralizedMLP:
    """Sigmoid MLP with one hidden layer of configurable width.

    Activations are column vectors. The training set stored on the instance
    is the XOR problem (2 inputs, 1 output), so train() is only meaningful
    for input_neurons=2 and output_neurons=1; backprop() itself accepts any
    row-wise data whose sizes match the layer widths.
    """

    def __init__(self, input_neurons, hidden_neurons, output_neurons):
        self.input_neurons = input_neurons
        self.hidden_neurons = hidden_neurons
        self.output_neurons = output_neurons

        # XOR training data: one pattern per row, one target row per pattern
        self.train_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        self.train_outputs = np.array([[0], [1], [1], [0]])

        # Fixed seed so every instance of a given size starts identically
        np.random.seed(23)
        # Hidden layer parameters
        self.w2 = np.random.randn(hidden_neurons, input_neurons)
        self.b2 = np.random.randn(hidden_neurons, 1)
        # Output layer parameters
        self.w3 = np.random.randn(output_neurons, hidden_neurons)
        self.b3 = np.random.randn(output_neurons, 1)

    def feedforward(self, xs):
        """Return output activations for xs, whose columns are input vectors."""
        a2s = sigm(self.w2.dot(xs) + self.b2)
        a3s = sigm(self.w3.dot(a2s) + self.b3)
        return a3s

    def backprop(self, xs, ys):
        """Average gradients and squared-error cost over the rows of xs / ys.

        Each row of xs is one input vector and the matching row of ys its
        target. Returns (d_b2, d_w2, d_b3, d_w3, cost), all divided by the
        number of samples.

        The output error used is ``a3 - y`` with no sigmoid-derivative
        factor. That is the output error of the cross-entropy cost with a
        sigmoid output (see Nielsen, ch. 3), not of the squared error that
        is accumulated in `cost`; the cost is reported for monitoring only.
        """
        del_w2 = np.zeros(self.w2.shape, dtype=float)
        del_b2 = np.zeros(self.b2.shape, dtype=float)

        del_w3 = np.zeros(self.w3.shape, dtype=float)
        del_b3 = np.zeros(self.b3.shape, dtype=float)
        cost = 0.0

        for x, y in zip(xs, ys):
            a1 = x.reshape(self.input_neurons, 1)
            # Targets arrive as 1-D rows. Subtracting a (k,) row from the
            # (k,1) activation would broadcast to (k,k) for k > 1, so make
            # the target a column explicitly.
            target = np.reshape(y, (self.output_neurons, 1))

            z2 = self.w2.dot(a1) + self.b2
            a2 = sigm(z2)

            z3 = self.w3.dot(a2) + self.b3
            a3 = sigm(z3)

            delta3 = a3 - target  # cross-entropy style output error
            delta2 = sigm_deriv(z2) * (self.w3.T.dot(delta3))

            del_b3 += delta3
            del_w3 += delta3.dot(a2.T)

            del_b2 += delta2
            del_w2 += delta2.dot(a1.T)

            cost += ((a3 - target) ** 2).sum()  # squared error, for reporting

        n = len(ys)
        return del_b2 / n, del_w2 / n, del_b3 / n, del_w3 / n, cost / n

    def train(self, epochs, eta):
        """Run `epochs` full-batch steps with learning rate `eta` on the XOR data.

        Shows a plot of the per-epoch cost and returns it as an array.
        """
        xs = self.train_inputs
        ys = self.train_outputs
        cost = np.zeros((epochs,))

        for e in range(epochs):
            d_b2, d_w2, d_b3, d_w3, cost[e] = self.backprop(xs, ys)

            self.b2 -= eta * d_b2
            self.w2 -= eta * d_w2
            self.b3 -= eta * d_b3
            self.w3 -= eta * d_w3

        # Plot cost over epochs
        plt.plot(cost)
        plt.title("Training Loss Over Epochs")
        plt.xlabel("Epochs")
        plt.ylabel("Cost (Mean Squared Error)")
        plt.show()

        return cost


In [None]:
# Test the generalized MLP with 3 hidden neurons
mlp = GeneralizedMLP(input_neurons=2, hidden_neurons=3, output_neurons=1)

# Training parameters
epochs = 2000
learning_rate = 0.5

# Train the model (train() also shows its own cost plot)
cost = mlp.train(epochs, learning_rate)

# Display predictions; feedforward expects each input as a (2,1) column
print("Predictions after training:")
for x in mlp.train_inputs:
    prediction = mlp.feedforward(x.reshape(2, 1)).flatten()
    print(f"Input: {x}, Predicted: {prediction}")

# Cost curve in three views: whole run, start of training, end of training
x_axis = np.linspace(1, epochs, epochs, dtype=int)
fig, axs = plt.subplots(3, 1, figsize=(10, 15))

plt.subplot(3, 1, 1)
plt.plot(x_axis, cost)
plt.title("Cost over all epochs")

plt.subplot(3, 1, 2)
plt.plot(x_axis[:61], cost[:61])
plt.title("Cost during first 61 epochs")

# Slice from the end so the panel really shows the last 100 epochs whatever
# `epochs` is (a fixed start index of 900 only did that for epochs == 1000).
plt.subplot(3, 1, 3)
plt.plot(x_axis[-100:], cost[-100:])
plt.title("Cost during final 100 epochs")

plt.tight_layout()
plt.show()

Exercise 1

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# MLP class for XOR problem
class XOR_MLP:
    """Sigmoid network for XOR with two hidden layers of equal width.

    Layout: 2 inputs -> hidden_neurons -> hidden_neurons -> 1 output.
    Activations are column vectors. Training can follow either the binary
    cross-entropy or the squared-error cost (see `loss_fn`).
    """

    def __init__(self, hidden_neurons=4):
        # One XOR pattern per row, with the matching targets
        self.train_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        self.train_outputs = np.array([0, 1, 1, 0])

        # Fixed seed: two instances of the same width start identically
        np.random.seed(23)
        self.w2 = np.random.randn(hidden_neurons, 2)
        self.b2 = np.random.randn(hidden_neurons, 1)

        self.w3 = np.random.randn(hidden_neurons, hidden_neurons)
        self.b3 = np.random.randn(hidden_neurons, 1)

        self.w4 = np.random.randn(1, hidden_neurons)
        self.b4 = np.random.randn(1, 1)

    def feedforward(self, xs):
        """Return output activations for xs, whose columns are input vectors."""
        a2 = sigm(self.w2.dot(xs) + self.b2)
        a3 = sigm(self.w3.dot(a2) + self.b3)
        a4 = sigm(self.w4.dot(a3) + self.b4)
        return a4

    def cross_entropy_loss(self, a4, y):
        """Binary cross-entropy -[y*log(a) + (1-y)*log(1-a)], summed over a4.

        Both terms are needed: with only y*log(a) the loss would be zero for
        every y = 0 target regardless of the prediction. The activation is
        clipped away from 0 and 1 so the logarithms stay finite.
        """
        eps = 1e-15
        a4 = np.clip(a4, eps, 1 - eps)
        return -np.sum(y * np.log(a4) + (1 - y) * np.log(1 - a4))

    def mse_loss(self, a4, y):
        """Mean of the squared differences between a4 and y."""
        return np.mean((a4 - y) ** 2)

    def backprop(self, xs, ys, loss_fn='mse'):
        """Average gradients and cost over the rows of xs and entries of ys.

        loss_fn is 'mse' or 'cross_entropy'; anything else raises ValueError.
        Returns (d_b2, d_w2, d_b3, d_w3, d_b4, d_w4, cost), each divided by
        the number of samples.
        """
        # Reject unknown names up front instead of failing later on an
        # unassigned delta4.
        if loss_fn not in ('cross_entropy', 'mse'):
            raise ValueError(
                f"loss_fn must be 'cross_entropy' or 'mse', got {loss_fn!r}")

        del_w2 = np.zeros(self.w2.shape, dtype=float)
        del_b2 = np.zeros(self.b2.shape, dtype=float)

        del_w3 = np.zeros(self.w3.shape, dtype=float)
        del_b3 = np.zeros(self.b3.shape, dtype=float)

        del_w4 = np.zeros(self.w4.shape, dtype=float)
        del_b4 = np.zeros(self.b4.shape, dtype=float)

        cost = 0.0

        for x, y in zip(xs, ys):
            # Forward pass through both hidden layers and the output
            a1 = x.reshape(2, 1)
            z2 = self.w2.dot(a1) + self.b2
            a2 = sigm(z2)

            z3 = self.w3.dot(a2) + self.b3
            a3 = sigm(z3)

            z4 = self.w4.dot(a3) + self.b4
            a4 = sigm(z4)

            if loss_fn == 'cross_entropy':
                cost += self.cross_entropy_loss(a4, y)
                # With a sigmoid output the sigmoid derivative cancels
                # against the cross-entropy derivative, leaving a - y.
                delta4 = a4 - y
            else:  # 'mse'
                cost += self.mse_loss(a4, y)
                delta4 = 2 * (a4 - y) * sigm_deriv(z4)

            # Propagate the error back through the two hidden layers
            delta3 = sigm_deriv(z3) * (self.w4.T.dot(delta4))
            delta2 = sigm_deriv(z2) * (self.w3.T.dot(delta3))

            del_b4 += delta4
            del_w4 += delta4.dot(a3.T)

            del_b3 += delta3
            del_w3 += delta3.dot(a2.T)

            del_b2 += delta2
            del_w2 += delta2.dot(a1.T)

        n = len(ys)
        return del_b2 / n, del_w2 / n, del_b3 / n, del_w3 / n, del_b4 / n, del_w4 / n, cost / n

    def train(self, epochs, eta, loss_fn='mse'):
        """Run `epochs` full-batch steps with learning rate `eta`.

        Training continues from the current weights. Returns the per-epoch
        cost as an array of length `epochs`.
        """
        xs = self.train_inputs
        ys = self.train_outputs
        cost = np.zeros((epochs,))

        for e in range(epochs):
            d_b2, d_w2, d_b3, d_w3, d_b4, d_w4, cost[e] = self.backprop(xs, ys, loss_fn)

            self.b2 -= eta * d_b2
            self.w2 -= eta * d_w2
            self.b3 -= eta * d_b3
            self.w3 -= eta * d_w3
            self.b4 -= eta * d_b4
            self.w4 -= eta * d_w4

        return cost

# Training parameters
epochs = 2000  # Training for 2000 iterations
learning_rate = 0.5

# Instantiate the MLP with 4 neurons in each hidden layer
xor_model = XOR_MLP(hidden_neurons=4)  # 4 neurons as required
xs = xor_model.train_inputs.T

# Show the outputs of the untrained network BEFORE any training happens
print("Before Training (with Hidden Layers):")
print(xor_model.feedforward(xs))

# Train using Cross-Entropy loss
cost_cross_entropy = xor_model.train(epochs, learning_rate, loss_fn='cross_entropy')

print("After Training (with Cross-Entropy Loss):")
print(xor_model.feedforward(xs))

# Train using Mean Squared Error loss on a FRESH model. XOR_MLP.__init__
# reseeds NumPy, so this model starts from the same weights as the one above
# and the two cost curves are directly comparable (re-using xor_model would
# continue from the already-trained weights).
mse_model = XOR_MLP(hidden_neurons=4)
cost_mse = mse_model.train(epochs, learning_rate, loss_fn='mse')

# Plotting the cost curves for comparison
plt.plot(cost_cross_entropy, label='Cross-Entropy Loss')
plt.plot(cost_mse, label='Mean Squared Error Loss')
plt.title("Cost Over Epochs for Different Loss Functions")
plt.xlabel("Epochs")
plt.ylabel("Cost")
plt.legend()
plt.show()


In [None]:
# A more general-purpose MLP with m input neurons, n hidden neurons and o output neurons
# You must complete this code yourself
import numpy as np
import matplotlib.pyplot as plt

# MLP class
class MLP:
    """One-hidden-layer sigmoid network trained by full-batch gradient descent.

    Samples are the ROWS of X and y. Weights are stored so that
    ``X @ weights_input_hidden`` gives the hidden pre-activations; biases are
    (1, n) row vectors broadcast over the batch.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        self.learning_rate = learning_rate

        # Weights drawn uniformly from [-1, 1); biases start at zero
        self.weights_input_hidden = np.random.uniform(-1, 1, (input_size, hidden_size))
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.uniform(-1, 1, (hidden_size, output_size))
        self.bias_output = np.zeros((1, output_size))

    def forward(self, X):
        """Return the output activations for batch X and cache the
        intermediate values on the instance for use by backward()."""
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = sigm(self.hidden_input)
        self.final_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.final_output = sigm(self.final_input)
        return self.final_output

    def backward(self, X, y, output):
        """Apply one gradient step for the cross-entropy loss.

        `output` must be the result of forward(X); the hidden activations
        cached by that call are reused here. Gradients are summed (not
        averaged) over the batch before being scaled by the learning rate.
        """
        # For a sigmoid output with cross-entropy loss the sigmoid derivative
        # cancels, so the output delta is just the error. It is kept as
        # (y - output), i.e. the negative gradient, hence the `+=` below.
        output_delta = y - output

        # Error reaching the hidden layer, using the weights BEFORE the update
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        # hidden_output already IS sigmoid(z), so its derivative is a*(1-a);
        # passing it through sigm_deriv would apply the sigmoid a second time.
        hidden_delta = hidden_error * self.hidden_output * (1 - self.hidden_output)

        # Update weights and biases
        self.weights_hidden_output += np.dot(self.hidden_output.T, output_delta) * self.learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate
        self.weights_input_hidden += np.dot(X.T, hidden_delta) * self.learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate

    def train(self, X, y, epochs):
        """Train for `epochs` full-batch steps; return the list of per-epoch
        cross-entropy losses (measured before each update)."""
        losses = []
        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)
            # Calculate loss using Cross-Entropy
            loss = cross_entropy(y, output)
            losses.append(loss)
            # Backward pass
            self.backward(X, y, output)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
        return losses

# Dataset (XOR problem as an example): one sample per row
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Experiment with different learning rates
learning_rates = [0.1, 0.01, 0.001, 1]
epochs = 2000

# Keep every trained network so the predictions printed below come from the
# very models whose loss curves are plotted. The weights are initialised at
# random, so retraining would produce a different network.
trained_models = []
for lr in learning_rates:
    print(f"\nTraining with learning rate: {lr}")
    mlp = MLP(input_size=2, hidden_size=4, output_size=1, learning_rate=lr)
    losses = mlp.train(X, y, epochs)
    trained_models.append((lr, mlp))

    # Plot loss curve
    plt.plot(losses, label=f"LR={lr}")

# Show loss curves
plt.title("Loss Curves for Different Learning Rates")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Testing the trained models on example inputs
test_input = np.array([[1, 0], [0, 1]])  # Example inputs for testing
for lr, mlp in trained_models:
    prediction = mlp.forward(test_input)
    print(f"Prediction for test input with LR={lr}: {prediction}")

In [None]:
# Are the outputs of these correct? The original calls (kept in the string
# below) only partially work: they omit the learning rate and use attribute
# names that the MLP class does not define. Working versions follow.
"""
p1 = MLP(3,4,2)
print('\n W2 = \n',p1.w2, '\n W3 = \n', p1.w3, '\n')

p2 = MLP(4,6,3)
print('\n W2 = \n', p2.w2, '\nW3 = \n', p2.w3, '\n')
"""

# Corrected MLP instances, now with the required learning_rate argument
p1 = MLP(3, 4, 2, learning_rate=0.1)
p2 = MLP(4, 6, 3, learning_rate=1)

# Print both weight matrices of each network
for net in (p1, p2):
    print('\nWeights between Input and Hidden Layer (W2): \n', net.weights_input_hidden)
    print('\nWeights between Hidden and Output Layer (W3): \n', net.weights_hidden_output)


Exercise 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# MLP Class
class MLP:
    """One-hidden-layer sigmoid network trained by full-batch gradient descent.

    Samples are the ROWS of X and y. Biases are (1, n) row vectors that are
    broadcast over the batch.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        self.learning_rate = learning_rate

        # Weights drawn uniformly from [-1, 1); biases start at zero
        self.weights_input_hidden = np.random.uniform(-1, 1, (input_size, hidden_size))
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.uniform(-1, 1, (hidden_size, output_size))
        self.bias_output = np.zeros((1, output_size))

    def forward(self, X):
        """Return the output activations for batch X and cache the
        intermediate values on the instance for use by backward()."""
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = sigm(self.hidden_input)
        self.final_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.final_output = sigm(self.final_input)
        return self.final_output

    def backward(self, X, y, output):
        """Apply one gradient step for the sigmoid / cross-entropy loss.

        `output` must be the result of forward(X); the hidden activations
        cached by that call are reused here. Gradients are summed (not
        averaged) over the batch before being scaled by the learning rate.
        The step does not depend on the `cost_function` given to train(),
        which is used for reporting only.
        """
        # For a sigmoid output with cross-entropy loss the sigmoid derivative
        # cancels, so the output delta is just the error. It is kept as
        # (y - output), i.e. the negative gradient, hence the `+=` below.
        output_delta = y - output

        # Error reaching the hidden layer, using the weights BEFORE the update
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        # hidden_output already IS sigmoid(z), so its derivative is a*(1-a);
        # passing it through sigm_deriv would apply the sigmoid a second time.
        hidden_delta = hidden_error * self.hidden_output * (1 - self.hidden_output)

        # Update weights and biases
        self.weights_hidden_output += np.dot(self.hidden_output.T, output_delta) * self.learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate
        self.weights_input_hidden += np.dot(X.T, hidden_delta) * self.learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate

    def train(self, X, y, epochs, cost_function):
        """Train for `epochs` full-batch steps.

        cost_function(y, output) is evaluated before each update; its values
        are returned as a list and printed every 100 epochs.
        """
        losses = []
        for epoch in range(epochs):
            output = self.forward(X)
            loss = cost_function(y, output)
            losses.append(loss)
            self.backward(X, y, output)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
        return losses

# Training data: seven samples with three features each (one per row) ...
X_training = np.array([
    [1, 1, 0], [1, -1, -1], [-1, 1, 1], [-1, -1, 1],
    [0, 1, -1], [0, -1, -1], [1, 1, 1],
])

# ... and the two target outputs for each sample, in the same order
y_training = np.array([
    [1, 0], [0, 1], [1, 1], [1, 0],
    [1, 0], [1, 1], [1, 1],
])

# Fixed layer sizes and the hyperparameter values to try
epochs = 2000
input_size = 3
output_size = 2
hidden_sizes = [4, 8]
learning_rates = [0.1, 0.01]

# Train one network per (hidden size, learning rate) pair, hidden size
# varying slowest, and draw every loss curve on the same axes
configurations = [(width, rate) for width in hidden_sizes for rate in learning_rates]
for hidden_size, lr in configurations:
    print(f"\nTraining with hidden_size={hidden_size}, learning_rate={lr}")
    mlp = MLP(input_size, hidden_size, output_size, learning_rate=lr)
    losses = mlp.train(X_training, y_training, epochs, cross_entropy)
    plt.plot(losses, label=f"Hidden={hidden_size}, LR={lr}")

# Plot settings
plt.title("Loss Curves for Different Hidden Sizes and Learning Rates with Cross-Entropy")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

Exercise 3

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# MLP Class
class MLP:
    """One-hidden-layer sigmoid network trained by full-batch gradient descent.

    Samples are the ROWS of X and y. Biases are (1, n) row vectors that are
    broadcast over the batch.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        self.learning_rate = learning_rate

        # Weights drawn uniformly from [-1, 1); biases start at zero
        self.weights_input_hidden = np.random.uniform(-1, 1, (input_size, hidden_size))
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.uniform(-1, 1, (hidden_size, output_size))
        self.bias_output = np.zeros((1, output_size))

    def forward(self, X):
        """Return the output activations for batch X and cache the
        intermediate values on the instance for use by backward()."""
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = sigm(self.hidden_input)
        self.final_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.final_output = sigm(self.final_input)
        return self.final_output

    def backward(self, X, y, output):
        """Apply one gradient step for the sigmoid / cross-entropy loss.

        `output` must be the result of forward(X); the hidden activations
        cached by that call are reused here. Gradients are summed (not
        averaged) over the batch before being scaled by the learning rate.
        The step does not depend on the `cost_function` given to train(),
        which is used for reporting only.
        """
        # For a sigmoid output with cross-entropy loss the sigmoid derivative
        # cancels, so the output delta is just the error. It is kept as
        # (y - output), i.e. the negative gradient, hence the `+=` below.
        output_delta = y - output

        # Error reaching the hidden layer, using the weights BEFORE the update
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        # hidden_output already IS sigmoid(z), so its derivative is a*(1-a);
        # passing it through sigm_deriv would apply the sigmoid a second time.
        hidden_delta = hidden_error * self.hidden_output * (1 - self.hidden_output)

        # Update weights and biases
        self.weights_hidden_output += np.dot(self.hidden_output.T, output_delta) * self.learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate
        self.weights_input_hidden += np.dot(X.T, hidden_delta) * self.learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate

    def train(self, X, y, epochs, cost_function):
        """Train for `epochs` full-batch steps.

        cost_function(y, output) is evaluated before each update; its values
        are returned as a list and printed every 500 epochs.
        """
        losses = []
        for epoch in range(epochs):
            output = self.forward(X)
            loss = cost_function(y, output)
            losses.append(loss)
            self.backward(X, y, output)
            if epoch % 500 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
        return losses

# Training data, one traveller per row.
# Columns: gender (0 male / 1 female), cars owned, travel cost and income
# level (each 0 low / 1 medium / 2 high).
data = np.array([
    [0, 1, 0, 0],  # Male, 1 car, Low travel cost, Low income
    [1, 0, 1, 1],  # Female, No car, Medium travel cost, Medium income
    [0, 2, 2, 2],  # Male, 2 cars, High travel cost, High income
    [1, 0, 2, 1],  # Female, No car, High travel cost, Medium income
    [0, 1, 1, 0],  # Male, 1 car, Medium travel cost, Low income
    [1, 0, 0, 2],  # Female, No car, Low travel cost, High income
    [1, 1, 1, 1],  # Female, 1 car, Medium travel cost, Medium income
    [0, 0, 2, 0],  # Male, No car, High travel cost, Low income
    [1, 2, 1, 2],  # Female, 2 cars, Medium travel cost, High income
    [0, 0, 0, 1]   # Male, No car, Low travel cost, Medium income
])

# One-hot targets in the order (Bus, Car, Train), one row per traveller
targets = np.array([
    [1, 0, 0],  # Bus
    [0, 1, 0],  # Car
    [0, 0, 1],  # Train
    [0, 0, 1],  # Train
    [1, 0, 0],  # Bus
    [0, 1, 0],  # Car
    [0, 1, 0],  # Car
    [1, 0, 0],  # Bus
    [0, 0, 1],  # Train
    [1, 0, 0]   # Bus
])

# Write features plus target (stored as a tuple per row) to a CSV file
feature_names = ['Gender', 'Car Ownership', 'Travel Cost', 'Income Level']
df = pd.DataFrame(data, columns=feature_names)
df['Target'] = list(map(tuple, targets))
df.to_csv('transport.csv', index=False)
print("Training data saved to transport.csv")

# Fixed layer sizes and the hyperparameter values to try
epochs = 2000
input_size = 4
output_size = 3
hidden_sizes = [6, 8]
learning_rates = [0.1, 0.01]

# Train one network per (hidden size, learning rate) pair, hidden size
# varying slowest, and draw every loss curve on the same axes
configurations = [(width, rate) for width in hidden_sizes for rate in learning_rates]
for hidden_size, lr in configurations:
    print(f"\nTraining with hidden_size={hidden_size}, learning_rate={lr}")
    mlp = MLP(input_size, hidden_size, output_size, learning_rate=lr)
    losses = mlp.train(data, targets, epochs, cross_entropy)
    plt.plot(losses, label=f"Hidden={hidden_size}, LR={lr}")

# Plot settings
plt.title("Loss Curves for Different Hidden Sizes and Learning Rates with Cross-Entropy")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Prediction with `mlp`, which at this point is the LAST network trained above
test_instance = np.array([[1, 0, 2, 1]])  # Female, No car, High travel cost, Medium income
prediction = mlp.forward(test_instance)
predicted_class = np.argmax(prediction)  # index of the largest output activation
mode_names = ['Bus', 'Car', 'Train']

print(f"Predicted output: {prediction}")
print(f"Predicted transportation mode: {mode_names[predicted_class]}")

Exercise 4

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

# Load data
iris_data = pd.read_csv('iris_data.csv')

# Separate features (all columns but the last) and target (last column)
X = iris_data.iloc[:, :-1].values
y = iris_data.iloc[:, -1].values

# Encode target variable: labels -> integer codes -> one-hot rows
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_one_hot = to_categorical(y_encoded)

# Split BEFORE scaling so that no statistics of the test rows leak into the
# preprocessing; random_state keeps the split reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_one_hot, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply it everywhere
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)  # whole data set on the training scale

# Experimenting with different hidden sizes and learning rates
hidden_sizes = [8, 10, 12]  # Experiment with different hidden layer sizes
learning_rates = [0.01, 0.001]  # Experiment with different learning rates
epochs = 100

# One (hidden_size, lr, training losses, validation losses) tuple per run
losses = []

# Track the configuration with the lowest final validation loss
best_model = None
best_config = None
best_val_loss = float('inf')

# Create and train models for different hyperparameters
for hidden_size in hidden_sizes:
    for lr in learning_rates:
        print(f"\nTraining with hidden_size={hidden_size}, learning_rate={lr}")

        # Create model: two equally wide ReLU layers and a softmax output
        model = Sequential([
            Input(shape=(X_train.shape[1],)),  # one input per feature column
            Dense(hidden_size, activation='relu'),
            Dense(hidden_size, activation='relu'),
            Dense(y_one_hot.shape[1], activation='softmax')
        ])

        # Compile model with custom learning rate
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # Train model; validation_split sets aside part of the training data
        history = model.fit(X_train, y_train, epochs=epochs, validation_split=0.2, verbose=0)

        # Record the loss history
        losses.append((hidden_size, lr, history.history['loss'], history.history['val_loss']))

        # The model keeps its final-epoch weights, so compare on the final
        # validation loss (the test set is never used for selection).
        final_val_loss = history.history['val_loss'][-1]
        if final_val_loss < best_val_loss:
            best_val_loss = final_val_loss
            best_model = model
            best_config = (hidden_size, lr)

        # Plot training history
        plt.figure(figsize=(12, 4))

        # Plot Training and Validation Loss
        plt.subplot(1, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title(f'Training and Validation Loss (Hidden={hidden_size}, LR={lr})')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot Training and Validation Accuracy
        plt.subplot(1, 2, 2)
        plt.plot(history.history['accuracy'], label='Training Accuracy')
        plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        plt.title(f'Training and Validation Accuracy (Hidden={hidden_size}, LR={lr})')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.tight_layout()
        plt.show()

# Evaluate with the best model configuration
if best_model is None:
    # No run produced a comparable validation loss (e.g. all NaN):
    # fall back to the last model trained.
    best_model = model
else:
    print(f"Best configuration: hidden_size={best_config[0]}, "
          f"learning_rate={best_config[1]} (val_loss={best_val_loss:.4f})")

test_loss, test_accuracy = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Prediction function
def predict_new_sample(sample):
    """Run `best_model` on `sample` and decode the winning class with `le`.

    The arg-max is taken over the whole prediction array, so this is meant
    for a single-row sample. Returns the array from le.inverse_transform.
    """
    class_scores = best_model.predict(sample)
    winner = np.argmax(class_scores)
    return le.inverse_transform([winner])

# Example usage: scale a raw measurement with the fitted scaler, then classify it
new_sample = np.array([[5.1, 3.5, 1.4, 0.2]])  # Example sample, replace with actual input
scaled_sample = scaler.transform(new_sample)
predicted_label = predict_new_sample(scaled_sample)
print("Prediction for new sample:", predicted_label)

# Class names known to the label encoder
print("\nClass Labels:", le.classes_)