In [27]:
import numpy as np
import os
import cv2

# Define a function to load and preprocess the images
def load_images(path):
    """ Load images from the given path , classify them based on their flower type and resize them to 150x150 pixels.
    then convert them to grayscale and return images(x) and labels(y) as numpy arrays."""
    images = []
    labels = []
    label_dict = {'daisy': 0, 'dandelion': 1, 'rose': 2, 'sunflower': 3, 'tulip': 4}
    for folder in os.listdir(path):
        label = label_dict[folder]
        for file in os.listdir(os.path.join(path, folder)):
            img = cv2.imread(os.path.join(path, folder, file))
            # convert to grayscale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # resize the image to 150x150 pixels
            img = cv2.resize(img, (150, 150))
            # scale pixel values to range [0, 1]
            img = img.astype('float32') / 255
            images.append(img)
            labels.append(label)
    return np.array(images), np.array(labels)
from sklearn.model_selection import train_test_split

# Load the images
X, y = load_images('flowers')

# 1. Preprocess the data by scaling
# the pixel values to the range [0, 1], and split the dataset into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


# Set the number of neurons in each layer
n_input = 150 * 150
n_hidden1 = 64
n_hidden2 = 32
n_output = 5

# 2. Initialize the weights and biases of the MLP using random values.
weights = {
    'w1': np.random.randn(n_input, n_hidden1),
    'w2': np.random.randn(n_hidden1, n_hidden2),
    'w3': np.random.randn(n_hidden2, n_output)
}

biases = {
    'b1': np.random.randn(n_hidden1),
    'b2': np.random.randn(n_hidden2),
    'b3': np.random.randn(n_output)
}

from scipy.special import softmax

def forward_pass(x, weights, biases):
    # Compute the output of the first hidden layer
    z1 = np.dot(x, weights['w1']) + biases['b1']
    a1 = np.maximum(z1, 0) # ReLU activation function

    # Compute the output of the second hidden layer
    z2 = np.dot(a1, weights['w2']) + biases['b2']
    a2 = np.maximum(z2, 0) # ReLU activation function

    # Compute the output of the output layer
    z3 = np.dot(a2, weights['w3']) + biases['b3']
    a3 = softmax(z3, axis=1) # Softmax activation function

    return a3, a1, a2

# Define the loss function (cross-entropy loss)
def loss(y_true, y_pred):
    m = y_true.shape[0]
    loss = -np.sum(y_true * np.log(y_pred)) / m
    return loss

# Define the derivative of the ReLU activation function
def relu_derivative(z):
    return (z > 0).astype(int)

# Define the backward pass function
def backward_pass(x, y_true, y_pred, a1, a2, weights):
    m = y_true.shape[0]
    # Compute the gradient of the loss with respect to z3
    dz3 = y_pred - y_true
    # Compute the gradient of the loss with respect to w3 and b3
    dw3 = np.dot(a2.T, dz3) / m
    db3 = np.sum(dz3, axis=0) / m

    # Compute the gradient of the loss with respect to z2
    da2 = np.dot(dz3, weights['w3'].T)
    dz2 = da2 * relu_derivative(a2)
    # Compute the gradient of the loss with respect to w2 and b2
    dw2 = np.dot(a1.T, dz2) / m
    db2 = np.sum(dz2, axis=0) / m

    # Compute the gradient of the loss with respect to z1
    da1 = np.dot(dz2, weights['w2'].T)
    dz1 = da1 * relu_derivative(a1)
    # Compute the gradient of the loss with respect to w1 and b1
    dw1 = np.dot(x.T, dz1) / m
    db1 = np.sum(dz1, axis=0) / m

    gradients = {
        'dw1': dw1,
        'dw2': dw2,
        'dw3': dw3,
        'db1': db1,
        'db2': db2,
        'db3': db3
    }
    
    return gradients

# Define the update_weights function
def update_weights(weights, biases, gradients, learning_rate):
    weights['w1'] -= learning_rate * gradients['dw1']
    weights['w2'] -= learning_rate * gradients['dw2']
    weights['w3'] -= learning_rate * gradients['dw3']
    
    biases['b1'] -= learning_rate * gradients['db1']
    biases['b2'] -= learning_rate * gradients['db2']
    biases['b3'] -= learning_rate * gradients['db3']

from sklearn.metrics import accuracy_score, classification_report, roc_curve, auc
import matplotlib.pyplot as plt

# Set the learning rate and number of epochs
learning_rate = 0.01
epochs = 10

# Train the MLP for 10 epochs
for epoch in range(epochs):
    # Forward pass
    a3, a1, a2 = forward_pass(X_train.reshape(X_train.shape[0], -1), weights, biases)
    
    # Compute the loss
    y_train_one_hot = np.eye(n_output)[y_train]
    train_loss = loss(y_train_one_hot, a3)
    
    # Backward pass
    gradients = backward_pass(X_train.reshape(X_train.shape[0], -1), y_train_one_hot, a3, a1=a1,a2=a2 ,weights=weights)
    
    # Update weights and biases
    update_weights(weights, biases, gradients, learning_rate)
    
    # Compute the accuracy on the validation set
    y_val_pred = forward_pass(X_val.reshape(X_val.shape[0], -1), weights, biases)
    y_val_pred = np.argmax(y_val_pred, axis=1)
    val_acc = accuracy_score(y_val, y_val_pred)
    
    print(f'Epoch: {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Acc: {val_acc:.4f}')

# Evaluate the final accuracy of the trained MLP on the test set
y_val_pred = forward_pass(X_val.reshape(X_val.shape[0], -1), weights, biases)
y_val_pred = np.argmax(y_val_pred, axis=1)
test_acc = accuracy_score(y_val, y_val_pred)

print(f'Test Acc: {test_acc:.4f}')
print(classification_report(y_val, y_val_pred))

# Plot the ROC curve for the classifier
fpr, tpr, thresholds = roc_curve(y_val, y_val_pred)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.show()



  loss = -np.sum(y_true * np.log(y_pred)) / m
  loss = -np.sum(y_true * np.log(y_pred)) / m


ValueError: could not broadcast input array from shape (864,5) into shape (864,)