In [18]:
import pandas as pd # Data manipulation and analysis
import numpy as np # Mathematical functions and computing
from sklearn.model_selection import train_test_split # Split arrays
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score # Evaluation metrics
from sklearn.preprocessing import StandardScaler # Standardization


class MLP:
    def __init__(self, learning_rate, n_iterations=2000, n_hidden_units=10):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.n_hidden_units = n_hidden_units
        self.weights_input_hidden = None
        self.weights_hidden_output = None

    def sigmoid(self, x): # Activation function 1
        return 1 / (1 + np.exp(-x))

    def relu(self, x): # Activation function 2
        return np.maximum(0, x)

    def tanh(self, x): # Activation function 3
        return np.tanh(x)

    def sigmoid_derivative(self, x): # Derivative for sigmoid
        return x * (1 - x)

    def relu_derivative(self, x):
        return np.where(x > 0, 1, 0)

    def tanh_derivative(self, x):
        return 1 - np.tanh(x)**2
        
    def fit(self, X, y, activation_function='sigmoid'):  # Train the neural network
        np.random.seed(42)
        self.weights_input_hidden = np.random.uniform(low=-0.1, high=0.1, size=(X.shape[1], self.n_hidden_units))
        self.weights_hidden_output = np.random.uniform(low=-0.1, high=0.1, size=(self.n_hidden_units, y.shape[1]))

        for _ in range(self.n_iterations):
            if activation_function == 'sigmoid':
                activation_function = self.sigmoid
                activation_derivative = self.sigmoid_derivative
            elif activation_function == 'relu':
                activation_function = self.relu
                activation_derivative = self.relu_derivative
            elif activation_function == 'tanh':
                activation_function = self.tanh
                activation_derivative = self.tanh_derivative
            
            hidden_layer_input = np.dot(X, self.weights_input_hidden) # Hidden layer input and output
            hidden_layer_output = activation_function(hidden_layer_input)

            output_layer_input = np.dot(hidden_layer_output, self.weights_hidden_output)
            output_layer_output = self.sigmoid(output_layer_input)

            error = y - output_layer_output # Compute error
            d_output = error * self.sigmoid_derivative(output_layer_output)

            error_hidden_layer = d_output.dot(self.weights_hidden_output.T)
            d_hidden_layer = error_hidden_layer * activation_derivative(hidden_layer_output)

            self.weights_hidden_output += hidden_layer_output.T.dot(d_output) * self.learning_rate
            self.weights_input_hidden += X.T.dot(d_hidden_layer) * self.learning_rate

    def predict(self, X): # Function to make predictions
        hidden_layer_input = np.dot(X, self.weights_input_hidden)
        hidden_layer_output = self.sigmoid(hidden_layer_input)

        output_layer_input = np.dot(hidden_layer_output, self.weights_hidden_output)
        output_layer_output = self.sigmoid(output_layer_input)

        return np.argmax(output_layer_output, axis=1)


# Define the Gradient Descent Delta Rule algorithm
class GradientDescentDelta:
    def __init__(self, learning_rate, n_iterations=2000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None

    def fit(self, X, y):
        np.random.seed(42)
        self.weights = np.random.uniform(low=-0.1, high=0.1, size=X.shape[1])  # Small random initial weights around zero
        for _ in range(self.n_iterations): # Compute error, weights
            errors = y - self.predict(X)
            update = self.learning_rate * np.dot(X.T, errors)  # Update based on the transpose of X
            self.weights += update

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def predict(self, X):
        return np.where(self.sigmoid(np.dot(X, self.weights)) >= 0.5, 1, 0)

    def accuracy(self, X, y): # Tells the accuracy of gradient rule
        predictions = self.predict(X)
        correct = np.sum(predictions == y)
        return correct / len(y)


# Load the Iris dataset from iris.data
iris_df = pd.read_csv('iris.data', header=None, names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'])

# Convert species names to numerical labels
iris_df['species'] = iris_df['species'].map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})

# Split the dataset into features (X) and target (y)
X = iris_df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values
y = iris_df['species'].values

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into training and testing sets (80/20 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ask for user input
learning_rate = 0.2  # Fixed learning rate for the example

# Create an instance of GradientDescentDelta and fit the data
gd = GradientDescentDelta(learning_rate)
gd.fit(X_train, y_train)
gd_accuracy = gd.accuracy(X_test, y_test)

# Evaluate MLP model performance on testing set for each activation function
activation_functions = ['sigmoid', 'relu', 'tanh']
best_accuracy = 0
best_predictions = None

for activation_function in activation_functions:
    mlp_model = MLP(learning_rate)
    mlp_model.fit(X_train, np.eye(3)[y_train], activation_function)
    mlp_predictions = mlp_model.predict(X_test)
    mlp_accuracy = accuracy_score(y_test, mlp_predictions)
    if mlp_accuracy > best_accuracy:
        best_accuracy = mlp_accuracy
        best_predictions = mlp_predictions
    print(f"\nAccuracy with {activation_function} activation function: {mlp_accuracy:.2%}")

# Calculate confusion matrix and classification report for the best MLP model
conf_matrix = confusion_matrix(y_test, best_predictions)
class_report = classification_report(y_test, best_predictions, zero_division=1)

print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)
print("\nGradient Descent Result:")
print(f"Accuracy: {gd_accuracy:.2%}")


  return 1 / (1 + np.exp(-x))



Accuracy with sigmoid activation function: 100.00%

Accuracy with relu activation function: 56.67%

Accuracy with tanh activation function: 56.67%

Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Gradient Descent Result:
Accuracy: 60.00%
