In [38]:
import numpy as np

def initialize_network_parameters(layer_dims):
    np.random.seed(42)
    parameters = {}
    number_layers = len(layer_dims)

    for i in range(1, number_layers):
        parameters[f'W{i}'] = np.random.randn(layer_dims[i-1], layer_dims[i]) * 0.01
        parameters[f'b{i}'] = np.zeros((1, layer_dims[i]))

    return parameters

def activation_relu(Z):
    return np.maximum(0, Z)

def activation_relu_derivative(dA, Z):
    dZ = np.where(Z > 0, dA, 0)
    return dZ

def activation_sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def activation_sigmoid_derivative(dA, Z):
    s = activation_sigmoid(Z)
    return dA * s * (1 - s)

def forward_propagation(X, parameters):
    caches = []
    A = X
    num_layers = len(parameters) // 2

    for i in range(1, num_layers):
        A_prev = A
        W = parameters[f'W{i}']
        b = parameters[f'b{i}']
        Z = np.dot(A_prev, W) + b
        A = activation_relu(Z)
        caches.append((A_prev, W, b, Z))

    Z_final = np.dot(A, parameters[f'W{num_layers}']) + parameters[f'b{num_layers}']
    A_final = activation_sigmoid(Z_final)
    caches.append((A, parameters[f'W{num_layers}'], parameters[f'b{num_layers}'], Z_final))

    return A_final, caches

def predict(X, parameters):

    A_final, _ = forward_propagation(X, parameters)
    predictions = A_final > 0.5  # Using 0.5 as the threshold for binary classification
    return predictions

def compute_cost(A_final, Y):
    m = Y.shape[0]
    cost = -np.sum(Y * np.log(A_final) + (1 - Y) * np.log(1 - A_final)) / m
    return np.squeeze(cost)

def backward_propagation(A_final, Y, caches):
    grads = {}
    num_layers = len(caches)
    m = A_final.shape[0]
    Y = Y.reshape(A_final.shape)
    dA_final = -(np.divide(Y, A_final) - np.divide(1 - Y, 1 - A_final))

    current_cache = caches[-1]
    grads[f'dA{num_layers}'], grads[f'dW{num_layers}'], grads[f'db{num_layers}'] = linear_activation_backward(dA_final, current_cache, 'sigmoid')

    for i in reversed(range(num_layers - 1)):
        current_cache = caches[i]
        dA_prev, dW, db = linear_activation_backward(grads[f'dA{i+2}'], current_cache, 'relu')
        grads[f'dA{i+1}'], grads[f'dW{i+1}'], grads[f'db{i+1}'] = dA_prev, dW, db

    return grads

def update_network_parameters(parameters, grads, learning_rate):
    num_layers = len(parameters) // 2

    for i in range(1, num_layers + 1):
        parameters[f'W{i}'] -= learning_rate * grads[f'dW{i}']
        parameters[f'b{i}'] -= learning_rate * grads[f'db{i}']

    return parameters

def linear_backward(dZ, cache):
    A_prev, W, _, _ = cache
    m = A_prev.shape[0]
    dW = np.dot(A_prev.T, dZ) / m
    db = np.sum(dZ, axis=0, keepdims=True) / m
    dA_prev = np.dot(dZ, W.T)

    return dA_prev, dW, db

def linear_activation_backward(dA, cache, activation):
    _, W, _, Z = cache
    if activation == 'relu':
        dZ = activation_relu_derivative(dA, Z)
    elif activation == 'sigmoid':
        dZ = activation_sigmoid_derivative(dA, Z)
    return linear_backward(dZ, cache)


In [39]:
import numpy as np

def train(X_train, y_train, layer_dims, learning_rate=0.0075, num_iterations=3000, print_cost=True):
    np.random.seed(1)
    costs = []

    # Updated function name to initialize_network_parameters
    parameters = initialize_network_parameters(layer_dims)

    for i in range(num_iterations):
        # Updated function name to forward_propagation
        A_final, caches = forward_propagation(X_train, parameters)

        # Updated variable name A_final instead of AL
        cost = compute_cost(A_final, y_train)

        # Updated function name to backward_propagation and A_final instead of AL
        grads = backward_propagation(A_final, y_train, caches)

        # Updated function name to update_network_parameters
        parameters = update_network_parameters(parameters, grads, learning_rate)

        if print_cost and i % 100 == 0:
            print(f"Cost after iteration {i}: {cost:.6f}")
            costs.append(cost)

    return parameters, costs


In [42]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import numpy as np

def load_and_prepare_data():
    # Load the dataset
    wine_data = load_wine()
    features = wine_data.data[:, :10]  # Using only the first 10 features
    targets = (wine_data.target == 1).astype(int)  # Adjust for binary classification

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

    # Normalize X_train and X_test
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    X_train_normalized = (X_train - mean) / std
    X_test_normalized = (X_test - mean) / std

    # Normalize the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Ensure y_train and y_test are the correct shape
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    return X_train_normalized, X_test_normalized, y_train, y_test

def train(X_train, y_train, layer_dimensions, learning_rate=0.01, num_iterations=1500, print_cost=True):
    # Assuming a training function that initializes parameters, conducts forward and backward propagation, and updates weights
    parameters = initialize_network_parameters(layer_dimensions)
    costs = []

    for i in range(num_iterations):
        AL, caches = forward_propagation(X_train, parameters)
        cost = compute_cost(AL, y_train)
        grads = backward_propagation(AL, y_train, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 100 == 0:
            print(f"Cost after iteration {i}: {cost:.6f}")
            costs.append(cost)

    return parameters, costs

# Main execution
X_train, X_test, y_train, y_test = load_and_prepare_data()
layer_dimensions = [10, 10, 8, 8, 4, 1]  # 5 layers
parameters, costs = train(X_train, y_train, layer_dimensions, learning_rate=0.01, num_iterations=1500, print_cost=True)


Cost after iteration 0: 0.693147
Cost after iteration 100: 0.685490
Cost after iteration 200: 0.680844
Cost after iteration 300: 0.678019
Cost after iteration 400: 0.676298
Cost after iteration 500: 0.675247
Cost after iteration 600: 0.674604
Cost after iteration 700: 0.674209
Cost after iteration 800: 0.673967
Cost after iteration 900: 0.673818
Cost after iteration 1000: 0.673726
Cost after iteration 1100: 0.673670
Cost after iteration 1200: 0.673635
Cost after iteration 1300: 0.673613
Cost after iteration 1400: 0.673600


In [43]:
from sklearn.metrics import confusion_matrix, classification_report

def accuracy(predictions, labels):
    """ Calculates the accuracy of predictions against the true labels. """
    return np.mean(predictions == labels) * 100

def predict_and_evaluate(X_test, parameters, y_test):
    # Predict test set outcomes
    test_predictions = predict(X_test, parameters)

    # Calculate the accuracy on the test set
    test_accuracy = accuracy(test_predictions, y_test)  # Ensure the accuracy function is correctly defined
    print(f"Test Accuracy: {test_accuracy:.2f}%")

    # Generate a confusion matrix
    test_confusion_matrix = confusion_matrix(y_test, test_predictions)
    print("Confusion Matrix:\n", test_confusion_matrix)

    # Detailed classification report (Precision, Recall, F1-Score)
    detailed_report = classification_report(y_test, test_predictions)
    print("Classification Report:\n", detailed_report)

    return test_accuracy

# Define your neural network architecture here, for example:
layer_dimensions = [10, 5, 1]  # Example layer dimensions

# Hyperparameter tuning setup
learning_rates = [0.1, 0.01, 0.001]
epochs = [500, 1500, 3000]
best_accuracy = 0
best_lr = 0
best_epoch = 0

for lr in learning_rates:
    for epoch in epochs:
        print(f"Training model with learning rate: {lr} and epochs: {epoch}")
        parameters, costs = train(X_train, y_train, layer_dimensions, learning_rate=lr, num_iterations=epoch, print_cost=False)
        current_accuracy = predict_and_evaluate(X_test, parameters, y_test)

        if current_accuracy > best_accuracy:
            best_accuracy = current_accuracy
            best_lr = lr
            best_epoch = epoch

print(f"Best Test Accuracy: {best_accuracy:.2f}% with Learning Rate: {best_lr} and Epochs: {best_epoch}")


Training model with learning rate: 0.1 and epochs: 500
Test Accuracy: 97.22%
Confusion Matrix:
 [[22  0]
 [ 1 13]]
Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98        22
           1       1.00      0.93      0.96        14

    accuracy                           0.97        36
   macro avg       0.98      0.96      0.97        36
weighted avg       0.97      0.97      0.97        36

Training model with learning rate: 0.1 and epochs: 1500
Test Accuracy: 97.22%
Confusion Matrix:
 [[22  0]
 [ 1 13]]
Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98        22
           1       1.00      0.93      0.96        14

    accuracy                           0.97        36
   macro avg       0.98      0.96      0.97        36
weighted avg       0.97      0.97      0.97        36

Training model with learning rate: 0.1 and epochs: 3000
Test Accura

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Accuracy: 100.00%
Confusion Matrix:
 [[22  0]
 [ 0 14]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        22
           1       1.00      1.00      1.00        14

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

Training model with learning rate: 0.01 and epochs: 3000
Test Accuracy: 100.00%
Confusion Matrix:
 [[22  0]
 [ 0 14]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        22
           1       1.00      1.00      1.00        14

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

Training model with learning rate: 0.001 and epochs: 500
Test Accuracy: 61.11%
Confusion Matrix:
 [[22  0]
 [14  0]]
Cl

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Accuracy: 61.11%
Confusion Matrix:
 [[22  0]
 [14  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.61      1.00      0.76        22
           1       0.00      0.00      0.00        14

    accuracy                           0.61        36
   macro avg       0.31      0.50      0.38        36
weighted avg       0.37      0.61      0.46        36

Training model with learning rate: 0.001 and epochs: 3000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Accuracy: 61.11%
Confusion Matrix:
 [[22  0]
 [14  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.61      1.00      0.76        22
           1       0.00      0.00      0.00        14

    accuracy                           0.61        36
   macro avg       0.31      0.50      0.38        36
weighted avg       0.37      0.61      0.46        36

Best Test Accuracy: 100.00% with Learning Rate: 0.01 and Epochs: 1500


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
