In [1]:
import numpy as np

# Creating functions for tasks related to Neural Network

In [2]:
def initialize_parameters_deep(layer_dims):
    np.random.seed(42)
    parameters = {}
    L = len(layer_dims)

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l-1], layer_dims[l]) * 0.01
        parameters['b' + str(l)] = np.zeros((1, layer_dims[l]))

    return parameters

def relu(Z):
    return np.maximum(0, Z)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def forward_propagation_deep(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2

    for l in range(1, L):
        A_prev = A
        Z = A_prev.dot(parameters['W' + str(l)]) + parameters['b' + str(l)]
        A = relu(Z)
        cache = (A_prev, parameters['W' + str(l)], parameters['b' + str(l)], Z)
        caches.append(cache)

    ZL = A.dot(parameters['W' + str(L)]) + parameters['b' + str(L)]
    AL = sigmoid(ZL)
    cache = (A, parameters['W' + str(L)], parameters['b' + str(L)], ZL)
    caches.append(cache)

    return AL, caches

def compute_cost(AL, Y):
    m = Y.shape[0]
    # Compute the cross-entropy cost
    cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
    print(cost)
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    return cost


def backward_propagation_deep(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[0]
    Y = Y.reshape(AL.shape)

    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    current_cache = caches[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")

    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l+2)], current_cache, "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

def update_parameters_deep(parameters, grads, learning_rate):
    L = len(parameters) // 2

    for l in range(L):
        parameters["W" + str(l+1)] -= learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] -= learning_rate * grads["db" + str(l+1)]

    return parameters

def linear_backward(dZ, cache):
    A_prev, W, b, Z = cache
    m = A_prev.shape[0]

    dW = 1./m * np.dot(A_prev.T, dZ)
    db = 1./m * np.sum(dZ, axis=0, keepdims=True)
    dA_prev = np.dot(dZ, W.T)

    return dA_prev, dW, db

def linear_activation_backward(dA, cache, activation):
    A_prev, W, b, Z = cache
    if activation == "relu":
        dZ = relu_backward(dA, Z)
        dA_prev, dW, db = linear_backward(dZ, cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, Z)
        dA_prev, dW, db = linear_backward(dZ, cache)

    return dA_prev, dW, db

def sigmoid_backward(dA, Z):
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    return dZ

def accuracy(predictions, labels):
    """
    This function calculates the accuracy of predictions against the true labels
    """
    return np.mean(predictions == labels) * 100

## Function for training model

In [3]:
def train(X_train, y_train, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=True):
    np.random.seed(1)
    costs = []

    parameters = initialize_parameters_deep(layers_dims)

    for i in range(num_iterations):
        # Forward propagation
        AL, caches = forward_propagation_deep(X_train, parameters)

        # Compute cost
        cost = compute_cost(AL, y_train)

        # Backward propagation
        grads = backward_propagation_deep(AL, y_train, caches)

        # Update parameters
        parameters = update_parameters_deep(parameters, grads, learning_rate)

        if print_cost and i % 100 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
            costs.append(cost)

    return parameters, costs


def predict(X, parameters):
    """
    This function is used to predict the outcomes based on the model parameters
    """
    AL, _ = forward_propagation_deep(X, parameters)
    predictions = AL > 0.5  # Using 0.5 as the threshold for binary classification
    return predictions

## Loading dataset and training model

In [4]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import numpy as np

# Load the dataset
wine = load_wine()
X = wine.data[:, :10]  # Using only the first 10 features
y = wine.target

# Adjust y for binary classification, class 1 vs. not class 1
y = (y == 1).astype(int)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize X_train and X_test
X_train = (X_train - np.mean(X_train, axis=0)) / np.std(X_train, axis=0)
X_test = (X_test - np.mean(X_test, axis=0)) / np.std(X_test, axis=0)

# Ensure y_train and y_test are the correct shape
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Define the layers dimensions
layers_dims = [10, 10, 8, 8, 4, 1]  # 5 layers as specified

# Now we can train the model with these prepared datasets
parameters, costs = train(X_train, y_train, layers_dims, learning_rate=0.01, num_iterations=1500, print_cost=True)


0.6931471805582817
Cost after iteration 0: 0.693147
0.6930500991209114
0.6929534991667131
0.6928573814708194
0.6927617437788859
0.6926665836932666
0.6925718988278196
0.6924776868079713
0.6923839452711091
0.6922906718661795
0.692197864253561
0.6921055201050484
0.6920136371039654
0.6919222129450188
0.6918312453342486
0.6917407319889751
0.6916506706377408
0.6915610590202885
0.6914718948874772
0.6913831760013011
0.6912949001346294
0.6912070650713841
0.6911196686064083
0.6910327085454159
0.6909461827049407
0.6908600889122776
0.6907744250054443
0.6906891888331691
0.6906043782547563
0.6905199911400973
0.6904360253695975
0.6903524788341113
0.690269349434939
0.6901866350837548
0.6901043337025583
0.6900224432236259
0.6899409615894662
0.6898598867527658
0.6897792166763446
0.6896989493331073
0.6896190827060049
0.6895396147879745
0.6894605435818969
0.6893818671005503
0.6893035833665637
0.6892256904123695
0.6891481862801583
0.689071069021832
0.6889943366989586
0.6889179873827252
0.6888420191538941
0

## Model Performance Metrics

In [7]:
from sklearn.metrics import confusion_matrix, classification_report

# Use the trained model to predict the test set
predictions_test = predict(X_test, parameters)

# Calculate the accuracy on the test set
test_accuracy = accuracy(predictions_test, y_test.reshape(-1, 1))

print(f"Test Accuracy: {test_accuracy:.2f}%")


# Generate a confusion matrix
conf_matrix = confusion_matrix(y_test, predictions_test)
print("Confusion Matrix:\n", conf_matrix)

# Detailed classification report (Precision, Recall, F1-Score)
print("Classification Report:\n", classification_report(y_test, predictions_test))

Test Accuracy: 61.11%
Confusion Matrix:
 [[22  0]
 [14  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.61      1.00      0.76        22
           1       0.00      0.00      0.00        14

    accuracy                           0.61        36
   macro avg       0.31      0.50      0.38        36
weighted avg       0.37      0.61      0.46        36



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Tuning model

In [8]:
learning_rates = [0.1, 0.01, 0.001]
epochs = [500, 1500, 3000]
best_accuracy = 0
best_lr = 0
best_epoch = 0

for lr in learning_rates:
    for epoch in epochs:
        print(f"Training model with learning rate: {lr} and epochs: {epoch}")
        parameters, costs = train(X_train, y_train, layers_dims, learning_rate=lr, num_iterations=epoch, print_cost=False)
        predictions_test = predict(X_test, parameters)
        test_accuracy = accuracy(predictions_test, y_test.reshape(-1, 1))
        print(f"Test Accuracy: {test_accuracy:.2f}%")

        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_lr = lr
            best_epoch = epoch

print(f"Best Test Accuracy: {best_accuracy:.2f}% with Learning Rate: {best_lr} and Epochs: {best_epoch}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
0.6930889375116883
0.6930792472008628
0.6930695617349649
0.6930598811114942
0.69305020532775
0.6930405343815971
0.6930308682697378
0.693021206991278
0.693011550542349
0.6930018989217395
0.692992252125987
0.6929826101534634
0.6929729730010749
0.6929633406672724
0.6929537131501186
0.6929440904451238
0.6929344725513386
0.6929248594664915
0.6929152511874003
0.6929056477127801
0.6928960490387736
0.6928864551643771
0.6928768660869142
0.6928672818026993
0.6928577023103343
0.6928481276077395
0.6928385576915126
0.6928289925604627
0.6928194322110257
0.6928098766416088
0.6928003258501276
0.6927907798327639
0.692781238587467
0.6927717021133886
0.6927621704069172
0.6927526434658545
0.6927431212874419
0.6927336038701424
0.692724091210589
0.6927145833074867
0.6927050801582736
0.6926955817594758
0.692686088108822
0.6926765992055655
0.6926671150465178
0.6926576356285341
0.6926481609490794
0.6926386910073381
0.6926292257999234
0.6926197653

# The best performance metrics where given by model whose learning rate was 0.1 and epochs were 500