In [None]:
# exercise 5.1.1
import pandas as pd
import numpy as np

url = "https://hastie.su.domains/ElemStatLearn/datasets/SAheart.data"

# Load the SAheart dataset; the file's 'row.names' column becomes the index.
df = pd.read_csv(url, index_col='row.names')

# Convert binary text data to numbered categories
df['famhist'] = pd.Categorical(df['famhist']).codes

# Class labels: the 'chd' column as a 1-D array. It is then removed from df so
# that only the remaining columns end up in the feature matrix X.
y = df["chd"].to_numpy()
df = df.drop(columns=["chd"])

# Attribute names (capitalised column names of the remaining features)
attributeNames = [name.capitalize() for name in df.columns]

# Feature matrix with N rows (observations) and M columns (attributes)
X = df.to_numpy()
N, M = X.shape




In [None]:
def initialize_parameters(input_size, hidden_size, output_size):
    """
    Create the weights and biases of a network with one hidden layer.

    The global NumPy seed is reset to 42 on every call, so repeated calls
    with the same sizes return identical values.

    Returns a dict with:
        "W1": (hidden_size, input_size) standard-normal draws scaled by 0.01
        "b1": (hidden_size, 1) zeros
        "W2": (output_size, hidden_size) standard-normal draws scaled by 0.01
        "b2": (output_size, 1) zeros
    """
    np.random.seed(42)  # For reproducibility
    weight_scale = 0.01

    # W1 is drawn before W2 so the random stream is consumed in a fixed order.
    hidden_weights = np.random.randn(hidden_size, input_size) * weight_scale
    output_weights = np.random.randn(output_size, hidden_size) * weight_scale

    return {
        "W1": hidden_weights,
        "b1": np.zeros((hidden_size, 1)),
        "W2": output_weights,
        "b2": np.zeros((output_size, 1)),
    }


In [None]:
def sigmoid(Z):
    """Logistic function 1 / (1 + exp(-Z)), evaluated with NumPy's exp."""
    exp_of_negated = np.exp(-Z)
    return 1 / (1 + exp_of_negated)

def relu(Z):
    """Rectified linear unit: the element-wise maximum of 0 and Z."""
    floor = 0
    return np.maximum(floor, Z)

def relu_derivative(Z):
    """Derivative of ReLU as integers: 1 where Z is strictly positive, else 0."""
    positive_mask = Z > 0
    return positive_mask.astype(int)


In [None]:
def forward_propagation(X, parameters):
    """
    Run one forward pass: tanh hidden layer followed by a sigmoid output.

    X is multiplied from the left by W1, so its rows must line up with the
    columns of W1. `parameters` must provide "W1", "b1", "W2" and "b2".

    Returns (A2, cache), where cache stores the pre-activations and
    activations of both layers under "Z1", "A1", "Z2" and "A2".
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]

    # Hidden layer: affine map, then tanh
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = np.tanh(hidden_pre)

    # Output layer: affine map, then logistic sigmoid
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = 1 / (1 + np.exp(-output_pre))

    cache = dict(Z1=hidden_pre, A1=hidden_act, Z2=output_pre, A2=output_act)
    return output_act, cache


In [None]:
def compute_cost(Y, A2):
    """
    Half mean squared error between predictions A2 and targets Y.

    The sum of squared differences is divided by twice Y.shape[1], i.e.
    twice the number of columns of Y.
    """
    n_columns = Y.shape[1]
    residual = A2 - Y
    return np.sum(residual ** 2) / (2 * n_columns)

In [None]:
def backward_propagation(X, Y, parameters, cache):
    """
    Back-propagate through the tanh-hidden / sigmoid-output network.

    Parameters
    ----------
    X : array with one column per sample (X.shape[1] is the sample count).
    Y : targets, broadcastable against cache["A2"].
    parameters : dict; only "W2" is read.
    cache : dict from the forward pass; "A1" (tanh activations) and
        "A2" (sigmoid outputs) are read.

    Returns
    -------
    dict with "dW1", "db1", "dW2", "db2", each averaged over the samples.
    """
    m = X.shape[1]
    W2 = parameters["W2"]
    A1 = cache["A1"]

    # Output-layer error. A2 - Y is the gradient of binary cross-entropy with
    # respect to Z2 for a sigmoid output.
    # NOTE(review): compute_cost in this notebook reports a squared-error cost,
    # which is not the loss this gradient descends - confirm which is intended.
    dZ2 = cache["A2"] - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # The hidden layer uses tanh in the forward pass, so its derivative is
    # 1 - tanh(Z1)^2 = 1 - A1^2 (not the ReLU step function).
    dZ1 = np.dot(W2.T, dZ2) * (1 - A1 ** 2)
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return grads


In [None]:
def update_parameters(parameters, grads, learning_rate):
    """
    Apply one gradient-descent step to every entry of `parameters`.

    For each key K, learning_rate * grads["d" + K] is subtracted with an
    augmented assignment, so the passed-in dict is modified and returned.
    """
    for name in parameters:
        step = learning_rate * grads["d" + name]
        parameters[name] -= step
    return parameters


In [None]:
def train_neural_network(X, Y, input_size, hidden_size, output_size, epochs=1000, learning_rate=0.01):
    """
    Train the network with full-batch gradient descent.

    Each epoch runs a forward pass, evaluates the cost, back-propagates and
    updates the parameters. The cost measured before the update is printed
    on every 100th epoch, starting with epoch 0.

    Returns the parameter dictionary after the last update.
    """
    parameters = initialize_parameters(input_size, hidden_size, output_size)

    for epoch in range(epochs):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(Y, A2)
        parameters = update_parameters(
            parameters,
            backward_propagation(X, Y, parameters, cache),
            learning_rate,
        )

        is_logging_epoch = epoch % 100 == 0
        if is_logging_epoch:
            print(f"Epoch {epoch}: Cost = {cost}")

    return parameters


In [None]:
def predict(X, parameters):
    """Return 0/1 integer labels: 1 where the network output exceeds 0.5."""
    outputs, _cache = forward_propagation(X, parameters)
    above_threshold = outputs > 0.5
    return above_threshold.astype(int)


In [None]:
# Sanity check of the feature matrix: report its shape and preview the first
# few rows instead of printing the whole array.
print(f"X shape: {X.shape}")
print(X[:5])

In [None]:
# Target for the runs below: the 'typea' column of df as a 1-D NumPy array.
# NOTE(review): only 'chd' was dropped from df before X was built, so 'typea'
# is also one of the columns of X - the target is present among the inputs.
# Confirm this is intended before interpreting any fit quality.
Y= np.array(df['typea'])


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Neural Network Implementation
def initialize_parameters(input_size, hidden_size, output_size):
    """
    Build the initial weights and biases for the 2-layer network.

    Weights are standard-normal draws scaled by sqrt(2 / fan_in)
    (He initialization); biases start at zero. The global NumPy seed is
    reset to 42 on every call, so the result is deterministic.
    """
    np.random.seed(42)

    hidden_scale = np.sqrt(2 / input_size)
    output_scale = np.sqrt(2 / hidden_size)

    # Dict entries are evaluated in order, so W1 is drawn before W2.
    return {
        "W1": np.random.randn(hidden_size, input_size) * hidden_scale,
        "b1": np.zeros((hidden_size, 1)),
        "W2": np.random.randn(output_size, hidden_size) * output_scale,
        "b2": np.zeros((output_size, 1)),
    }

def sigmoid(Z):
    """
    Logistic sigmoid, 1 / (1 + exp(-Z)), applied through NumPy's exp
    """
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def relu(Z):
    """
    Rectified linear unit: element-wise maximum of 0 and Z
    """
    lower_bound = 0
    rectified = np.maximum(lower_bound, Z)
    return rectified

def forward_propagation(X, parameters):
    """
    Forward pass: ReLU hidden layer followed by a linear (identity) output.

    Returns (A2, cache). Because the output activation is the identity,
    cache["A2"] and cache["Z2"] refer to the same array.
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer: affine map, then ReLU
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    # Output layer: affine map only, i.e. a linear activation for regression
    Z2 = np.dot(W2, A1) + b2
    A2 = Z2

    return A2, {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}

def compute_cost(Y, A2):
    """
    Half mean squared error: sum of squared differences between A2 and Y,
    divided by twice the number of columns of Y
    """
    squared_error = np.square(A2 - Y)
    return np.sum(squared_error) / (2 * Y.shape[1])

def backward_propagation(X, Y, parameters, cache):
    """
    Backward propagation for the ReLU-hidden / linear-output network.

    Parameters
    ----------
    X : array with one column per sample (X.shape[1] is the sample count).
    Y : targets, broadcastable against cache["A2"].
    parameters : dict; only "W2" is read.
    cache : dict from the forward pass; "A1", "A2" and "Z1" are read.

    Returns
    -------
    dict with "dW1", "db1", "dW2", "db2", each averaged over the samples.
    """
    m = X.shape[1]

    W2 = parameters["W2"]
    A1, A2, Z1 = cache["A1"], cache["A2"], cache["Z1"]

    # Output layer gradient: the output is linear, so the error is A2 - Y
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # Hidden layer gradient: the boolean mask (Z1 > 0) is the ReLU derivative
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * (Z1 > 0)
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    gradients = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

    return gradients

def update_parameters(parameters, gradients, learning_rate):
    """
    One gradient-descent step for W1, b1, W2 and b2.

    A new dictionary is built from new arrays; the arrays stored in the
    `parameters` argument are not modified.
    """
    names = ("W1", "b1", "W2", "b2")
    return {
        name: parameters[name] - learning_rate * gradients["d" + name]
        for name in names
    }

def train_neural_network(X, Y, input_size, hidden_size, output_size, epochs, learning_rate):
    """
    Fit the network with full-batch gradient descent.

    Every iteration runs forward pass, cost, backward pass and parameter
    update. On every 100th iteration (starting at 0) the cost measured
    before that iteration's update is recorded and printed.

    Returns (parameters, costs): the final parameter dict and the list of
    recorded costs.
    """
    params = initialize_parameters(input_size, hidden_size, output_size)
    cost_history = []

    for step in range(epochs):
        output, cache = forward_propagation(X, params)
        current_cost = compute_cost(Y, output)
        step_gradients = backward_propagation(X, Y, params, cache)
        params = update_parameters(params, step_gradients, learning_rate)

        # Anything that is not a multiple of 100 is skipped for logging.
        if step % 100:
            continue
        cost_history.append(current_cost)
        print(f"Cost after iteration {step}: {current_cost}")

    return params, cost_history

def predict(X, parameters):
    """
    Return the network output for X: the first element of the
    forward-propagation result, with the cache discarded
    """
    forward_result = forward_propagation(X, parameters)
    return forward_result[0]

# Helper functions
def preprocess_data(X_df, Y_array):
    """
    Preprocess the data for the neural network

    Parameters
    ----------
    X_df : DataFrame or array-like with one row per sample and one column
        per feature.
    Y_array : array-like of targets. A 1-D input is reshaped to a single
        row; any other input is returned as an array with its shape kept.

    Returns
    -------
    (X, Y) where X is the column-standardised feature matrix transposed to
    (n_features, n_samples), and Y is as described above.
    """
    # np.asarray accepts DataFrames, arrays and nested lists alike
    X = np.asarray(X_df)

    # Normalize the features (important for neural networks). A constant
    # column has zero standard deviation; dividing by it would give NaN, so
    # such columns are divided by 1 and come out as all zeros.
    column_mean = np.mean(X, axis=0)
    column_std = np.std(X, axis=0)
    safe_std = np.where(column_std == 0, 1.0, column_std)
    X = (X - column_mean) / safe_std

    # Reshape Y to be a row vector (1, m); np.asarray lets lists and Series
    # through, which have no usable reshape of their own
    Y = np.asarray(Y_array)
    if Y.ndim == 1:
        Y = Y.reshape(1, -1)

    # Transpose X to shape (n_features, n_samples)
    X = X.T

    return X, Y

def run_neural_network(X_df, Y_array, hidden_size=9, epochs=10000, learning_rate=0.01):
    """
    Run the complete neural network training and prediction process

    Preprocesses the data, trains the network, predicts on the training
    data, prints the R-squared of those predictions and plots the recorded
    training cost.

    Returns (parameters, predictions, r_squared).
    """
    # Preprocess data
    X, Y = preprocess_data(X_df, Y_array)

    # Get dimensions: after preprocessing, features are the rows of X
    input_size = X.shape[0]
    output_size = 1

    print(f"Input shape: {X.shape}")
    print(f"Output shape: {Y.shape}")

    # Train the network
    parameters, costs = train_neural_network(
        X, Y, input_size, hidden_size, output_size, epochs, learning_rate
    )

    # Make predictions (on the same data the network was trained on)
    predictions = predict(X, parameters)

    # Calculate R-squared for regression
    SSres = np.sum((Y - predictions) ** 2)
    SStot = np.sum((Y - np.mean(Y)) ** 2)
    r_squared = 1 - (SSres / SStot)

    print(f"R-squared: {r_squared}")

    # Plot the cost over iterations. train_neural_network records one cost
    # every 100 iterations, so the x positions are the iteration numbers
    # 0, 100, 200, ... and the axis is labelled in plain iterations.
    logged_iterations = range(0, epochs, 100)
    plt.figure(figsize=(10, 6))
    plt.plot(logged_iterations, costs)
    plt.xlabel("Iteration")
    plt.ylabel("Cost")
    plt.title("Cost over Iterations")
    plt.grid(True)
    plt.show()

    return parameters, predictions, r_squared

# Example usage
# run_neural_network(X_df, Y_array, hidden_size=9, epochs=10000, learning_rate=0.01)

In [None]:
# Train on the full data set and keep the results under names so later cells
# can reuse them. Binding the return value also stops the notebook from
# echoing the whole (parameters, predictions, r_squared) tuple as cell output.
nn_parameters, nn_predictions, nn_r_squared = run_neural_network(
    X, Y, hidden_size=9, epochs=10000, learning_rate=0.01
)

In [None]:
# Arrange the arrays the way the network expects: one column per sample.
Y_reshaped = Y.reshape(1, -1)  # Shape becomes (1, 462)
X_transposed = X.T  # Shape becomes (9, 462)

# NOTE(review): X is used here without the standardisation that
# preprocess_data applies - confirm that training on unscaled features with
# learning_rate=0.1 is intended.
# The train_neural_network defined in the cell above returns
# (parameters, costs), so the result is unpacked instead of storing the whole
# tuple under the name trained_parameters.
trained_parameters, training_costs = train_neural_network(
    X_transposed,
    Y_reshaped,
    input_size=X_transposed.shape[0],
    hidden_size=9,
    output_size=1,
    epochs=10000,
    learning_rate=0.1,
)