In [3]:
class NeuralNetwork:
    def __init__(self, layers):
        self.layers = layers
        self.weights = [np.random.randn(layers[i], layers[i+1]) for i in range(len(layers)-1)]
        self.biases = [np.zeros((1, layers[i+1])) for i in range(len(layers)-1)]
    
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_derivative(self, x):
        return x * (1 - x)
    
    def forward_pass(self, X):
        activations = [X]
        for w, b in zip(self.weights, self.biases):
            z = np.dot(activations[-1], w) + b
            activations.append(self.sigmoid(z))
        return activations
    
#     def backward_pass(self, X, Y, activations):
#         deltas = []
#         delta = (activations[-1] - Y)  * self.sigmoid_derivative(activations[-1])
#         deltas.append(delta)
#         for i in range(len(self.layers) - 2, 0, -1):
#             delta = np.dot(deltas[0], self.weights[i].T) * self.sigmoid_derivative(activations[i])
#             deltas.insert(0, delta)
#         return deltas
    
    def backward_pass(self, X, Y, activations):
        deltas = [(activations[-1] - Y) * self.sigmoid_derivative(activations[-1])]
        for i in range(len(self.layers) - 2, 0, -1):
            delta = np.dot(deltas[0], self.weights[i].T) * self.sigmoid_derivative(activations[i])
            deltas.insert(0, delta)
        return deltas
    
    
    def compute_gradients(self, activations, deltas):
        gradients_weights = [np.dot(activations[i].T, deltas[i]) for i in range(len(self.layers) - 1)]
        gradients_biases = [np.sum(deltas[i], axis=0) for i in range(len(self.layers) - 1)]
        return gradients_weights, gradients_biases
    
    def update_weights(self, gradients_weights, gradients_biases, learning_rate):
        self.weights = [w - learning_rate * gw for w, gw in zip(self.weights, gradients_weights)]
        self.biases = [b - learning_rate * gb for b, gb in zip(self.biases, gradients_biases)]
    
    def train(self, X, Y, learning_rate, lam, max_iterations, epsilon):
        for iteration in range(max_iterations):
            activations = self.forward_pass(X)
            deltas = self.backward_pass(X, Y, activations)
            gradients_weights, gradients_biases = self.compute_gradients(activations, deltas)
            self.update_weights(gradients_weights, gradients_biases, learning_rate)
            # Compute cost function
            J = np.mean(np.square(activations[-1] - Y))
            #print(f"Iteration {iteration+1}, Cost: {J}")
            # Check for convergence
            if J < epsilon:
                print(f"Converged at cost :{J} while Epsilon:{epsilon} ")
                return J
        return J
            

    def accuracy(self, y_true, y_pred):
        correct = np.sum(np.all(y_true == y_pred, axis=1))
        return correct / len(y_true)


    def f1_score(self, y_true, y_pred):
        tp = np.sum(np.logical_and(y_true, y_pred))
        fp = np.sum(np.logical_and(np.logical_not(y_true), y_pred))
        fn = np.sum(np.logical_and(y_true, np.logical_not(y_pred)))
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
        return f1


    def evaluate(self, X_test, y_test, J):
        activations = self.forward_pass(X_test)[-1]
        y_pred = (activations > 0.5).astype(int)
        acc = self.accuracy(y_test, y_pred)
        f1 = self.f1_score(y_test, y_pred)
        return J, acc, f1

In [6]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold

# Load dataset
df_house_votes = pd.read_csv("/Users/noshitha/Downloads/hw4/datasets/hw3_house_votes_84.csv", delimiter=",")

# Extract features and target variable
X_house_votes = pd.get_dummies(df_house_votes.drop(columns=['class']))  # Features
y_house_votes = df_house_votes['class']  # Target variable

# Normalize data
y_house_votes_resized = y_house_votes.values.reshape(-1, 1)

# Initialize the OneHotEncoder
encoder = OneHotEncoder(sparse=False)

# Fit and transform the target variable
y_encoded = encoder.fit_transform(y_house_votes_resized)

# Define model architectures and regularization parameters
architectures = [
    [X_house_votes.shape[1], 2, y_encoded.shape[1]],  # Architecture with fewer neurons
    [X_house_votes.shape[1], 10, 8, y_encoded.shape[1]],  # Architecture with a moderate number of neurons
    [X_house_votes.shape[1], 50, 40, 30, 20, 10, y_encoded.shape[1]],  # Architecture with a higher number of neurons AND MORE NEURONS
    [X_house_votes.shape[1], 100,100, 20, 30, 20, y_encoded.shape[1]],  # Architecture with fewer layers
    [X_house_votes.shape[1],1000, y_encoded.shape[1]],  # Architecture with more layers
    [X_house_votes.shape[1], 100, 100, 50, 50, 50, y_encoded.shape[1]]  # Architecture with more layers and neurons
]

regularization_params = [0.01, 0.1, 1.0]  # Example regularization parameters

# Initialize lists to store results
results_accuracy = {}
results_f1_score = {}
results_J_cost = {}

# Perform stratified k-fold cross-validation
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
for arch in architectures:
    for lam in regularization_params:
        accuracy_list = []
        f1_score_list = []
        J_list = []
        for train_index, test_index in skf.split(X_house_votes, y_house_votes):
            X_train, X_test = X_house_votes.iloc[train_index], X_house_votes.iloc[test_index]
            y_train, y_test = y_encoded[train_index], y_encoded[test_index]

            mean = np.mean(X_train, axis=0)
            std = np.std(X_train, axis=0)
            X_train_normalized = (X_train - mean) / std
            X_test_normalized = (X_test - mean) / std

            model = NeuralNetwork(arch)
            J = model.train(X_train_normalized, y_train, learning_rate=0.01, lam=lam, max_iterations=1000, epsilon=0.005)
            J, accuracy, f1_score = model.evaluate(X_test_normalized, y_test, J)
            accuracy_list.append(accuracy)
            f1_score_list.append(f1_score)
            J_list.append(J)

        mean_accuracy = np.mean(accuracy_list)
        mean_f1_score = np.mean(f1_score_list)
        mean_J_cost = np.mean(J_list)

        results_accuracy[(str(arch), lam)] = mean_accuracy
        results_f1_score[(str(arch), lam)] = mean_f1_score
        results_J_cost[(str(arch), lam)] = mean_J_cost

# Convert the results into a DataFrame for tabular representation
accuracy_df = pd.DataFrame(list(results_accuracy.items()), columns=['Architecture, Lambda', 'Mean Accuracy'])
f1_score_df = pd.DataFrame(list(results_f1_score.items()), columns=['Architecture, Lambda', 'Mean F1 Score'])
J_cost_df = pd.DataFrame(list(results_J_cost.items()), columns=['Architecture, Lambda', 'Mean J Cost'])

print("Mean Accuracy Results:")
print(accuracy_df)
print("\nMean F1 Score Results:")
print(f1_score_df)
print("\nMean J cost Results:")
print(J_cost_df)


Converged at cost :0.0049936865026640005 while Epsilon:0.005 
Converged at cost :0.004985426897653145 while Epsilon:0.005 
Converged at cost :0.004999588718503817 while Epsilon:0.005 
Converged at cost :0.004999042560525168 while Epsilon:0.005 
Converged at cost :0.004995693043631537 while Epsilon:0.005 
Converged at cost :0.0049861891207344695 while Epsilon:0.005 
Converged at cost :0.004992687478627742 while Epsilon:0.005 
Converged at cost :0.00499769860962235 while Epsilon:0.005 
Converged at cost :0.004995752591360978 while Epsilon:0.005 
Converged at cost :0.004998209403077233 while Epsilon:0.005 
Converged at cost :0.0049973325125238 while Epsilon:0.005 
Converged at cost :0.004997631803144477 while Epsilon:0.005 
Converged at cost :0.004997943896574303 while Epsilon:0.005 
Converged at cost :0.004996354805813788 while Epsilon:0.005 
Converged at cost :0.004996818848957043 while Epsilon:0.005 
Converged at cost :0.004999752899932887 while Epsilon:0.005 
Converged at cost :0.0049

In [9]:
accuracy_df

Unnamed: 0,"Architecture, Lambda",Mean Accuracy
0,"([16, 2, 2], 0.01)",0.935465
1,"([16, 2, 2], 0.1)",0.946934
2,"([16, 2, 2], 1.0)",0.944662
3,"([16, 10, 8, 2], 0.01)",0.94926
4,"([16, 10, 8, 2], 0.1)",0.942389
5,"([16, 10, 8, 2], 1.0)",0.953858
6,"([16, 50, 40, 30, 20, 10, 2], 0.01)",0.93351
7,"([16, 50, 40, 30, 20, 10, 2], 0.1)",0.933192
8,"([16, 50, 40, 30, 20, 10, 2], 1.0)",0.953911
9,"([16, 100, 100, 20, 30, 20, 2], 0.01)",0.937791


In [10]:
f1_score_df

Unnamed: 0,"Architecture, Lambda",Mean F1 Score
0,"([16, 2, 2], 0.01)",0.936536
1,"([16, 2, 2], 0.1)",0.946934
2,"([16, 2, 2], 1.0)",0.944662
3,"([16, 10, 8, 2], 0.01)",0.95065
4,"([16, 10, 8, 2], 0.1)",0.943486
5,"([16, 10, 8, 2], 1.0)",0.955115
6,"([16, 50, 40, 30, 20, 10, 2], 0.01)",0.939119
7,"([16, 50, 40, 30, 20, 10, 2], 0.1)",0.935541
8,"([16, 50, 40, 30, 20, 10, 2], 1.0)",0.95506
9,"([16, 100, 100, 20, 30, 20, 2], 0.01)",0.942433


In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Function to generate mini-batches
def generate_mini_batches(X, y, batch_size):
    num_samples = X.shape[0]
    num_batches = num_samples // batch_size
    mini_batches = []
    shuffled_indices = np.random.permutation(num_samples)
    X_shuffled = X[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(num_batches):
        start_idx = i * batch_size
        end_idx = start_idx + batch_size
        mini_batches.append((X_shuffled[start_idx:end_idx], y_shuffled[start_idx:end_idx]))
    if num_samples % batch_size != 0:
        mini_batches.append((X_shuffled[num_batches*batch_size:], y_shuffled[num_batches*batch_size:]))
    return mini_batches


def train_mini_batch(X_train, y_train, X_test, y_test, model, learning_rate, batch_size, max_iterations, epsilon):
    training_errors = []
    testing_errors = []
    for iteration in range(max_iterations):
        mini_batches = generate_mini_batches(X_train, y_train, batch_size)
        for mini_batch in mini_batches:
            X_mini_batch, y_mini_batch = mini_batch
            J = model.train(X_mini_batch, y_mini_batch, learning_rate=learning_rate, lam=lam, max_iterations=1, epsilon=epsilon)
        training_cost = np.mean(np.square(model.forward_pass(X_train)[-1] - y_train))  # Compute training cost
        testing_cost = np.mean(np.square(model.forward_pass(X_test)[-1] - y_test))  # Compute testing cost
        training_errors.append(training_cost)
        testing_errors.append(testing_cost)
        print(f"Iteration {iteration+1}, Training Cost: {training_cost}, Testing Cost: {testing_cost}")
        # Check for convergence
        if training_cost < epsilon:
            print(f"Converged at training cost :{training_cost} while Epsilon:{epsilon} ")
            break
    return training_errors, testing_errors

# Plot learning curve
def plot_learning_curve(training_errors, testing_errors, step_size):
    iterations = range(1, len(training_errors) + 1)
    plt.plot(iterations, training_errors, label='Training Error')
    plt.plot(iterations, testing_errors, label='Testing Error')
    plt.title('Learning Curve')
    plt.xlabel('Number of Training Examples')
    plt.ylabel('Error (J)')
    plt.xticks(np.arange(1, len(training_errors) + 1, step=step_size))
    plt.legend()
    plt.grid(True)
    plt.show()

In [8]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Define your neural network model and parameters
model = NeuralNetwork([X_house_votes.shape[1], 10, 8, y_encoded.shape[1]])  # Your desired architecture
learning_rate = 0.01
batch_size = 32
max_iterations = 1000
epsilon = 0.005

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_house_votes, y_encoded, test_size=0.2, random_state=42)

# Standardize features
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
X_train_normalized = (X_train - mean) / std
X_test_normalized = (X_test - mean) / std

# Train the model using mini-batch gradient descent
training_errors, testing_errors = train_mini_batch(X_train_normalized, y_train, X_test_normalized, y_test, model, learning_rate, batch_size, max_iterations, epsilon)

KeyError: "None of [Int64Index([127, 164,  45, 311, 300, 279, 196, 108, 198,  92,\n            ...\n            195, 236, 314,  22, 223,  52, 270, 325,  37, 104],\n           dtype='int64', length=348)] are in the [columns]"

In [None]:
# Plot the learning curve
step_size = 50  # Adjust as needed
plot_learning_curve(training_errors, testing_errors, step_size)