In [1]:
## A Basic MATH Two-Layer Neural Optimization Model based on assumptions, with an accuracy of 85%

In [2]:
import numpy as np
import pandas as pd
import yfinance as yf
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from datetime import datetime, timedelta
import requests
import io
# Load the training table from a CSV file in the current working directory.
# NOTE(review): later cells treat column 0 as the class label and every
# remaining column as a pixel value -- confirm the file matches that layout.
data = pd.read_csv('train.csv')
# A bare trailing expression makes the notebook render the DataFrame as the cell output.
data

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Convert the DataFrame into a plain ndarray with one sample per row.
data = np.array(data)
m, n = data.shape

# Shuffle the rows before splitting into dev and training sets. A seeded
# Generator keeps the split (and therefore every reported accuracy)
# reproducible across "Restart Kernel -> Run All".
SEED = 42
DEV_SIZE = 1000  # number of shuffled rows held out for the development set
rng = np.random.default_rng(SEED)
rng.shuffle(data)  # in-place shuffle along axis 0 (rows)

# Development set: the first DEV_SIZE shuffled rows, transposed so that each
# column is one sample. Row 0 holds the labels, rows 1..n-1 the features.
data_dev = data[0:DEV_SIZE].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.  # presumably 8-bit pixel values scaled into [0, 1] -- TODO confirm

# Training set: every remaining row, laid out the same way.
data_train = data[DEV_SIZE:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape

# Optimized weight initialization
def init_params(n_inputs=784, n_hidden=10, n_outputs=10):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by
    ``sqrt(2 / fan_in)`` (He initialization); biases start at zero.

    Args:
        n_inputs: Number of input features (fan-in of the first layer).
        n_hidden: Number of hidden units (fan-in of the second layer).
        n_outputs: Number of output classes.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_inputs)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
        With the default arguments these are the original 10x784 / 10x10 shapes.
    """
    # Draws come from NumPy's global RNG; W1 is drawn before W2.
    W1 = np.random.randn(n_hidden, n_inputs) * np.sqrt(2. / n_inputs)
    b1 = np.zeros((n_hidden, 1))
    W2 = np.random.randn(n_outputs, n_hidden) * np.sqrt(2. / n_hidden)
    b2 = np.zeros((n_outputs, 1))
    return W1, b1, W2, b2

def ReLU(Z):
    """Rectified linear unit: the element-wise maximum of ``Z`` and zero."""
    rectified = np.maximum(Z, 0)
    return rectified

def ReLU_deriv(Z):
    """Return a boolean mask that is True wherever ``Z`` is strictly positive.

    Multiplying by this mask applies the ReLU slope (1 where Z > 0, else 0).
    """
    is_active = Z > 0
    return is_active

def softmax(Z):
    """Apply softmax along axis 0, so every column of the result sums to 1."""
    # Shifting each column by its own maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing on large inputs.
    column_peak = np.max(Z, axis=0, keepdims=True)
    exponentials = np.exp(Z - column_peak)
    normaliser = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / normaliser

def forward_prop(W1, b1, W2, b2, X):
    """Run one forward pass through the two-layer network.

    Args:
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.
        X: Input matrix; assumed to hold one sample per column -- TODO confirm.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden ReLU
        activation, output pre-activation and output softmax activation.
    """
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = ReLU(hidden_pre)
    output_pre = np.dot(W2, hidden_act) + b2
    output_prob = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_prob

def one_hot(Y, num_classes=None):
    """One-hot encode integer class labels, one column per sample.

    Args:
        Y: 1-D array of non-negative integer class labels.
        num_classes: Number of rows (classes) in the encoding. When omitted it
            is inferred as ``Y.max() + 1``, which under-counts if the highest
            class is absent from ``Y``; pass it explicitly to get a fixed height.

    Returns:
        Float array of shape ``(num_classes, Y.size)`` containing a single 1
        per column, in the row given by that sample's label.

    Raises:
        ValueError: If ``Y`` is empty and ``num_classes`` is not given, or if
            any label lies outside ``[0, num_classes - 1]``.
    """
    labels = np.asarray(Y)
    if num_classes is None:
        if labels.size == 0:
            raise ValueError("cannot infer num_classes from an empty label array")
        num_classes = labels.max() + 1
    # Negative labels would silently wrap around under NumPy indexing, so
    # reject anything outside the valid class range up front.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(f"labels must lie in [0, {num_classes - 1}]")
    one_hot_Y = np.zeros((labels.size, num_classes))
    if labels.size:
        one_hot_Y[np.arange(labels.size), labels] = 1
    # Built sample-major above; transpose so each column is one sample.
    return one_hot_Y.T

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute parameter gradients for the two-layer network.

    The output-layer error is ``A2 - one_hot(Y)``, which is consistent with a
    softmax output trained with cross-entropy loss. All gradients are averaged
    over the ``m = X.shape[1]`` samples.

    Args:
        Z1, A1: Hidden-layer pre-activation and activation from the forward pass.
        Z2: Output pre-activation (accepted for symmetry; not used here).
        A2: Output activations from the forward pass.
        W1: Hidden-layer weights (accepted for symmetry; not used here).
        W2: Output-layer weights.
        X: Input matrix the forward pass was run on.
        Y: Integer class labels for the columns of ``X``.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)`` matching the shapes of the parameters.
    """
    m = X.shape[1]
    one_hot_Y = one_hot(Y)
    # one_hot infers its height from the largest label present in Y. If the
    # highest class is missing from this batch the target would have fewer rows
    # than A2, so pad it with all-zero rows up to the network's output size.
    missing_rows = A2.shape[0] - one_hot_Y.shape[0]
    if missing_rows > 0:
        one_hot_Y = np.pad(one_hot_Y, ((0, missing_rows), (0, 0)), mode="constant")
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2, axis=1, keepdims=True)
    # Back-propagate through the output weights, then gate by the ReLU slope.
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Take one gradient-descent step and return the updated parameters.

    Each parameter is moved against its gradient by ``alpha`` times that
    gradient. The step is computed out of place: the arrays passed in are left
    untouched, so a caller that keeps a reference to the previous parameters
    (or re-runs a notebook cell) does not see them change underneath it.

    Args:
        W1, b1, W2, b2: Current parameters.
        dW1, db1, dW2, db2: Gradients matching the parameters.
        alpha: Learning rate.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of new arrays holding the updated values.
    """
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2

def get_predictions(A2):
    """Return, for each column of ``A2``, the row index of its largest entry."""
    predicted_classes = np.argmax(A2, 0)
    return predicted_classes

def get_accuracy(predictions, Y):
    """Return the fraction of entries in ``predictions`` that equal ``Y``."""
    hits = np.sum(predictions == Y)
    total = Y.size
    return hits / total

def gradient_descent(X, Y, alpha, iterations):
    """Train the two-layer network by repeating forward, backward and update steps.

    Every iteration runs on the whole of ``X``/``Y``. Accuracy on that same
    data is printed every 50 iterations and on the final one; it is computed
    from the activations produced before that iteration's parameter update.

    Args:
        X: Input matrix passed to ``forward_prop``.
        Y: Labels for the columns of ``X``.
        alpha: Learning rate forwarded to ``update_params``.
        iterations: Number of update steps to perform.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of trained parameters.
    """
    W1, b1, W2, b2 = init_params()
    final_step = iterations - 1
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        grads = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, *grads, alpha)

        # Only report on every 50th step and on the very last one.
        if i % 50 != 0 and i != final_step:
            continue
        acc = get_accuracy(get_predictions(A2), Y)
        print(f"Iteration {i}, Accuracy: {acc:.4f}")
    return W1, b1, W2, b2

# Prediction functions
def make_predictions(X, W1, b1, W2, b2):
    """Run a forward pass on ``X`` and return the predicted class per column."""
    forward_outputs = forward_prop(W1, b1, W2, b2, X)
    # The last element of the forward-pass tuple is the softmax output A2.
    class_probabilities = forward_outputs[-1]
    return get_predictions(class_probabilities)

def test_prediction(index, X, Y, W1, b1, W2, b2):
    """Print the model's prediction for one sample next to its true label.

    Args:
        index: Column of ``X`` (and position in ``Y``) to inspect.
        X: Input matrix.
        Y: Labels aligned with the columns of ``X``.
        W1, b1, W2, b2: Trained network parameters.
    """
    # The trailing newaxis keeps the selected column two-dimensional, so it
    # passes through the forward pass like a one-sample batch.
    sample = X[:, index, np.newaxis]
    prediction = make_predictions(sample, W1, b1, W2, b2)
    label = Y[index]
    print(f"Prediction: {prediction}, Label: {label}")

# Fit the network on the training split (every iteration uses the full split).
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, iterations=2000, alpha=0.05)


# Spot-check the first three training samples against their labels.
for sample_index in range(3):
    test_prediction(sample_index, X_train, Y_train, W1, b1, W2, b2)

# Measure accuracy on the held-out development set.
dev_predictions = make_predictions(X_dev, W1, b1, W2, b2)
dev_accuracy = get_accuracy(dev_predictions, Y_dev)
print(f"Development Set Accuracy: {dev_accuracy:.4f}")

Iteration 0, Accuracy: 0.1034
Iteration 50, Accuracy: 0.5561
Iteration 100, Accuracy: 0.7069
Iteration 150, Accuracy: 0.7724
Iteration 200, Accuracy: 0.8204
Iteration 250, Accuracy: 0.8439
Iteration 300, Accuracy: 0.8576
Iteration 350, Accuracy: 0.8667
Iteration 400, Accuracy: 0.8743
Iteration 450, Accuracy: 0.8804
Iteration 500, Accuracy: 0.8850
Iteration 550, Accuracy: 0.8882
Iteration 600, Accuracy: 0.8911
Iteration 650, Accuracy: 0.8934
Iteration 700, Accuracy: 0.8952
Iteration 750, Accuracy: 0.8974
Iteration 800, Accuracy: 0.8992
Iteration 850, Accuracy: 0.9006
Iteration 900, Accuracy: 0.9023
Iteration 950, Accuracy: 0.9037
Iteration 1000, Accuracy: 0.9050
Iteration 1050, Accuracy: 0.9062
Iteration 1100, Accuracy: 0.9078
Iteration 1150, Accuracy: 0.9088
Iteration 1200, Accuracy: 0.9099
Iteration 1250, Accuracy: 0.9105
Iteration 1300, Accuracy: 0.9117
Iteration 1350, Accuracy: 0.9124
Iteration 1400, Accuracy: 0.9134
Iteration 1450, Accuracy: 0.9144
Iteration 1500, Accuracy: 0.9152
