## Testing Various Methods of Binary Classification
### We will be testing a gradient descent implementation from scratch, a standardized descent, and a neural network to compare metrics.



In [72]:
import numpy as np
import pandas as pd

In [63]:
#Defining functions for classification and regression tasks, the following methods are needed:

# 1. A method to calculate the prediction of our model given certain instantiated params (weights and bias)
# 2. A method to calculate our model's accuracy (calculate the loss of the current model using logistic loss)
# 3. A method to calculate a single step of gradient descent in our model
# 4. A method to iteratively perform gradient descent to minimize our loss function
# 5. A method for calculating the accuracy of our model's prediction after training

def predict(w, b, x):
    def sigmoid(x):
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
    z = sigmoid(np.dot(x,w)+b)
    return z

def loss(w, b, X, y, predict):
    z = predict(w, b, X)
    loss = -np.mean(y * np.log(z + 1e-10) + (1 - y) * np.log(1 - z + 1e-10))
    return loss

def gradient_step(w, b, X, y, predict):
    m = len(X)
    predictions = predict(w, b, X)
    err = predictions - y
    d_dw = np.dot(X.T, err) / m
    d_db = np.mean(err)
    return d_dw, d_db

def iterations(w, b, X, y, predict, gradient_step, iters, alpha, loss):
    for i in range(iters):
        dw, db = gradient_step(w, b, X, y, predict)
        w -= (alpha * dw)
        b -= (alpha * db)
        if i % 5000 == 0:print("Current loss: {loss}".format(loss=loss(w, b, X, y, predict)))
    return w, b
def check(X, predict, y, w, b):
    num_incorrect = 0
    for i in range(len(X)):
        z = predict(w, b, X[i])
        if z >=0.5: z = 1
        else: z = 0
        if z != y[i]: 
            num_incorrect +=1
    acc = ((len(x_test) - num_incorrect) / len(x_test)) * 100
    return num_incorrect, acc

In [64]:
#Sample data for testing
#The data is accumulated and cleaned, removing null values, irrelevant values, and features that are in direct correlation with each other.

data1, data2 = pd.read_csv("Data1.csv"), pd.read_csv("Data2.csv")
all_data = data1.merge(data2, how='inner',on='ID')
all_data = all_data.drop(columns='ID')
all_data = all_data.dropna()
all_data = all_data.drop(columns='Age')
all_data = all_data.drop(columns='ZipCode')

In [65]:
#Creating the training and testing split using sklearn
#We also have to convert the dataframes into numpy arrays

from sklearn.model_selection import train_test_split

Y = all_data['LoanOnCard']
X = all_data.drop(columns='LoanOnCard')

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3, random_state = 1)
x_train, x_test, y_train, y_test = x_train.to_numpy(), x_test.to_numpy(), y_train.to_numpy(), y_test.to_numpy()

In [66]:
#Initializing parameters and beginning gradient descent iterations:

w_in, b_in = np.zeros_like(x_train[0]), 0
w, b = iterations(w_in, b_in, x_train, y_train, predict, gradient_step, 50000, 0.001, loss)

Current loss: 0.6484622116359611
Current loss: 0.31496412759178166
Current loss: 0.3050743990781503
Current loss: 0.29771040946457733
Current loss: 0.2910618163580063
Current loss: 0.28488108925272426
Current loss: 0.27908420869144046
Current loss: 0.27362028041167097
Current loss: 0.2684510179420149
Current loss: 0.2635453671869523


In [67]:
#Testing the accuracy of the model on the testing set:
init_incorrect, init_acc = check(x_test, predict, y_test, w, b)
init_loss = loss(w, b, x_test, y_test, predict)

In [68]:
x_train_standardized = np.zeros_like(x_train)
for i in range(len(x_train)):
    mean = np.mean(x_train[i])
    std = np.std(x_train[i])
    x_train_standardized[i] = (x_train[i] - mean)/std
w_s, b_s =  iterations(w_in, b_in, x_train_standardized, y_train, predict, gradient_step, 50000, 0.001, loss)

Current loss: 0.7680205212435258
Current loss: 0.3057746435306232
Current loss: 0.295802577744056
Current loss: 0.28989834587407415
Current loss: 0.2858760754448538
Current loss: 0.28295153233979836
Current loss: 0.28072976391908283
Current loss: 0.2789853548810546
Current loss: 0.2775796295274175
Current loss: 0.2764224695808323


In [69]:
#Testing the accuracy of the standardized model on the testing set:
s_incorrect, s_acc = check(x_test, predict, y_test, w_s, b_s)
s_loss = loss(w_s, b_s, x_test, y_test, predict)

In [70]:
#Creating Neural Network Model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam

nn_model = Sequential([
    Dense(units=12, activation=tf.nn.relu),
    Dense(units=12, activation=tf.nn.relu),
    Dense(units=3, activation=tf.nn.relu),
    Dense(units=1, activation=None)
])
nn_model.compile(loss=BinaryCrossentropy(from_logits=True),optimizer='Adam',metrics=['Accuracy'])
nn_model.fit(x_train,y_train, epochs=100)

Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - Accuracy: 0.9022 - loss: 0.7836
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - Accuracy: 0.9090 - loss: 0.3404
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - Accuracy: 0.9020 - loss: 0.2986
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - Accuracy: 0.8980 - loss: 0.2841
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - Accuracy: 0.9038 - loss: 0.2573
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - Accuracy: 0.9045 - loss: 0.2390
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - Accuracy: 0.9033 - loss: 0.2385
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - Accuracy: 0.9007 - loss: 0.2237
Epoch 9/100
[1m109/109[0m [32

<keras.src.callbacks.history.History at 0x2737f0b8fd0>

In [35]:
#Testing the accuracy of the neural network on the testing set:
nn_loss, nn_acc = nn_model.evaluate(x_test, y_test)

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - Accuracy: 0.9732 - loss: 0.0808 


In [71]:
#Overall Model Metrics

print("Accuracy of Original Model: {acc}".format(acc=init_acc))
print("Accuracy of Standardized Model: {acc}".format(acc=s_acc))
print("Accuracy of Neural Network Model: {acc}".format(acc=nn_acc * 100))

Accuracy of Original Model: 88.01874163319945
Accuracy of Standardized Model: 89.7590361445783
Accuracy of Neural Network Model: 97.38956093788147
