# Unit Testing

In [3]:
import numpy as np
import math
import pandas as pd

In [6]:
dataset = pd.read_csv('./data_banknote_authentication.txt',header=None)

In [7]:
dataset.head()

Unnamed: 0,0,1,2,3,4
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [76]:
dataset[:-10]

Unnamed: 0,0,1,2,3,4
0,3.621600,8.66610,-2.80730,-0.446990,0
1,4.545900,8.16740,-2.45860,-1.462100,0
2,3.866000,-2.63830,1.92420,0.106450,0
3,3.456600,9.52280,-4.01120,-3.594400,0
4,0.329240,-4.45520,4.57180,-0.988800,0
...,...,...,...,...,...
1357,-2.589900,-0.39110,0.93452,0.429720,1
1358,-1.011600,-0.19038,-0.90597,0.003003,1
1359,0.066129,2.49140,-2.94010,-0.621560,1
1360,-0.247450,1.93680,-2.46970,-0.805180,1


In [67]:
X = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values

In [78]:
print('Shape of X: ',X.shape)
print('Shape of y:', y.shape)

print("Data type of X:", X.dtype)
print("Data type of y:", y.dtype)

Shape of X:  (1372, 4)
Shape of y: (1372,)
Data type of X: float64
Data type of y: int64


In [79]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)

In [82]:
print('Shape of X: ',X_train.shape)
print('Shape of y:', y_train.shape)

print("Data type of X:", X_train.dtype)
print("Data type of y:", y_train.dtype)

Shape of X:  (960, 4)
Shape of y: (960,)
Data type of X: float64
Data type of y: int64


### Prediction Model

In [57]:
def model(X,w,b):
    
    m = X.shape[0]
    y_pred = np.zeros((m,))
    z= np.dot(X,w)+b

    for i in range(m):
        y_pred[i] = 1/(1+math.exp(-z[i]))

    return y_pred

In [58]:
m,n = X.shape
w_init = np.zeros((n,))
y_pred = model(X,w_init,0)

--------

### Cost Function

In [39]:
def compute_cost(X,y,w,b):
    m,n = X.shape
    y_pred = model(X,w,b)
    sum_cost = 0.0

    for i in range(m):
        sum_cost += (y[i]*math.log(y_pred[i]))+((1-y[i])*math.log(1-y_pred[i]))
    
    cost = -(sum_cost)/m

    return cost

In [41]:
m,n = X.shape
w_init = np.zeros((n,))
initial_cost = compute_cost(X,y,w_init,0)

In [42]:
print(initial_cost)

0.6931471805599209


-----

### Derivatives (Gradient Descent)

In [43]:
def compute_gradient(X,y,w,b):
    m,n = X.shape
    
    y_pred = model(X,w,b)
    cost = y_pred-y

    sum_cost_w = np.zeros((n,))
    sum_cost_b = 0.0

    for i in range(m):
        for j in range(n):
            sum_cost_w[j] += cost[i] * X[i,j]
        
        sum_cost_b += cost[i]
    
    dj_dw = sum_cost_w/m
    dj_db = sum_cost_b/m
    
    return dj_dw, dj_db

In [44]:
m,n = X.shape
w_init = np.zeros((n,))
dj_dw,dj_db = compute_gradient(X,y,w_init,0)

In [45]:
print(dj_dw, dj_db)

[ 1.04758918  1.40292688 -0.25632151 -0.04156379] 0.05539358600583091


------

### Gradient Descent

In [51]:
def compute_descent(X,y,w,b,alpha,num_iterations):

    for i in range(num_iterations):
        dj_dw,dj_db = compute_gradient(X,y,w,b)

        w = w - alpha*dj_dw
        b = b - alpha*dj_db

    return w,b

In [83]:
m,n = X.shape
w_init = np.zeros((n,))
b_init = 0.0
alpha = 0.001
num_iterations = 10000
print("Initial Cost: ",compute_cost(X_train,y_train,w_init,b_init))
w,b = compute_descent(X_train,y_train,w_init,b_init,alpha,num_iterations)
print("After Cost: ",compute_cost(X,y,w,b))

Initial Cost:  0.693147180559934
After Cost:  0.10098657643650422


In [84]:
w,b

(array([-1.15840209, -0.63604989, -0.66792977, -0.28057689]),
 0.5678682225781919)

### Testing using test set

In [85]:
print('Shape of X: ',X_test.shape)
print('Shape of y:', y_test.shape)

print("Data type of X:", X_test.dtype)
print("Data type of y:", y_test.dtype)

Shape of X:  (412, 4)
Shape of y: (412,)
Data type of X: float64
Data type of y: int64


In [88]:
y_pred = model(X_test,w,b)
threshold = 0.5
y_pred_labels = (y_pred > threshold).astype(int)
print("Predicted Labels:", y_pred_labels)


Predicted Labels: [0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1
 0 1 0 0 1 0 0 1 1 1 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 1 0 0 0 1
 0 0 0 1 0 0 1 0 0 0 1 1 1 0 1 0 0 1 1 1 1 0 0 1 1 0 0 0 0 0 1 1 0 0 1 1 1
 0 1 1 0 0 1 0 0 1 1 1 0 1 1 1 0 0 1 1 1 1 1 0 0 0 1 0 1 0 1 1 1 0 1 0 1 1
 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 1
 0 0 0 0 0 0 1 1 0 1 0 1 1 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 1 0 1 0 1 0 1 0
 0 1 0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 1 1 1 1 0 0 1 0 0 1 1 1 0 0 0 0 1 1 1 0
 0 1 0 1 0 1 0 0 0 1 1 0 0 1 1 0 1 0 1 1 1 1 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0
 1 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 1 0 1 0 1 0 1 0 0 1 0 0 0 1 1
 1 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 0 1 1 1 0 0 1 1 0 1 1 0 1 1 0 0 1 0 0
 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 1 1 1 1 1 1
 1 0 0 0 0]


In [90]:
cost = compute_cost(X_test,y_test,w,b)
print(cost)

0.09407598521593423


In [92]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test,y_pred_labels))

0.9781553398058253


In [77]:
# Example data
X_test = np.array([[3.6216, 8.6661, -2.8073, -0.44699],
              [4.5459, 8.1674, -2.4586, -1.4621],
              [3.866, -2.6383, 1.9242, 0.10645],
              [-2.589900, -0.39110,	0.93452,0.429720]])

# Use the model function
y_pred_probabilities = model(X_test, w, b)

print(y_pred_probabilities)
# Convert probabilities to class labels using a threshold
threshold = 0.5
y_pred_labels = (y_pred_probabilities > threshold).astype(int)

print("Predicted Probabilities:", y_pred_probabilities)
print("Predicted Labels:", y_pred_labels)

[7.10957986e-04 3.39643730e-04 2.53813280e-02 9.57800708e-01]
Predicted Probabilities: [7.10957986e-04 3.39643730e-04 2.53813280e-02 9.57800708e-01]
Predicted Labels: [0 0 0 1]
