In [514]:
import numpy as np
import pandas as pd
# Seed NumPy's global random generator so repeated fresh runs are repeatable.
np.random.seed(0)

# The file is comma-separated and has no header row, so columns get integer labels.
df = pd.read_csv('spambase.data', sep=',', header=None)

df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,48,49,50,51,52,53,54,55,56,57
0,0.0,0.64,0.64,0.0,0.32,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.778,0.0,0.0,3.756,61,278,1
1,0.21,0.28,0.5,0.0,0.14,0.28,0.21,0.07,0.0,0.94,...,0.0,0.132,0.0,0.372,0.18,0.048,5.114,101,1028,1
2,0.06,0.0,0.71,0.0,1.23,0.19,0.19,0.12,0.64,0.25,...,0.01,0.143,0.0,0.276,0.184,0.01,9.821,485,2259,1
3,0.0,0.0,0.0,0.0,0.63,0.0,0.31,0.63,0.31,0.63,...,0.0,0.137,0.0,0.137,0.0,0.0,3.537,40,191,1
4,0.0,0.0,0.0,0.0,0.63,0.0,0.31,0.63,0.31,0.63,...,0.0,0.135,0.0,0.135,0.0,0.0,3.537,40,191,1


In [515]:
# Shuffle df: sampling len(df) rows without replacement is a random permutation.
df = df.sample(n=len(df))

df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,48,49,50,51,52,53,54,55,56,57
991,0.1,0.2,1.01,0.0,0.8,0.8,0.5,0.0,0.8,0.1,...,0.0,0.111,0.0,0.491,0.158,0.015,8.55,669,1351,1
2565,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.552,0.0,0.0,0.0,0.0,1.0,1,4,0
1906,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.666,4,15,0
1471,0.0,0.82,0.32,0.0,1.14,0.32,0.0,0.16,0.0,0.65,...,0.0,0.0,0.0,0.0,0.29,0.029,2.257,13,158,1
1813,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.022,0.022,0.019,0.022,0.022,0.022,3.482,5,5902,0


In [516]:
# The first two thirds of the rows form the training frame; the remaining
# third is held out for validation.
split_index = (2 * len(df)) // 3
df_tr, df_v = df.iloc[:split_index], df.iloc[split_index:]

df_tr.shape, df_v.shape

((3067, 58), (1534, 58))

In [517]:
# The target is the last column of each frame; extract it as a 1-D NumPy array.
Y_tr, Y_v = (frame.iloc[:, -1].to_numpy() for frame in (df_tr, df_v))

Y_tr.shape, Y_v.shape

((3067,), (1534,))

In [518]:
def drop_target_values(df):
    """Return every column of ``df`` except the last one (the target column)."""
    return df.iloc[:, :-1]


# Feature matrices as NumPy arrays, with the target column removed.
X_tr, X_v = (drop_target_values(frame).to_numpy() for frame in (df_tr, df_v))

X_tr

array([[1.000e-01, 2.000e-01, 1.010e+00, ..., 8.550e+00, 6.690e+02,
        1.351e+03],
       [0.000e+00, 0.000e+00, 0.000e+00, ..., 1.000e+00, 1.000e+00,
        4.000e+00],
       [0.000e+00, 0.000e+00, 0.000e+00, ..., 1.666e+00, 4.000e+00,
        1.500e+01],
       ...,
       [8.900e-01, 0.000e+00, 0.000e+00, ..., 1.583e+00, 9.000e+00,
        7.600e+01],
       [0.000e+00, 0.000e+00, 0.000e+00, ..., 1.720e+00, 1.100e+01,
        4.300e+01],
       [0.000e+00, 0.000e+00, 0.000e+00, ..., 3.125e+00, 1.500e+01,
        5.000e+01]], shape=(3067, 57))

In [519]:
# Standardize data based on TRAINING data only: the mean and standard
# deviation are computed from X_tr and then applied to both splits.
m = np.mean(X_tr, axis=0, keepdims=True)
s = np.std(X_tr, axis=0, keepdims=True)
# A feature that is constant across the training rows has a standard deviation
# of exactly 0; dividing by it would produce NaN/inf. Use a unit scale for
# such columns instead, which maps them to 0 on the training data.
s = np.where(s == 0, 1.0, s)


def zScore(X):
    """Standardize the columns of ``X`` using the training mean ``m`` and scale ``s``.

    ``m`` and ``s`` have shape (1, n_features) (``keepdims=True``), so they
    broadcast across the rows of ``X``.
    """
    return (X - m) / s


X_tr = zScore(X_tr)
X_v = zScore(X_v)

X_tr.shape, X_v.shape

((3067, 57), (1534, 57))

In [520]:
# Prepend a constant column of ones to each feature matrix; in np.dot(X, W)
# that column multiplies W[0], which therefore plays the role of the bias.
X_tr, X_v = (np.insert(X, obj=0, values=1, axis=1) for X in (X_tr, X_v))

X_tr.shape, X_v.shape

((3067, 58), (1534, 58))

In [521]:
# One weight per column of X_tr, drawn uniformly between -1e-4 and 1e-4.
n_weights = X_tr.shape[1]
W = np.random.uniform(low=-1e-4, high=1e-4, size=n_weights)

W, W.shape

(array([-9.46557761e-05,  9.07611718e-05, -5.91838827e-05, -5.14836456e-05,
         3.31809179e-05,  4.04364951e-05,  9.18036728e-05,  1.90533051e-05,
        -1.19340089e-05, -7.51237338e-05,  7.14820680e-05, -9.11470289e-05,
         7.04458230e-05, -3.27372079e-05,  8.16058683e-05,  7.01245665e-05,
         3.97849522e-05, -9.57317233e-05,  3.83331197e-05,  1.23649887e-05,
        -4.95633466e-05,  1.53739572e-05,  6.99087594e-05, -5.78696752e-05,
        -3.20703306e-05, -8.23378356e-05,  5.56747908e-05, -8.01313856e-05,
         8.23980942e-05,  2.17111202e-06, -4.53495408e-05, -8.59947105e-05,
         6.35581516e-05, -2.15422766e-05,  7.69658326e-05,  4.41103151e-05,
        -8.31691376e-05,  5.44249986e-05,  6.45260492e-05, -7.99629778e-05,
         3.40496746e-05, -5.58037931e-05, -3.29296543e-05, -7.19186306e-05,
        -9.12404472e-05,  7.15012927e-05,  8.65764876e-05,  4.43545200e-05,
        -6.71245978e-05, -9.98174161e-05, -2.73065697e-05,  9.06279884e-06,
        -1.6

In [522]:
# Constants, bookkeeping and helper functions for training a logistic
# classifier with batch gradient descent.

# Constants
EPOCHS = range(10000)    # iterated by the training loop; caps the number of updates
ETA = 0.1                # step size applied to the gradient
EPSILON = 10**-7         # added inside log() so that log(0) is never evaluated
DIFF_THRESHOLD = 2**-32  # training stops once the loss changes by less than this

# Initialize
prev_loss_tr = float('inf')
losses_tr, losses_v = [], []  # Store log losses, one entry per epoch
# Bind the names for real (a bare `name: None` annotation creates no variable).
Yhat_tr = None
Yhat_v = None


# Calculations for a logistic classifier
def sigmoid(X, W):
    """Return the logistic function applied to ``np.dot(X, W)``.

    Each output lies in (0, 1) and is used as the predicted probability of
    the positive class.
    """
    return 1 / (1 + np.exp(-np.dot(X, W)))


def log_loss(Yhat, Y):
    """Return the mean binary cross-entropy of predictions ``Yhat`` against labels ``Y``.

    ``Y`` holds 0/1 labels and ``Yhat`` the predicted probability of class 1.
    ``EPSILON`` is added inside both logarithms so that a prediction of
    exactly 0 or 1 does not evaluate ``log(0)``.
    """
    # The negative-class term must depend on the prediction (1 - Yhat), not on
    # the label (1 - Y); otherwise mistakes on negative examples cost nothing.
    return np.mean(
        -Y * np.log(Yhat + EPSILON) - (1 - Y) * np.log(1 - Yhat + EPSILON))


for _ in EPOCHS:
    # Predictions of the current weights on both splits.
    Yhat_tr = sigmoid(X_tr, W)
    Yhat_v = sigmoid(X_v, W)

    # Log loss of this epoch for each split, kept for later inspection.
    loss_tr = log_loss(Yhat_tr, Y_tr)
    loss_v = log_loss(Yhat_v, Y_v)
    losses_tr.append(loss_tr)
    losses_v.append(loss_v)

    # Convergence is judged on the change in training loss since the
    # previous epoch; the weight update below is still applied either way.
    converged = abs(prev_loss_tr - loss_tr) < DIFF_THRESHOLD

    # Gradient of the mean cross-entropy with respect to W, then one
    # in-place descent step of size ETA.
    g = np.dot(X_tr.T, (Yhat_tr - Y_tr)) * (1 / X_tr.shape[0])
    W -= (ETA * g)

    if converged:
        break
    prev_loss_tr = loss_tr

W

array([-1.83471141, -0.07183479, -0.19881311,  0.07916419,  0.82331624,
        0.35581408,  0.24859267,  0.90422718,  0.30682385,  0.09344803,
        0.13969244, -0.06020845, -0.14929327, -0.07094347,  0.04584685,
        0.58814341,  0.60162289,  0.4809633 ,  0.08796408,  0.10482691,
        0.32107353,  0.27991527,  0.12704902,  0.94125381,  0.15537219,
       -2.16686915, -1.02536659, -2.57774042,  0.25355894, -0.94053005,
       -0.04456438, -0.43596104, -0.3788729 , -0.60664306,  0.02163527,
       -0.93626328,  0.27795599,  0.00434759, -0.17730048, -0.25070277,
       -0.1175998 , -1.16188058, -1.4788898 , -0.19584422, -0.90356256,
       -0.63140026, -0.97860837, -0.15651626, -0.54477678, -0.21325357,
       -0.13486023, -0.09177037,  0.62928373,  1.40580601,  1.03189777,
       -0.24626437,  1.13213888,  0.4122811 ])

In [None]:
# Turn predicted probabilities into hard 0/1 class labels.
def classify(Y):
    """Return an int array that is 1 where ``Y`` is at least 0.5 and 0 elsewhere."""
    return (Y >= 0.5).astype(int)


# Re-evaluate both splits with the current weights, then threshold.
Yhat_tr, Yhat_v = sigmoid(X_tr, W), sigmoid(X_v, W)
class_tr, class_v = classify(Yhat_tr), classify(Yhat_v)