## Binary Classification

In [160]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)
df = pd.read_csv("/content/breast-cancer.csv")

df.head(5)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


## z-score normalization
is a statistical method used to transform data into a standard normal distribution. 

$$ Z=(X−μ)/σ $$

where, 

    - Z is the Z-score of the data point
    - X is the individual data point.
    - μ (mu) is the mean of the dataset.
    - σ (sigma) is the standard deviation of the dataset.

In [161]:
x_features = df[['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean', 'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean', 'radius_se', 'perimeter_se', 'area_se', 'smoothness_se', 'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se', 'fractal_dimension_se', 'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst', 'smoothness_worst', 'compactness_worst', 'concavity_worst', 'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst']]

for i in range(x_features.columns.size) :

  ele = df[x_features.columns[i]].copy()
  m, d = ele.mean(), ele.std()
  print([m, d])

  for j in range(len(ele)) :
    entries = ele[j]
    ele[j] = (entries - m)/d

X_train = np.array(x_features)

y_features = df['diagnosis'].copy()

for i in range(len(y_features)) :
  val = y_features[i]
  if val == 'M' :
    y_features[i] = 1.0
  else :
    y_features[i] = 0.0

y_train = np.array(y_features)

print(X_train.shape)

[14.127291739894552, 3.5240488262120775]
[19.289648506151142, 4.301035768166949]
[91.96903339191564, 24.298981038754906]
[654.8891036906855, 351.914129181653]
[0.0963602811950791, 0.01406412813767362]
[0.10434098418277679, 0.052812757932512194]
[0.0887993158172232, 0.07971980870789348]
[0.04891914586994728, 0.038802844859153605]
[0.18116186291739894, 0.027414281336035715]
[0.06279760984182776, 0.007060362795084459]
[0.40517205623901575, 0.2773127329861039]
[2.8660592267135327, 2.0218545540421076]
[40.337079086116, 45.49100551613181]
[0.007040978910369069, 0.0030025179438390656]
[0.025478138840070295, 0.017908179325677388]
[0.03189371634446397, 0.03018606032298841]
[0.011796137082601054, 0.006170285174046869]
[0.02054229876977153, 0.008266371528798399]
[0.0037949038664323374, 0.002646070967089195]
[16.269189806678387, 4.833241580469323]
[25.677223198594024, 6.146257623038319]
[107.26121265377857, 33.602542269036356]
[880.5831282952548, 569.356992669949]
[0.13236859402460457, 0.022832429

### Sigmoid Function

$$ f_{\mathbf{w},b}(x) = g(\mathbf{w}\cdot \mathbf{x} + b)$$
where function $g$ is the sigmoid function. The sigmoid function is defined as:

$$g(z) = \frac{1}{1+e^{-z}}$$

In [146]:
def sigmoid_fn(w,x_i,b) :
  val = -1*(np.dot(w,x_i) + b)
  return 1/(1+np.exp(val))

### Loss Function

 $$loss(f_{\mathbf{w},b}(\mathbf{x}^{(i)}), y^{(i)}) = (-y^{(i)} \log\left(f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) - \left( 1 - y^{(i)}\right) \log \left( 1 - f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right)$$

 where $f_{\mathbf{w},b}(\mathbf{x}^{(i)})$ is the model's prediction, while $y^{(i)}$, which is the actual label

In [164]:
def loss_fn(w,x_i,b,y_i) :
  eps = 1e-10
  f = sigmoid_fn(w,x_i,b)
  return (-y_i)*(np.log(f+eps)) - (1-y_i)*(np.log(1-f+eps))

### Cost Function

$$ J(\mathbf{w},b) = \frac{1}{m}\sum_{i=0}^{m-1} \left[ loss(f_{\mathbf{w},b}(\mathbf{x}^{(i)}), y^{(i)}) \right]$$

In [148]:
def cost_fn(X,w,b,y) :
  m = X.shape[0]
  cost = 0.0
  for i in range(m) :
    cost+=loss_fn(w,X[i],b,y[i])
  cost = cost/m
  return cost

### Gradient Descent

$$\begin{align*}& \text{repeat until convergence:} \; \lbrace \newline \; & b := b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b} \newline       \; & w_j := w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \tag{1}  \; & \text{for j := 0..n-1}\newline & \rbrace\end{align*}$$

where,

$$
\frac{\partial J(\mathbf{w},b)}{\partial b}  = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - \mathbf{y}^{(i)})
$$
$$
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - \mathbf{y}^{(i)})x_{j}^{(i)} 
$$

In [149]:
def derivative(X,w,b,y) :
  m = X.shape[0]
  dj_dw, dj_db = 0.0, 0.0
  for i in range(m) :
    dj_db += (sigmoid_fn(w,X[i],b)-y[i])
    dj_dw += (sigmoid_fn(w,X[i],b)-y[i])*X[i]

  return dj_dw/m, dj_db/m

In [165]:
def gradient_descent(X,w,b,y,alpha,itr) :
  w_copy = w
  b_copy = b
  J_history = []
  for i in range(itr) :
    dj_dw, dj_db = derivative(X,w_copy,b_copy,y)
    b_copy = b_copy - alpha * dj_db
    w_copy = w_copy - alpha * dj_dw

    if i<1000000 :
      J_history.append(cost_fn(X,w_copy,b_copy,y))

    if i%1000 == 0:
      print(f"Iteration {i}: Cost {J_history[-1]}")

  return w_copy, b_copy, J_history

In [166]:
#main executing code
w_init = np.zeros(X_train.shape[1])
b_init = 0.0
iterations = 10000
alpha = 1e-3

w_out, b_out, J_hist = gradient_descent(X_train, w_init, b_init, y_train, alpha, iterations)
print(w_out, b_out)

Iteration 0: Cost 14.444022137682863


  return 1/(1+np.exp(val))


Iteration 1000: Cost 1.848004563684795
Iteration 2000: Cost 1.5437679266467697
Iteration 3000: Cost 12.561067745955441
Iteration 4000: Cost 12.36583885738796
Iteration 5000: Cost 2.121514347708256
Iteration 6000: Cost 2.2709343680453586
Iteration 7000: Cost 2.2904498787589893
Iteration 8000: Cost 2.1990467281596158
Iteration 9000: Cost 2.271388811546354
[-6.54367651e-01  3.67965915e-01 -2.88802080e+00 -4.71686366e-01
  8.40804316e-04  3.03670854e-02  5.01659094e-02  1.98080970e-02
  7.33552924e-04 -1.49683425e-03 -7.03854834e-03  1.19256028e-01
  1.20555288e+00  5.74479140e-04  7.98677606e-03  1.11118377e-02
  2.46705150e-03  1.42409228e-03  6.04802948e-04 -6.93690888e-01
  1.01158998e+00 -1.96670580e+00  9.04181760e-01  5.57583756e-03
  1.08375546e-01  1.43452103e-01  3.68694719e-02  1.78032960e-02
  6.45680294e-03] -0.08689004584890368


### Prediction

In [167]:
pred = []
for ele in X_train :
  prediction = sigmoid_fn(w_out, ele, b_out)
  pred.append(prediction)

print(pred)

acc_count = 0
for i in range(len(pred)) :
  res =  y_train[i]
  val = 1.0 if pred[i] > 0.9 else 0.0
  if val == res :
    acc_count+=1

accuracy = (acc_count/len(pred))*100

print(f"Accuracy - {accuracy}%")

[1.0, 1.0, 1.0, 5.514992517143076e-17, 1.0, 1.0, 1.0, 1.0, 0.9999871726874036, 1.0, 1.0, 1.0, 1.0, 6.563086036935115e-17, 4.4570123661453446e-36, 1.0, 1.0, 1.0, 1.0, 1.6035026053992828e-17, 5.770782019835893e-38, 4.1416666875684945e-53, 1.0, 1.0, 1.0, 1.0, 0.9999999999999272, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.430876768175451e-12, 1.024596316602057e-56, 1.0610178417520484e-08, 5.176955079007163e-11, 1.0, 1.2195939021815783e-18, 1.0, 1.0, 0.7082485296411056, 1.0, 4.186986974574663e-49, 0.9999999999997815, 2.781075271806754e-24, 5.256352759133519e-15, 1.215426100367904e-27, 2.945123941420222e-39, 1.2653505872466283e-35, 1.0, 1.0, 1.7061969486766062e-29, 1.0, 1.0, 4.056320927571313e-22, 2.3367236830257685e-50, 2.0953880800486305e-38, 1.5839946063371428e-40, 1.0, 3.352121238906284e-45, 1.0, 1.0, 8.428846448144926e-40, 1.0478323236053884e-38, 9.55592469751289e-38, 1.0198341829304203e-47, 1.0, 5.773357812477401e-49, 1.0, 0.9999998531739622, 1.055042011662108e-17, 1.0, 1.914497428