# Logistic Regression from Scratch (Diabetes Dataset)

In this notebook, Iâ€™m implementing Logistic Regression *from scratch* without sklearn.
The dataset used is the PIMA Diabetes dataset.

---

## 1. Import dependencies Load and explore dataset


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("diabetes.csv")
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


# 2. Train/Test Split
We split the dataset into train and test sets using a custom shuffle function.


In [3]:
def Shuffle_split(data ,test_ratio,seed=42):
    """ Shuffle and split data set into train and test sets.
    
    Args:
        data(Data Frame):Dataset
        test ratio(Float):fraction of test data
        seed(int): random seed for reproducebility

    returns:
        train_set,test_set,(Data Frame ,Data Frame)
    """
    np.random.seed(seed)
    shuffled_indeces = np.random.permutation(len(data))
    shuffle_number = int (test_ratio * len(data))
    test_indeces = shuffled_indeces[:shuffle_number]
    train_indeces = shuffled_indeces[shuffle_number:]

    return data.iloc[train_indeces] , data.iloc[test_indeces]

In [4]:
train_set, test_set = Shuffle_split(df,0.2)

In [5]:
# Separate features and labels
X = train_set.drop("Outcome", axis=1).to_numpy()
Y = train_set["Outcome"].to_numpy().reshape(-1, 1)

# 3. Normalization

In [6]:
def zscore_normalize_features(X):
    """
    Normalize features using z-score method.

    Args:
        X (ndarray): feature matrix

    Returns:
        X_norm, mu, sigma
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma


In [7]:
X_norm ,mean ,sigma= zscore_normalize_features(X)

# 4. Sigmoid Function

In [8]:
def sigmoid(z):
    return 1 / ( 1 + np.exp(-z))

# 5. Cost Function

In [9]:
# define a cost function to calculate the error by Mean Square Method.
def compute_cost_logistic(X,Y,w,b):
    """ 
    compute the logistics regression cost.
    Args:
        X (ndarray): shape (m, n) feature matrix
        Y (ndarray): shape (m, 1) labels
        w (ndarray): shape (n, 1) weights
        b (float): bias term
    returns:
        cost:float
    
    """

    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        z_i = np.dot(X[i],w) + b
        f_wb_i = sigmoid(z_i)

        cost += -Y[i] * np.log(f_wb_i) - (1 - Y[i]) * np.log( 1 - f_wb_i)
        
    cost = cost/m
    return cost

In [10]:
w_tmp = np.ones((8,1))
b_tmp = -1.5
X_train = X_norm
y_train = Y
print(compute_cost_logistic(X_train, y_train, w_tmp, b_tmp))

[0.84478499]


## Logistic Gradient Descent


In [11]:
import copy, math

In [12]:
def compute_gradient_logistic(X, y, w, b):
    """
    computes the gradient for logistic regression
    
    Args:
        X (ndarray(m,n)): data, m examples with n features
        y (ndarray(m,)) : target values
        w (ndarray(n,)) : model parameters
        b (scaler)      : model parameter

    returns:
        dj_dw (ndarray (n,)): The Gradient of cost w.r.t parameter w.
        dj_db (scaler): The Gradient of cost w.r.t parameter b.
    """
    
    m,n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):
        f_wb_i = sigmoid(np.dot(X[i],w) + b)           #(n,)(n,)=scaler
        err_i = f_wb_i - y[i]                          # scaler
        for j in range(n):                              
            dj_dw[j] = dj_dw[j] + err_i * X[i,j]       #scaler
        dj_db = dj_db + err_i
    dj_dw = dj_dw/m                                    #(n,)
    dj_db = dj_db/m                                    # scaler

    return dj_db, dj_dw

In [13]:
X_tmp = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_tmp = np.array([0, 0, 0, 1, 1, 1])
w_tmp = np.array([2.,3.])
b_tmp = 1.
dj_db_tmp, dj_dw_tmp = compute_gradient_logistic(X_tmp, y_tmp, w_tmp, b_tmp)
print(f"dj_db: {dj_db_tmp}" )
print(f"dj_dw: {dj_dw_tmp.tolist()}" )

dj_db: 0.49861806546328574
dj_dw: [0.498333393278696, 0.49883942983996693]


In [18]:
def gradient_descent(X, y, w_in ,b_in , alpha, num_iters):

    J_history = []
    w = copy.deepcopy(w_in)          # avoid modifying global w within function     
    b = b_in

    for i in range (num_iters):
        # Calculate the grandient and Update the parameter
        dj_db,dj_dw = compute_gradient_logistic(X, y, w, b)

        # update the parameter using w, b, alpha and  grradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            J_history.append( compute_cost_logistic(X, y, w, b) )

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")
            

    return w, b, J_history                     # return final w ,b and J history for graphing     

In [25]:
w_tmp  = np.zeros_like(X_train[0])
b_tmp  = 0.
alph = 0.8
iters = 30000

w_out, b_out, _ = gradient_descent(X_norm, Y, w_tmp, b_tmp, alph, iters)
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")


  dj_dw[j] = dj_dw[j] + err_i * X[i,j]       #scaler


Iteration    0: Cost [0.60612322]   
Iteration 3000: Cost [0.4674387]   
Iteration 6000: Cost [0.4674387]   
Iteration 9000: Cost [0.4674387]   
Iteration 12000: Cost [0.4674387]   
Iteration 15000: Cost [0.4674387]   
Iteration 18000: Cost [0.4674387]   
Iteration 21000: Cost [0.4674387]   
Iteration 24000: Cost [0.4674387]   
Iteration 27000: Cost [0.4674387]   

updated parameters: w:[ 0.21125358  1.09744687 -0.2560578   0.04580222 -0.21048872  0.79296919
  0.23223551  0.42526133], b:[-0.89535574]


In [24]:
print("Final cost:", compute_cost_logistic(X_norm, Y, w_out, b_out))

Final cost: [0.4674387]
