In [23]:
import pandas as pd
import math
# Read the diabetes CSV into a DataFrame (absolute path; presumably the Colab
# upload location -- adjust when running elsewhere).
pima = pd.read_csv(filepath_or_buffer="/content/diabetes.csv")

In [24]:
# Everything except the 'Outcome' column is a feature; 'Outcome' is the label.
X_train = pima.drop(columns='Outcome')
y_train = pima['Outcome']
# Show both objects, features first, each followed by a newline.
print(X_train, y_train, sep="\n")


     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0              6      148             72             35        0  33.6   
1              1       85             66             29        0  26.6   
2              8      183             64              0        0  23.3   
3              1       89             66             23       94  28.1   
4              0      137             40             35      168  43.1   
..           ...      ...            ...            ...      ...   ...   
763           10      101             76             48      180  32.9   
764            2      122             70             27        0  36.8   
765            5      121             72             23      112  26.2   
766            1      126             60              0        0  30.1   
767            1       93             70             31        0  30.4   

     DiabetesPedigreeFunction  Age  
0                       0.627   50  
1                       0.351   31  


In [25]:
# Report the dimensions of the features and labels, then preview the first rows.
for _label, _data in (("X_train", X_train), ("y_train", y_train)):
    print(_label, _data.shape)
print(X_train.head())

X_train (768, 8)
y_train (768,)
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  
0                     0.627   50  
1                     0.351   31  
2                     0.672   32  
3                     0.167   21  
4                     2.288   33  


In [26]:
# UNQ_C1
# GRADED FUNCTION: sigmoid
import numpy as np
def sigmoid(z):
    """
    Compute the logistic sigmoid 1 / (1 + exp(-z)) element-wise.

    exp() is only ever evaluated on -|z|, which lies in (0, 1], so the
    computation cannot overflow for inputs of large magnitude. The two
    branches below are the algebraically equivalent forms
    1 / (1 + e^-z) (used for z >= 0) and e^z / (1 + e^z) (used for z < 0).

    Args:
        z (scalar or array_like): input value(s); converted to a float ndarray.

    Returns:
        g: sigmoid(z). A numpy float scalar for scalar input, otherwise an
           ndarray with the same shape as z.

    """
    z = np.asarray(z, dtype=float)

    # exp(-|z|) is always in (0, 1]; it may underflow to 0 but never overflows.
    decay = np.exp(-np.abs(z))
    g = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with an empty tuple turns a 0-d result back into a numpy scalar
    # and leaves arrays of higher rank unchanged.
    return g[()]

In [27]:
#Compute Cost
def compute_cost(x,y,w,b):
  """
  Mean logistic (binary cross-entropy) cost over all examples.

  The per-example loss -y*log(f) - (1-y)*log(1-f), with f = sigmoid(z) and
  z = x.w + b, is evaluated in the equivalent form log(1 + e^z) - y*z.
  This avoids log(0) (and the resulting inf/nan) when f saturates to 0 or 1.

  Args:
    x : (array_like Shape (m,n)) data, m examples by n features
    y : (array_like Shape (m,))  target values (0 or 1)
    w : (ndarray Shape (n,))     values of parameters of the model
    b : (scalar)                 value of bias parameter of the model
  Returns:
    total_cost : (scalar) cost averaged over the m examples
  """
  # Convert up front so pandas inputs are used positionally, not by index label.
  x = np.asarray(x, dtype=float)
  y = np.asarray(y, dtype=float)
  m = x.shape[0]

  z = x @ w + b
  # logaddexp(0, z) == log(1 + e^z), computed without overflow.
  losses = np.logaddexp(0.0, z) - y * z
  total_cost = np.sum(losses) / m
  return total_cost

In [28]:
#Function to compute gradient
def compute_gradient(X, y, w, b):
  """
  Gradient of the mean logistic cost with respect to w and b.

  Computed in vectorised form: with err = sigmoid(X.w + b) - y,
  dj_dw = X^T.err / m and dj_db = sum(err) / m.

  Args:
    X : (array_like Shape (m,n)) data, m examples by n features
    y : (array_like Shape (m,))  target value
    w : (ndarray Shape (n,))     values of parameters of the model
    b : (scalar)                 value of bias parameter of the model
  Returns
    dj_dw : (ndarray Shape (n,)) The gradient of the cost w.r.t. the parameters w.
    dj_db : (scalar)             The gradient of the cost w.r.t. the parameter b.
  """
  # Convert up front so pandas inputs are used positionally, not by index label.
  X = np.asarray(X, dtype=float)
  y = np.asarray(y, dtype=float)
  m = X.shape[0]

  # Prediction error for every example at once, shape (m,).
  err = sigmoid(X @ w + b) - y

  dj_dw = X.T @ err / m
  dj_db = np.sum(err) / m

  return dj_dw, dj_db

In [29]:
def gradient_descent(X, y, w_in, b_in, cost_function, compute_gradient, alpha, num_iters):
    """
    Performs batch gradient descent to learn the parameters w and b, taking
    num_iters gradient steps with learning rate alpha.

    Args:
      X :    (ndarray Shape (m, n)) data, m examples by n features
      y :    (ndarray Shape (m,))  target value
      w_in : (ndarray Shape (n,))  Initial values of parameters of the model
      b_in : (scalar)              Initial value of parameter of the model
      cost_function :              function to compute cost, called as
                                   cost_function(X, y, w, b)
      compute_gradient :           function to compute gradient, called as
                                   compute_gradient(X, y, w, b) and returning
                                   (dj_dw, dj_db)
      alpha : (float)              Learning rate
      num_iters : (int)            number of iterations to run gradient descent
    Returns:
      w : (ndarray Shape (n,)) Updated values of parameters of the model after
          running gradient descent
      b : (scalar)                Updated value of parameter of the model after
          running gradient descent
      J_history : (list) cost after each of the first 100000 iterations
      w_history : (list) value of w at every iteration where progress was printed
    """

    # Stop recording costs after this many iterations to prevent resource exhaustion
    max_cost_history = 100000

    # Progress is reported about ten times over the run, or on every
    # iteration when num_iters < 10. Loop-invariant, so computed once.
    report_every = max(1, math.ceil(num_iters / 10))

    # Costs J and selected w's, kept primarily for graphing later
    J_history = []
    w_history = []

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_dw, dj_db = compute_gradient(X, y, w_in, b_in)

        # Rebinding (rather than updating in place) leaves the caller's arrays
        # and the snapshots already stored in w_history untouched.
        w_in = w_in - alpha * dj_dw
        b_in = b_in - alpha * dj_db

        # Save cost J at each iteration while the history is below its cap
        cost = None
        if i < max_cost_history:
            cost = cost_function(X, y, w_in, b_in)
            J_history.append(cost)

        if i % report_every == 0 or i == (num_iters - 1):
            # Past the history cap no cost was recorded this iteration;
            # compute it now so the printed value is never stale.
            if cost is None:
                cost = cost_function(X, y, w_in, b_in)
            w_history.append(w_in)
            print(f"Iteration {i:4}: Cost {float(cost):8.2f}   ")

    return w_in, b_in, J_history, w_history #return w and J,w history for graphing

In [30]:
# Fix the seed so the random initial weights are the same on every run
np.random.seed(1)
m, n = X_train.shape
# Small random starting weights in [-0.005, 0.005), one per feature
initial_w = 0.01 * (np.random.rand(n) - 0.5)
initial_b = -8

# Some gradient descent settings
iterations = 10000
alpha = 0.0001

# Pass plain ndarrays for both X and y: the cost/gradient functions index
# their inputs by position, which is only guaranteed for arrays (a pandas
# Series is indexed by label).
w,b, J_history,_ = gradient_descent(X_train.to_numpy(), y_train.to_numpy(), initial_w, initial_b,
                                   compute_cost, compute_gradient, alpha, iterations)

Iteration    0: Cost     2.55   
Iteration 1000: Cost     0.49   
Iteration 2000: Cost     0.48   
Iteration 3000: Cost     0.48   
Iteration 4000: Cost     0.48   
Iteration 5000: Cost     0.48   
Iteration 6000: Cost     0.48   
Iteration 7000: Cost     0.48   
Iteration 8000: Cost     0.48   
Iteration 9000: Cost     0.48   
Iteration 9999: Cost     0.48   


In [40]:
# UNQ_C4
# GRADED FUNCTION: predict

def predict(X, w, b):
    """
    Predict whether the label is 0 or 1 using learned logistic
    regression parameters w and b.

    Args:
      X : (array_like Shape (m,n)) data, m examples by n features
      w : (ndarray Shape (n,))     values of parameters of the model
      b : (scalar)                 value of bias parameter of the model

    Returns:
      p : (ndarray (m,)) The predictions for X using a threshold at 0.5,
          stored as floats (1.0 or 0.0). A probability of exactly 0.5 is
          classified as 0.
    """
    # Convert up front so a pandas DataFrame is accepted as well as an ndarray.
    X = np.asarray(X, dtype=float)

    # Model output sigmoid(X.w + b) for all m examples at once.
    probabilities = sigmoid(X @ w + b)

    # Strictly-greater comparison, then cast the booleans to 1.0 / 0.0.
    p = (probabilities > 0.5).astype(float)
    return p

In [38]:
# Percentage of training examples whose predicted label equals the true label
p = predict(X_train.values, w, b)
train_accuracy = np.mean(p == y_train) * 100
print('Train Accuracy: %f' % train_accuracy)

Train Accuracy: 77.213542
