# In [4]:
def gradient_descent(x, y, w_init, b_init, cost_function, gradient_function, alpha, num_iters):
    """
    Perform batch gradient descent to fit the parameters w, b of the model.

    Takes num_iters gradient steps with learning rate alpha, starting from
    (w_init, b_init). The initial values are deep-copied, so the caller's
    w_init and b_init are never modified.

    Args:
        x (ndarray): Shape (m,n) Input to the model (features of students)
        y (ndarray): Shape (m,) Target (final grade G3 of students)
        w_init (ndarray): Shape (n,) Initial parameter of the model
        b_init (scalar):  Initial parameter of the model
        cost_function: Callable (x, y, w, b) -> cost, used to record the cost
        gradient_function: Callable (x, y, w, b) -> (dj_dw, dj_db), the
            gradient of the cost with respect to w and b
        alpha (float): Learning rate
        num_iters (int): Number of iterations to run gradient descent

    Returns:
        w (ndarray): Shape (n,) Updated values of parameters of the model after running gradient descent
        b (scalar): Updated value of parameter of the model after running gradient descent
        J_history (list): Cost after each of the first 10000 iterations (for graphing)
    """

    # Cost is only recorded for this many leading iterations so that very
    # long runs do not grow the history list without bound.
    max_history = 10000

    J_history = []

    # Work on copies so the caller's initial values stay untouched.
    w = copy.deepcopy(w_init)
    b = copy.deepcopy(b_init)

    for i in range(num_iters):

        # Calculate the gradient at the current parameters
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Update all parameters simultaneously. Rebinding w (instead of
        # assigning element-wise into the existing array) lets an
        # integer-dtype w_init be promoted to float; in-place assignment
        # would silently truncate every step back to an integer.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save the cost J at each step
        if i < max_history:
            J_history.append(cost_function(x, y, w, b))

    return w, b, J_history  # J_history is returned for graphing
        
            
                
        
        
        
    