## GRADIENT Descent for Linear Regression 

# numpy : a popular library for scientific computing
# matplotlib : a popular library for plotting data



In [9]:
import math , copy 
import numpy as np 
import matplotlib.pyplot as plt 


In [2]:
# Two-example training set: inputs and their matching targets.
x_train, y_train = (
    np.array([1.0, 2.0]),      # inputs
    np.array([300.0, 500.0]),  # targets
)

In [5]:
def compute_cost(x, y, w, b):
    """
    Compute the squared-error cost of the line f(x) = w*x + b over the data.

    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): Target values
      w (scalar)      : Model parameter (slope)
      b (scalar)      : Model parameter (intercept)

    Returns:
      cost_total_value (scalar): Sum of squared errors divided by 2*m.
    """
    m = x.shape[0]
    cost = 0
    for i in range(m):
        f_wb = w * x[i] + b
        # Compare against the i-th target only; subtracting the whole y
        # vector would broadcast and turn the cost into an array.
        cost = cost + (f_wb - y[i]) ** 2
    cost_total_value = 1 / (2 * m) * cost

    return cost_total_value


# Gradient Descent for Linear Regression 

In [6]:
def compute_gradient(x, y, w, b):
    """
    Compute the gradient of the squared-error cost for f(x) = w*x + b.

    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): Target values
      w, b (scalar)   : Model parameters

    Returns:
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b
      dj_dw (scalar): The gradient of the cost w.r.t. the parameter w

    Note:
      The values are returned in the order (dj_db, dj_dw). Callers that
      unpack as ``dj_dw, dj_db = ...`` would get them swapped.
    """
    m = x.shape[0]
    dj_dw = 0
    dj_db = 0

    for i in range(m):
        f_wb = w * x[i] + b
        dj_dw_i = (f_wb - y[i]) * x[i]
        dj_db_i = f_wb - y[i]
        # Accumulate every example's contribution; keeping only the last
        # one would make the gradient depend on a single data point.
        dj_dw = dj_dw + dj_dw_i
        dj_db = dj_db + dj_db_i
    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_db, dj_dw




In [12]:
import numpy as np
import math

def compute_cost(x, y, w, b):
    """
    Evaluate the squared-error cost of the linear model f(x) = w*x + b.

    Args:
      x (ndarray (m,))  : Input data, m examples
      y (ndarray (m,))  : Target values
      w (scalar)        : Model parameter (slope)
      b (scalar)        : Model parameter (intercept)

    Returns:
      cost (float): Sum of squared prediction errors scaled by 1/(2m).
    """
    n_examples = len(x)
    residuals = w * x + b - y          # prediction minus target, per example
    scale = 1 / (2 * n_examples)
    return scale * np.sum(residuals ** 2)

def compute_gradient(x, y, w, b):
    """
    Evaluate the gradient of the squared-error cost for f(x) = w*x + b.

    Args:
      x (ndarray (m,)): Input data, m examples
      y (ndarray (m,)): Target values
      w,b (scalar)    : Model parameters

    Returns:
      dj_dw (scalar): Gradient of the cost with respect to w
      dj_db (scalar): Gradient of the cost with respect to b
    """
    n_examples = len(x)
    inv_m = 1 / n_examples
    residuals = w * x + b - y          # prediction minus target, per example

    grad_w = inv_m * np.sum(residuals * x)
    grad_b = inv_m * np.sum(residuals)
    return grad_w, grad_b

def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """
    Fit w and b by running num_iters gradient steps with learning rate alpha.

    Args:
      x (ndarray (m,))  : Data, m examples
      y (ndarray (m,))  : Target values
      w_in,b_in (scalar): Starting values of the model parameters
      alpha (float)     : Learning rate
      num_iters (int)   : Number of gradient steps to take
      cost_function     : Called as cost_function(x, y, w, b) to get the cost
      gradient_function : Called as gradient_function(x, y, w, b); its result
                          is unpacked as (dj_dw, dj_db)

    Returns:
      w (scalar): Value of w after the last step
      b (scalar): Value of b after the last step
      J_history (List): Cost after each recorded step
      p_history (List): [w, b] after each recorded step
    """
    history_cap = 100000  # only the first 100000 steps are recorded
    cost_log, param_log = [], []
    w, b = w_in, b_in

    for step in range(num_iters):
        grad_w, grad_b = gradient_function(x, y, w, b)

        # Both updates use the gradient evaluated at the previous (w, b).
        b, w = b - alpha * grad_b, w - alpha * grad_w

        if step < history_cap:
            cost_log.append(cost_function(x, y, w, b))
            param_log.append([w, b])

        # Progress report at roughly ten evenly spaced steps.
        report_every = math.ceil(num_iters / 10)
        if step % report_every == 0:
            print(f"Iteration {step:4}: Cost {cost_log[-1]:0.2e}  ",
                  f"dj_dw: {grad_w: 0.3e}, dj_db: {grad_b: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, cost_log, param_log


In [13]:
# Example training data (x_train, y_train)
x_train = np.array([1, 2, 3, 4, 5])   # size
y_train = np.array([2, 4, 6, 8, 10])  # price

# Starting parameters and hyper-parameters
w_in = 0          # initial value for w
b_in = 0          # initial value for b
alpha = 0.01      # learning rate
num_iters = 1000  # number of iterations

# Run gradient descent
w, b, J_history, p_history = gradient_descent(
    x_train, y_train, w_in, b_in, alpha, num_iters,
    compute_cost, compute_gradient,
)

print(f"Final values: w = {w}, b = {b}")


Iteration    0: Cost 1.71e+01   dj_dw: -2.200e+01, dj_db: -6.000e+00   w:  2.200e-01, b: 6.00000e-02
Iteration  100: Cost 1.71e-02   dj_dw: -2.042e-02, dj_db:  7.342e-02   w:  1.880e+00, b: 4.33677e-01
Iteration  200: Cost 1.22e-02   dj_dw: -1.717e-02, dj_db:  6.201e-02   w:  1.899e+00, b: 3.66176e-01
Iteration  300: Cost 8.70e-03   dj_dw: -1.450e-02, dj_db:  5.235e-02   w:  1.914e+00, b: 3.09179e-01
Iteration  400: Cost 6.20e-03   dj_dw: -1.224e-02, dj_db:  4.421e-02   w:  1.928e+00, b: 2.61055e-01
Iteration  500: Cost 4.42e-03   dj_dw: -1.034e-02, dj_db:  3.732e-02   w:  1.939e+00, b: 2.20421e-01
Iteration  600: Cost 3.15e-03   dj_dw: -8.729e-03, dj_db:  3.152e-02   w:  1.948e+00, b: 1.86112e-01
Iteration  700: Cost 2.25e-03   dj_dw: -7.370e-03, dj_db:  2.661e-02   w:  1.956e+00, b: 1.57143e-01
Iteration  800: Cost 1.60e-03   dj_dw: -6.223e-03, dj_db:  2.247e-02   w:  1.963e+00, b: 1.32683e-01
Iteration  900: Cost 1.14e-03   dj_dw: -5.255e-03, dj_db:  1.897e-02   w:  1.969e+00, b: 1.

In [52]:
# let's rebuild this thing again 


def compute_cost(x, y, w, b):
    """
    Return the squared-error cost of the model f(x) = w*x + b.

    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): Target values
      w, b (scalar)   : Model parameters

    Returns:
      Sum of squared errors over all examples, scaled by 1/(2m).
    """
    n_examples = x.shape[0]
    squared_errors = (w * x + b - y) ** 2
    return 1 / (2 * n_examples) * np.sum(squared_errors)


def compute_gradient(x, y, w, b):
    """
    Return the gradient of the squared-error cost for f(x) = w*x + b.

    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): Target values
      w, b (scalar)   : Model parameters

    Returns:
      dj_dw (scalar): Gradient of the cost with respect to w
      dj_db (scalar): Gradient of the cost with respect to b
    """
    n_examples = len(x)
    errors = w * x + b - y             # prediction minus target, per example

    grad_w = np.sum(x * errors) / n_examples
    grad_b = np.sum(errors) / n_examples
    return grad_w, grad_b




def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """
    Run num_iters steps of gradient descent on w and b, starting from
    w_in and b_in, with learning rate alpha.

    cost_function and gradient_function are both called as f(x, y, w, b);
    the gradient result is unpacked as (dj_dw, dj_db).

    Returns:
      w, b      : Parameter values after the last step
      J_history : Cost after each recorded step
      p_history : [w, b] after each recorded step
    """
    w, b = w_in, b_in
    costs = []
    params = []
    # Progress is printed whenever the step index is a multiple of this.
    print_interval = math.ceil(num_iters / 10)

    for it in range(num_iters):
        # Gradient at the current parameters
        g_w, g_b = gradient_function(x, y, w, b)

        # Step both parameters against their gradients
        b -= alpha * g_b
        w -= alpha * g_w

        # Keep the history bounded to the first 100000 steps
        if it < 100000:
            costs.append(cost_function(x, y, w, b))
            params.append([w, b])

        if it % print_interval == 0:
            print(f"Iteration {it:4}: Cost {costs[-1]:0.2e}  ",
                  f"dj_dw: {g_w: 0.3e}, dj_db: {g_b: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, costs, params

In [53]:
# Initialise the parameters
w_in, b_in = 0, 0   # initial values for w and b
alpha = 0.01        # learning rate
num_iters = 1000    # number of iterations

# Example training data (x_train, y_train)
x_train = np.array([1, 2, 3, 4, 5])   # size
y_train = np.array([2, 4, 6, 8, 10])  # price

# Run gradient descent
w, b, J_history, p_history = gradient_descent(
    x=x_train,
    y=y_train,
    w_in=w_in,
    b_in=b_in,
    alpha=alpha,
    num_iters=num_iters,
    cost_function=compute_cost,
    gradient_function=compute_gradient,
)

print(f"Final values: w = {w}, b = {b}")


Iteration    0: Cost 1.71e+01   dj_dw: -2.200e+01, dj_db: -6.000e+00   w:  2.200e-01, b: 6.00000e-02
Iteration  100: Cost 1.71e-02   dj_dw: -2.042e-02, dj_db:  7.342e-02   w:  1.880e+00, b: 4.33677e-01
Iteration  200: Cost 1.22e-02   dj_dw: -1.717e-02, dj_db:  6.201e-02   w:  1.899e+00, b: 3.66176e-01
Iteration  300: Cost 8.70e-03   dj_dw: -1.450e-02, dj_db:  5.235e-02   w:  1.914e+00, b: 3.09179e-01
Iteration  400: Cost 6.20e-03   dj_dw: -1.224e-02, dj_db:  4.421e-02   w:  1.928e+00, b: 2.61055e-01
Iteration  500: Cost 4.42e-03   dj_dw: -1.034e-02, dj_db:  3.732e-02   w:  1.939e+00, b: 2.20421e-01
Iteration  600: Cost 3.15e-03   dj_dw: -8.729e-03, dj_db:  3.152e-02   w:  1.948e+00, b: 1.86112e-01
Iteration  700: Cost 2.25e-03   dj_dw: -7.370e-03, dj_db:  2.661e-02   w:  1.956e+00, b: 1.57143e-01
Iteration  800: Cost 1.60e-03   dj_dw: -6.223e-03, dj_db:  2.247e-02   w:  1.963e+00, b: 1.32683e-01
Iteration  900: Cost 1.14e-03   dj_dw: -5.255e-03, dj_db:  1.897e-02   w:  1.969e+00, b: 1.

## Linear Regression for Multiple features 

In [2]:
# introduction to numpy for multiple linear regression 
import numpy as np 
import time 

In [3]:
# NumPy routines that allocate memory and fill the new array with values.
# Each result is bound to `a` and then described (values, shape, dtype).
a = np.zeros(4)
print(f"np.zeros(4) :   a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

a = np.zeros((4,))
print(f"np.zeros(4,) :  a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

a = np.random.random_sample(4)
print(f"np.random.random_sample(4): a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

np.zeros(4) :   a = [0. 0. 0. 0.], a shape = (4,), a data type = float64
np.zeros(4,) :  a = [0. 0. 0. 0.], a shape = (4,), a data type = float64
np.random.random_sample(4): a = [0.62793365 0.35974625 0.78114418 0.03329643], a shape = (4,), a data type = float64


In [8]:
# A float range, then four random samples bound to the same name
a = np.arange(4.)
print(f" values of {a}")

a = np.random.rand(4)
print(f"New values of variable a is {a}")

# Operations on vectors: start from the integers 0..9
a = np.arange(10)
print(f"values of {a}")

# Access a single element of a by index
a_1 = a[0]
print(f"the first element of a is {a_1}")



 values of [0. 1. 2. 3.]
New values of variable a is [0.64314563 0.96526732 0.48313028 0.64264979]
values of [0 1 2 3 4 5 6 7 8 9]
the first element of a is 0


In [14]:
def dot_function(a, b):
    """
    Compute the dot product of two vectors with an explicit for loop.

    The number of terms is a.shape[0]; term i is a[i] * b[i]. The running
    total starts at the integer 0.
    """
    total = 0
    for idx in range(a.shape[0]):
        total = total + a[idx] * b[idx]
    return total

    

In [15]:
# test-1: run the loop-based dot product on two small integer vectors
a, b = np.array([41, 78, 89]), np.array([85, 45, 763])

print(f"value of our dot function : {dot_function(a,b)}")

value of our dot function : 74902


In [17]:
# Second solution: NumPy's built-in dot product on the same two vectors
a, b = np.array([41, 78, 89]), np.array([85, 45, 763])

# a . b
c = np.dot(a, b)
print(f"NumPy 1-D np.dot(a, b) = {c}, np.dot(a, b).shape = {c.shape} ")

# b . a (operands swapped)
c = np.dot(b, a)
print(f"NumPy 1-D np.dot(b, a) = {c}, np.dot(a, b).shape = {c.shape} ")


NumPy 1-D np.dot(a, b) = 74902, np.dot(a, b).shape = () 
NumPy 1-D np.dot(b, a) = 74902, np.dot(a, b).shape = () 


In [18]:
# Vectorized np.dot versus the hand-written loop in dot_function.
# The vectorized call is expected to be much faster than the Python loop.

np.random.seed(1)
a = np.random.rand(10000000)
b = np.random.rand(10000000)

# time.perf_counter() is the clock intended for measuring short durations:
# it is monotonic and uses the highest-resolution timer available, whereas
# time.time() is a wall clock that can be coarse on some platforms.
tic = time.perf_counter()  # capture start time
c = np.dot(a, b)
toc = time.perf_counter()  # capture the end time
print(f"np.dot(a, b)= {c:.4f}")
print(f"Vectorized version duration : {1000*(toc-tic):.4}ms")

tic = time.perf_counter()  # capture start time
c = dot_function(a, b)
toc = time.perf_counter()  # capture the end time

print(f"my dot function(a , b)= {c:.4}")
print(f"loop version duration :{1000*(toc-tic):.4} ms")

del a, b  # remove these big arrays from memory

np.dot(a, b)= 2501072.5817
Vectorized version duration : 15.62ms
my dot function(a , b)= 2.501e+06
loop version duration :4.608e+03 ms


In [19]:
def dotFunction(a, b):
    """
    Compute the dot product of two equal-length vectors with a for loop.

    Args:
      a, b: Indexable sequences of numbers (e.g. 1-D ndarrays or lists).
            The number of terms is len(a).

    Returns:
      The sum of a[i] * b[i] over all indices i of a (0 when a is empty).
    """
    dot_value = 0
    m = len(a)

    for i in range(m):
        # Index both operands: `a*[i]` would multiply the whole of `a`
        # by the one-element list [i] instead of selecting element i.
        dot_value += a[i] * b[i]
    return dot_value