In [3]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt


def gradient_descent(X, y, theta, alpha, num_iters):
    m = len(y)
    cost_history = []

    for _ in range(num_iters):
        # Hypothesis function
        h = np.dot(X, theta)
        
        # Calculate the cost
        cost = (1/(2*m)) * np.sum(np.square(h - y))
        cost_history.append(cost)
        # Update parameters using gradient descent
        gradient = (1/m) * np.dot(X.T, (h - y))
        theta -= alpha * gradient
        # print(theta)

    return theta, cost_history

# Example data
X = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)  # Feature matrix (including intercept term)
y = np.array([2, 3, 4, 5, 6])  # Target vector

degree = 2
poly_features = PolynomialFeatures(degree=degree)
X_poly = poly_features.fit_transform(X)
print(X_poly)
# Initial parameters
theta = np.zeros(X_poly.shape[1])  # Initializing theta with zeros
# Learning rate and number of iterations
alpha = 0.001
num_iters = 1000000

# Performing gradient descent
theta_final, cost_history = gradient_descent(X_poly, y, theta, alpha, num_iters)

print("Final parameters:", theta_final)


[[ 1.  1.  1.]
 [ 1.  2.  4.]
 [ 1.  3.  9.]
 [ 1.  4. 16.]
 [ 1.  5. 25.]]
Final parameters: [ 1.00000000e+00  1.00000000e+00 -3.76001075e-13]


In [9]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt


def gradient_descent(X, y, theta, mT, alpha, beta, num_iters):
    m = len(y)
    cost_history = []

    for _ in range(num_iters):
        # Hypothesis function
        h = np.dot(X, theta)
        gradient = (1/m) * np.dot(X.T, (h - y))
        mT = beta * mT + (1 - beta) * gradient
        
        # Calculate the cost
        cost = (1/(2*m)) * np.sum(np.square(h - y))
        cost_history.append(cost)
        # Update parameters using gradient descent
        theta -= alpha * mT
        # print(theta)

    return theta, cost_history

# Example data
X = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)  # Feature matrix (including intercept term)
y = np.array([2, 3, 4, 5, 6])  # Target vector

degree = 2
poly_features = PolynomialFeatures(degree=degree)
X_poly = poly_features.fit_transform(X)
print(X_poly)
# Initial parameters
theta = np.zeros(X_poly.shape[1])  # Initializing theta with zeros
mT = np.zeros(X_poly.shape[1]) 
# Learning rate and number of iterations
alpha = 0.001
beta = 0.90
num_iters = 5000000

# Performing gradient descent
theta_final, cost_history = gradient_descent(X_poly, y, theta, mT, alpha, beta, num_iters)

print("Final parameters:", theta_final)


[[ 1.  1.  1.]
 [ 1.  2.  4.]
 [ 1.  3.  9.]
 [ 1.  4. 16.]
 [ 1.  5. 25.]]
Final parameters: [ 1.00000000e+00  1.00000000e+00 -1.87565241e-13]


In [7]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt


def gradient_descent(X, y, theta, mT, alpha, beta, num_iters):
    m = len(y)
    cost_history = []
    
    tolerance = 1 / (num_iters *100)
    
    for _ in range(num_iters):
        # Hypothesis function
        h = np.dot(X, theta)
        
        error_before = (1/(2*m)) * np.sum(np.square(h - y))
        
        gradient = (1/m) * np.dot(X.T, (h - y))
        mT = beta * mT + (1 - beta) * gradient
        
        theta -= alpha * mT
        # Calculate the cost
        h = np.dot(X, theta)
        
        error_after = (1/(2*m)) * np.sum(np.square(h - y))

        if error_after > error_before:
            theta += alpha * mT
            mT = (mT - (1 - beta) * gradient) / beta
            alpha =  alpha / 2
            continue
        
        # if error_before - error_after <= tolerance:
        #     break
        
            
        # cost_history.append(cost)
        # Update parameters using gradient descent
        # print(theta)

    return theta, cost_history

# Example data
X = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)  # Feature matrix (including intercept term)
y = np.array([2, 3, 4, 5, 6])  # Target vector

degree = 2
poly_features = PolynomialFeatures(degree=degree)
X_poly = poly_features.fit_transform(X)
print(X_poly)
# Initial parameters
theta = np.zeros(X_poly.shape[1])  # Initializing theta with zeros
mT = np.zeros(X_poly.shape[1]) 
# Learning rate and number of iterations
alpha = 0.001
beta = 0.90
num_iters = 500000

# Performing gradient descent
theta_final, cost_history = gradient_descent(X_poly, y, theta, mT, alpha, beta, num_iters)

print("Final parameters:", theta_final)


[[ 1.  1.  1.]
 [ 1.  2.  4.]
 [ 1.  3.  9.]
 [ 1.  4. 16.]
 [ 1.  5. 25.]]
Final parameters: [0.02163302 0.07203843 0.27923701]
