import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets

# Ridge Regression
# Reference: https://en.wikipedia.org/wiki/Ridge_regression


def ridge_cost_function(
    x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float
) -> float:
    """
    Compute the Ridge regression cost function with L2 regularization.

    J(θ) = (1/(2m)) * [ Σ (hθ(x_i) - y_i)^2 + α * Σ θ_j^2 ]   (for j = 1 to n)

    Where:
        - J(θ) is the cost function we aim to minimize
        - m is the number of training examples
        - hθ(x) = x @ θ (the prediction)
        - y_i is the actual target value for example i
        - α (alpha) is the regularization parameter
        - the bias term θ_0 is excluded from the regularization sum

    @param x: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The parameters (weights) of the model (n,)
    @param alpha: The regularization parameter
    @returns: The computed cost value
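
    Small, hand-checkable example (the values follow from the cost defined
    above; note that the bias term theta[0] is not regularized):

    >>> x = np.array([[1.0, 2.0], [1.0, 3.0]])
    >>> y = np.array([1.0, 2.0])
    >>> theta = np.array([0.5, 0.5])
    >>> float(ridge_cost_function(x, y, theta, alpha=0.0))
    0.0625
    >>> float(ridge_cost_function(x, y, theta, alpha=0.5))
    0.09375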
"""
m = len(y)
predictions = np.dot(x, theta)
cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum(
theta[1:] ** 2
)
return cost


def ridge_gradient_descent(
    x: np.ndarray,
    y: np.ndarray,
    theta: np.ndarray,
    alpha: float,
    learning_rate: float,
    max_iterations: int,
) -> np.ndarray:
    """
    Perform gradient descent to minimize the Ridge regression
    cost function and fit the model parameters.

    @param x: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The initial parameters (weights) of the model (n,)
    @param alpha: The regularization parameter
    @param learning_rate: The learning rate for gradient descent
    @param max_iterations: The number of iterations for gradient descent
    @returns: The optimized parameters (weights) of the model (n,)
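
    Minimal, hand-checkable run (one iteration, no regularization); the
    cost printed at iteration 0 is evaluated after the parameter update:

    >>> x = np.array([[1.0, 2.0]])
    >>> y = np.array([4.0])
    >>> ridge_gradient_descent(x, y, np.zeros(2), 0.0, 0.25, 1)
    Iteration 0, Cost: 0.5
    array([1., 2.])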
"""
m = len(y)
for iteration in range(max_iterations):
predictions = np.dot(x, theta)
error = predictions - y
# calculate the gradient
gradient = (1 / m) * np.dot(x.T, error)
gradient[1:] += (alpha / m) * theta[1:]
theta -= learning_rate * gradient
if iteration % 100 == 0:
cost = ridge_cost_function(x, y, theta, alpha)
print(f"Iteration {iteration}, Cost: {cost}")
return theta
if __name__ == "__main__":
import doctest
doctest.testmod()
# Load California Housing dataset
california_housing = datasets.fetch_california_housing()
x = california_housing.data[:, :2] # 2 features for simplicity
y = california_housing.target
x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)
# Add a bias column (intercept) to X
x = np.c_[np.ones(x.shape[0]), x]
# Initialize parameters (theta)
theta_initial = np.zeros(x.shape[1])
# Set hyperparameters
alpha = 0.1
learning_rate = 0.01
max_iterations = 1000
optimized_theta = ridge_gradient_descent(
x, y, theta_initial, alpha, learning_rate, max_iterations
)
print(f"Optimized theta: {optimized_theta}")
# Prediction
def predict(x, theta):
return np.dot(x, theta)
y_pred = predict(x, optimized_theta)
# Plotting the results (here we visualize predicted vs actual values)
plt.figure(figsize=(10, 6))
plt.scatter(y, y_pred, color="b", label="Predictions vs Actual")
plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit")
plt.xlabel("Actual values")
plt.ylabel("Predicted values")
plt.title("Ridge Regression: Actual vs Predicted Values")
plt.legend()
plt.show()