<h1 style="font-size:3rem;color:skyblue;">Advanced Lab: Python, NumPy, and Linear Regression Concepts</h1>

In [41]:
import numpy as np

In [43]:
# Build two small integer vectors that the following cells operate on
a = np.arange(1, 5)
b = np.arange(5, 9)
print(f"a = {a}", f"b = {b}", sep="\n")

a = [1 2 3 4]
b = [5 6 7 8]


In [45]:
# Element-wise arithmetic: each output entry combines the entries of a and b
# found at the same position
c = np.add(a, b)
d = np.multiply(a, b)
print(f"a + b = {c}", f"a * b = {d}", sep="\n")

# Dot product: the sum of the element-wise products, collapsed to one scalar
e = a.dot(b)
print(f"np.dot(a,b) = {e}")

a + b = [ 6  8 10 12]
a * b = [ 5 12 21 32]
np.dot(a,b) = 70


#### Vectorization vs. Loops

In [53]:
import time

def loop_dot_product(a, b):
    """Compute the dot product of two equal-length sequences with a Python loop.

    This is the deliberately non-vectorized baseline that the notebook times
    against ``np.dot``.

    Parameters
    ----------
    a, b : sequence of numbers
        Inputs supporting ``len()`` and iteration (lists, 1-D arrays, ...).

    Returns
    -------
    number
        Sum of ``a[i] * b[i]`` over all positions; ``0`` for empty inputs.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` have different lengths. Without this check a longer
        ``b`` would be silently truncated to ``len(a)`` elements.
    """
    if len(a) != len(b):
        raise ValueError(
            f"inputs must have the same length, got {len(a)} and {len(b)}"
        )

    result = 0
    # zip pairs the elements directly, avoiding repeated index lookups
    for x, y in zip(a, b):
        result += x * y
    return result

# Large vectors for performance comparison.
# A seeded generator makes the inputs identical on every Restart & Run All.
rng = np.random.default_rng(42)
large_a = rng.random(1_000_000)
large_b = rng.random(1_000_000)

# Vectorized version.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted,
# which may yield a zero (or negative) interval for a call this fast.
start_time = time.perf_counter()
np_result = np.dot(large_a, large_b)
np_time = time.perf_counter() - start_time

# Loop version
start_time = time.perf_counter()
loop_result = loop_dot_product(large_a, large_b)
loop_time = time.perf_counter() - start_time

# The timing comparison is only meaningful if both approaches compute the
# same value (up to floating-point rounding from different summation order).
assert np.isclose(np_result, loop_result), "vectorized and loop results differ"

print(f"NumPy time: {np_time:.6f} seconds")
print(f"Loop time: {loop_time:.6f} seconds")
print(f"Speedup: {loop_time / np_time:.2f}x")

NumPy time: 0.006994 seconds
Loop time: 0.273730 seconds
Speedup: 39.14x


#### Implementation
##### Data Preparation

In [66]:
# Toy regression data: six samples, two features per sample (rows of X),
# and one target value per sample (entries of y)
X = np.array(
    [
        [1, 1],
        [1, 2],
        [2, 2],
        [2, 3],
        [3, 3],
        [3, 4],
    ]
)
y = np.array([250, 300, 320, 360, 400, 450])

# Confirm the sample dimension of X matches the length of y
print("x shape:", np.shape(X))
print("y shape:", np.shape(y))

x shape: (6, 2)
y shape: (6,)


#### Hypothesis and Cost Function

In [69]:
def hypothesis(X, theta):
    """Return the linear model's predictions ``X . theta``.

    Parameters
    ----------
    X : array-like
        Design matrix, one row per sample.
    theta : array-like
        Parameter vector with one weight per column of ``X``.

    Returns
    -------
    The result of ``np.dot(X, theta)``.
    """
    predictions = np.dot(X, theta)
    return predictions

def compute_cost(X, y, theta):
    """Return the halved mean squared error of the linear model on ``(X, y)``.

    Computes ``J(theta) = 1 / (2m) * sum((hypothesis(X, theta) - y) ** 2)``
    where ``m = len(y)``.

    Parameters
    ----------
    X : array-like
        Design matrix, one row per sample.
    y : array-like
        Target values, one per sample.
    theta : array-like
        Parameter vector.
    """
    n_samples = len(y)
    residuals = hypothesis(X, theta) - y
    squared_error_sum = np.sum(residuals ** 2)
    return (1 / (2 * n_samples)) * squared_error_sum

#### Gradient Descent

In [74]:
def gradient_descent(X, y, theta, alpha, num_iters):
    """Fit ``theta`` by batch gradient descent and record the cost per step.

    Each iteration applies the update
    ``theta <- theta - (alpha / m) * X^T (hypothesis(X, theta) - y)``
    with ``m = len(y)``, then stores the cost of the updated parameters.

    Parameters
    ----------
    X : ndarray
        Design matrix, one row per sample.
    y : array-like
        Target values, one per sample.
    theta : array-like
        Starting parameters; rebound locally, not modified in place.
    alpha : float
        Learning rate.
    num_iters : int
        Number of update steps to run.

    Returns
    -------
    tuple
        ``(theta, j_history)``: the final parameters and a list holding the
        cost computed after every update.
    """
    step_scale = alpha / len(y)
    cost_trace = []

    for _iteration in range(num_iters):
        errors = hypothesis(X, theta) - y
        gradient_direction = np.dot(X.T, errors)
        theta = theta - step_scale * gradient_direction
        # Cost is measured after the update, so entry i reflects step i + 1
        cost_trace.append(compute_cost(X, y, theta))

    return theta, cost_trace

# Prepend a column of ones to X so that theta[0] acts as the intercept (bias)
X_b = np.hstack([np.ones((X.shape[0], 1)), X])
theta = np.zeros(X_b.shape[1])

# Fit the parameters with batch gradient descent
alpha, num_iters = 0.01, 1000
theta, j_history = gradient_descent(X_b, y, theta, alpha, num_iters)

# Report the fitted parameters and the cost after the last update
print("Optimized theta: ", theta)
print("Final cost:", j_history[-1])

Optimized theta:  [127.54510416  35.58721977  57.0452545 ]
Final cost: 145.21369802516568


#### Feature Scaling and Learning Rate

In [77]:
def normalize_z_score(X):
    """Standardize each column of ``X`` to zero mean and unit variance.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample and one column per feature.

    Returns
    -------
    ``(X - mean) / std`` computed column-wise (``axis=0``) with the population
    standard deviation (NumPy's default ``ddof=0``).

    Notes
    -----
    A constant column (such as a bias column of ones) has a standard deviation
    of exactly zero; dividing by it would produce NaN and a RuntimeWarning.
    Such columns are divided by 1 instead, so they come out as all zeros.
    """
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # Replace zero divisors only; non-constant columns are scaled as before
    safe_std = np.where(std == 0, 1.0, std)
    return (X - mean) / safe_std

def normalize_min_max(X):
    """Rescale each column of ``X`` linearly onto the interval [0, 1].

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample and one column per feature.

    Returns
    -------
    ``(X - min) / (max - min)`` computed column-wise (``axis=0``).

    Notes
    -----
    A constant column has ``max == min``; dividing by that zero range would
    produce NaN and a RuntimeWarning. Such columns are divided by 1 instead,
    so they come out as all zeros.
    """
    col_min = np.min(X, axis=0)
    col_range = np.max(X, axis=0) - col_min
    # Replace zero divisors only; non-constant columns are scaled as before
    safe_range = np.where(col_range == 0, 1, col_range)
    return (X - col_min) / safe_range

# Standardize the features (z-score) and, separately, rescale them (min-max)
# so the two scaling schemes can be compared on the same data
X_normalized_z = normalize_z_score(X)
X_normalized_mm = normalize_min_max(X)

# Show each scaled matrix under its own heading
print("Z-score normalized X: ", X_normalized_z, sep="\n")
print("\nMin-Max normalized X:", X_normalized_mm, sep="\n")

Z-score normalized X: 
[[-1.22474487 -1.5666989 ]
 [-1.22474487 -0.52223297]
 [ 0.         -0.52223297]
 [ 0.          0.52223297]
 [ 1.22474487  0.52223297]
 [ 1.22474487  1.5666989 ]]

Min-Max normalized X:
[[0.         0.        ]
 [0.         0.33333333]
 [0.5        0.33333333]
 [0.5        0.66666667]
 [1.         0.66666667]
 [1.         1.        ]]


##### Learning Rate Experiments

In [90]:
# Candidate step sizes spanning three orders of magnitude; the iteration
# budget is the same for every run so the final costs are comparable
learning_rates = [0.0001, 0.001, 0.01, 0.1]
num_iters = 1000

for alpha in learning_rates:
    # Restart from zeros so each run is independent of the previous one
    theta = np.zeros(X_b.shape[1])
    theta, j_history = gradient_descent(X_b, y, theta, alpha, num_iters)

    print(
        f"\nLearning rate: {alpha}",
        f"Final cost: {j_history[-1]}",
        f"Optimized theta: {theta}",
        sep="\n",
    )


Learning rate: 0.0001
Final cost: 6238.036943740139
Optimized theta: [20.44512837 42.08305931 52.6789439 ]

Learning rate: 0.001
Final cost: 1096.3935809601899
Optimized theta: [42.3799654  55.23599469 71.4036938 ]

Learning rate: 0.01
Final cost: 145.21369802516568
Optimized theta: [127.54510416  35.58721977  57.0452545 ]

Learning rate: 0.1
Final cost: 13.888889055967997
Optimized theta: [173.33190135  28.33287061  46.66754512]
