In [None]:
!pip install ipympl

In [None]:
# Fetch the bootcamp repository; a later cell copies plots_week2.py out of it.
!git clone "https://github.com/Atharva-Malode/ML-Bootcamp.git"

In [20]:
# Copy the plotting helpers out of the cloned repository so that
# `from plots_week2 import ...` in the imports cell can find them.
# NOTE(review): assumes the notebook's working directory is /content (Colab) - confirm.
!cp "/content/ML-Bootcamp/Week-2/Day1/plots_week2.py" "/content/"

In [15]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from ipywidgets import interact
%matplotlib widget
from google.colab import output
output.enable_custom_widget_manager()
from mpl_toolkits.mplot3d import Axes3D
import copy
from matplotlib import animation
from plots_week2 import data_visual_2D, plot_3d_graph, cost_vs_iteration

In [None]:
# Seed NumPy's global generator so every run produces the same synthetic data
np.random.seed(42)

m = 100  # number of training examples (students)

# Draw one integer column per feature. randint excludes the upper bound, and the
# order of these calls must stay fixed or the seeded values shift between columns.
study_hours = np.random.randint(1, 8, size=m)               # 1..7
math_score = np.random.randint(40, 100, size=m)             # 40..99
science_score = np.random.randint(50, 100, size=m)          # 50..99
english_score = np.random.randint(30, 90, size=m)           # 30..89
attendance_percentage = np.random.randint(70, 100, size=m)  # 70..99

# Feature matrix of shape (m, 5): one row per student, one column per feature
feature_columns = (study_hours, math_score, science_score, english_score, attendance_percentage)
X = np.column_stack(feature_columns)

# Target: a student rank in 1..5, drawn independently of the features
y = np.random.randint(1, 6, size=m)

# Column name -> values, for every feature plus the target
feature_explanations = dict(zip(
    ('study_hours', 'math_score', 'science_score', 'english_score', 'attendance_percentage'),
    feature_columns,
))
feature_explanations["Student Rank"] = y

# Tabular view of the same data
dataset = pd.DataFrame(feature_explanations)

# Starting point and hyperparameters for gradient descent
n_features = X.shape[1]
w_initial = np.zeros(n_features)
b_initial = 0.0
alpha = 9e-7     # learning rate
num_iters = 100  # number of gradient steps
dataset

In [None]:
# data_visual_2D is imported from plots_week2.py and is not defined in this notebook;
# presumably it draws each feature against the target y - verify against that module.
data_visual_2D(n_features, feature_explanations, X,y)

In [None]:
# plot_3d_graph is imported from plots_week2.py and is not defined in this notebook;
# presumably it renders a 3D view of the data in X and y - verify against that module.
plot_3d_graph(X,y)

# 1 Compute Cost With Multiple Variables
The equation for the cost function with multiple variables $J(\mathbf{w},b)$ is:
$$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 \tag{1}$$ 
where:
$$ f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)} + b  \tag{2} $$ 

In [None]:
def compute_cost(X, y, w, b): 
    """
    Squared-error cost of the linear model w.x + b over all examples
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters  
      b (scalar)       : model parameter
      
    Returns:
      cost (scalar): sum of squared prediction errors divided by 2m
    """
    n_examples = X.shape[0]
    # One squared residual per example, accumulated in example order
    squared_errors = (
        (np.dot(X[idx], w) + b - y[idx]) ** 2
        for idx in range(n_examples)
    )
    total = sum(squared_errors, 0.0)
    return total / (2 * n_examples)

# 2 Gradient Descent With Multiple Variables

## 2.1 Calculating the error
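The error for each training example $i$ is the difference between the model's prediction and the target value:

$$\text{error}^{(i)} = f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}$$

where
* $f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)} + b = w_0x_0^{(i)} + w_1x_1^{(i)} + \ldots + w_{n-1}x_{n-1}^{(i)} + b$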

In [None]:
def compute_error(x, y, w, b):
    """
    Signed prediction error of the linear model on one example
    Args:
      x (ndarray (n,)): Data for a single example with n features
      y (scalar)       : target value
      w (ndarray (n,)) : model parameters  
      b (scalar)       : model parameter
      
    Returns:
      error (scalar): prediction (w.x + b) minus the target value y
    """
    prediction = np.dot(x, w) + b
    return prediction - y

## 2.2 Calculating Gradient

$$
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (\text{error}^{(i)})x_{j}^{(i)} \tag{3}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (\text{error}^{(i)}) \tag{4}
\end{align}
$$
* $m$ is the number of training examples in the data set
* $\text{error}^{(i)} = f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}$, where $f_{\mathbf{w},b}(\mathbf{x}^{(i)})$ is the model's prediction and $y^{(i)}$ is the target value

In [10]:
def compute_gradient(X, y, w, b):
    """
    Computes the gradient for linear regression
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters  
      b (scalar)       : model parameter
      
    Returns (in this order):
      dj_db (scalar):       The gradient of the cost w.r.t. the parameter b. 
      dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w. 
    """
    m, n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):
        error = compute_error(X[i], y[i], w, b)

        # Add this example's contribution to every feature gradient at once;
        # element j receives error * X[i, j].
        dj_dw += error * X[i]
        dj_db += error

    # Average the accumulated sums over the m examples
    dj_dw /= m
    dj_db /= m
        
    return dj_db, dj_dw

## 2.3 Gradient descent for multiple variables:

$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\;
& w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j}  \; & \text{for j = 0..n-1}\newline
&b\ \ = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b}  \newline \rbrace
\end{align*}$$

where $n$ is the number of features, and the parameters $w_j$ and $b$ are updated simultaneously.

In [11]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters): 
    """
    Performs batch gradient descent to learn w and b. Updates w and b by taking 
    num_iters gradient steps with learning rate alpha
    
    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters  
      b_in (scalar)       : initial model parameter
      cost_function       : function to compute cost, called as cost_function(X, y, w, b)
      gradient_function   : function to compute the gradient, called as
                            gradient_function(X, y, w, b) and returning (dj_db, dj_dw)
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent
      
    Returns:
      w (ndarray (n,)) : Updated values of parameters 
      b (scalar)       : Updated value of parameter 
      J_history (list) : Cost after each of the first 100 updates, for graphing
      """
    
    # Only the first max_history costs are stored, to bound memory on long runs
    max_history = 100
    J_history = []
    w = copy.deepcopy(w_in)  #avoid modifying the caller's w within function
    b = b_in

    # Report progress about 10 times over the run (every iteration if num_iters < 10)
    print_every = math.ceil(num_iters / 10)
    
    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w, b)

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
      
        record = i < max_history
        report = i % print_every == 0

        # Evaluate the cost once, and only when it is stored or printed
        if record or report:
            cost = cost_function(X, y, w, b)

            # Save cost J for the early iterations
            if record:
                J_history.append(cost)

            # Print the cost of the current iteration; reading J_history[-1] here
            # would repeat the iteration-99 value once the history stops growing.
            if report:
                print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")
        
    return w, b, J_history #return final w,b and J history for graphing

In [None]:
# Perform gradient descent on the synthetic student data, starting from the zero
# weights and bias defined in the data-setup cell. Returns the learned parameters
# and the recorded cost history; progress lines are printed while it runs.
w_final, b_final, J_history = gradient_descent(X, y, w_initial, b_initial, compute_cost, compute_gradient, alpha, num_iters)

In [None]:
# cost_vs_iteration is imported from plots_week2.py and is not defined in this notebook;
# presumably it plots the recorded cost against the iteration number - verify against that module.
cost_vs_iteration(J_history, num_iters)

In [None]:
plt.close()
# Generate some random data for demonstration.
# NOTE: this cell rebinds the globals X, y and num_iters that the earlier cells
# use for the student dataset; re-run the data-setup cell before re-running them.
np.random.seed(42)
X = np.random.rand(100, 2)
y = 2 + 3*X[:, 0] - 4*X[:, 1] + np.random.randn(100)

# Perform gradient descent
lr = 0.01  # Learning rate
num_iters = 100

# Define the loss function
def loss_function(theta0, theta1, theta2):
    """Mean squared error of theta0 + theta1*x0 + theta2*x1 on the global X, y."""
    y_pred = theta0 + theta1*X[:, 0] + theta2*X[:, 1]
    return np.mean((y_pred - y)**2)

# Initialize parameters and history
theta0_hist, theta1_hist, theta2_hist, loss_hist = [], [], [], []
theta0 = 0.0
theta1 = 0.0
theta2 = 0.0

for i in range(num_iters):
    # Residual of the current model, computed once and shared by all three gradients
    residual = theta0 + theta1*X[:, 0] + theta2*X[:, 1] - y

    # Calculate gradients. These are the gradients of half the mean squared error
    # (the 1/(2m) cost used earlier in the notebook); loss_function reports the
    # full mean squared error, i.e. twice that cost.
    gradients = [
        np.mean(residual),
        np.mean(residual * X[:, 0]),
        np.mean(residual * X[:, 1])
    ]

    # Update parameters
    theta0 -= lr * gradients[0]
    theta1 -= lr * gradients[1]
    theta2 -= lr * gradients[2]

    # Save parameter values and loss for animation
    theta0_hist.append(theta0)
    theta1_hist.append(theta1)
    theta2_hist.append(theta2)
    loss_hist.append(loss_function(theta0, theta1, theta2))

# Set up the figure and axes
fig, ax = plt.subplots()

# Create grid of parameter values
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-10, 10, 100)
theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals)
loss_vals = np.zeros_like(theta0_vals)

for i in range(theta0_vals.shape[0]):
    for j in range(theta0_vals.shape[1]):
        loss_vals[i, j] = loss_function(theta0_vals[i, j], theta1_vals[i, j], 0)  # Fix theta2 = 0 for visualization

# Plot the loss function contour
contour = ax.contour(theta0_vals, theta1_vals, loss_vals, levels=20, cmap='viridis')

# Plot the history of parameter values and steps
line, = ax.plot([], [], 'r', marker='o')

# Function to update the animation
def update_animation(frame):
    """Show the first `frame` descent steps and fade the contour lines in."""
    line.set_data(theta0_hist[:frame], theta1_hist[:frame])
    # Set the alpha on the ContourSet itself: `contour.collections` is deprecated
    # in recent Matplotlib releases, so per-level artists cannot be relied on.
    # NOTE(review): this fades every contour level, not only the first one.
    contour.set_alpha(float(frame) / num_iters)
    return (line,)

# Create animation. The result is bound to `anim` so that the imported
# `matplotlib.animation` module is not overwritten and the cell can be re-run.
# Blitting is off because the contour alpha changes on every frame and the
# contour set is not returned as a blitted artist.
anim = animation.FuncAnimation(fig, update_animation, frames=num_iters, interval=100, blit=False)

# Set plot labels
ax.set_xlabel('Theta 0')
ax.set_ylabel('Theta 1')

# Show the plot
plt.show()