In [3]:
import numpy as np

#Define the cost function
def cost_function(X, Y, W):
  """ Parameters:
  This function finds the Mean Square Error.
  Input parameters:
    X: Feature Matrix
    Y: Target Matrix
    W: Weight Matrix
  Output Parameters:
    cost: accumulated mean square error.
  """
  y_pred = X.dot(W)
  m = len(Y)
  cost = 1/(2* m) * np.sum(np.square(y_pred-Y))

  return cost



In [4]:
# Test case
X_test = np.array([[1, 2], [3, 4], [5, 6]])
Y_test = np.array([3, 7, 11])
W_test = np.array([1, 1])
cost = cost_function(X_test, Y_test, W_test)
if cost == 0:
  print("Proceed Further")
else:
  print("something went wrong: Reimplement a cost function")
print("Cost function output:", cost_function(X_test, Y_test, W_test))

Proceed Further
Cost function output: 0.0


In [5]:
import numpy as np
def gradient_descent(X, Y, W, alpha, iterations):
  """
  Perform gradient descent to optimize the parameters of a linear regression model.
  Parameters:
  X (numpy.ndarray): Feature matrix (m x n).
  Y (numpy.ndarray): Target vector (m x 1).
  W (numpy.ndarray): Initial guess for parameters (n x 1).
  alpha (float): Learning rate.
  iterations (int): Number of iterations for gradient descent.
  Returns:
  tuple: A tuple containing the final optimized parameters (W_update) and the history of cost values
  .
  W_update (numpy.ndarray): Updated parameters (n x 1).
  cost_history (list): History of cost values over iterations.
  """
  # Initialize cost history
  cost_history = [0] * iterations
  # Number of samples
  m = len(Y)
  for iteration in range(iterations):
    # Step 1: Hypothesis Values
    Y_pred =  np.dot(X,W)
    # Step 2: Difference between Hypothesis and Actual Y
    loss =  Y_pred - Y
    # Step 3: Gradient Calculation
    dw = 1/m * np.dot(loss,X)
    # Step 4: Updating Values of W using Gradient
    W_update = W - alpha * dw
    # Step 5: New Cost Value
    cost = cost_function(X, Y, W_update)
    cost_history[iteration] = cost
    W = W_update
  return W_update, cost_history

# Generate random test data
np.random.seed(3) # For reproducibility
X = np.random.rand(100, 3) # 100 samples, 3 features
Y = np.random.rand(100)
W = np.random.rand(3) # Initial guess for parameters
# Set hyperparameters
alpha = 0.01
iterations = 2000
# Test the gradient_descent function
final_params, cost_history = gradient_descent(X, Y, W, alpha, iterations)
# Print the final parameters and cost history
print("Final Parameters:", final_params)
print("Cost History:", cost_history)


Final Parameters: [0.21929649 0.38717206 0.29246057]
Cost History: [np.float64(0.19558065489964047), np.float64(0.19325487068565209), np.float64(0.1909662646054197), np.float64(0.18871424204029585), np.float64(0.1864982178823735), np.float64(0.18431761638236407), np.float64(0.1821718709999078), np.float64(0.18006042425627972), np.float64(0.17798272758944964), np.float64(0.17593824121146262), np.float64(0.17392643396809848), np.float64(0.17194678320077775), np.float64(0.16999877461067556), np.float64(0.16808190212500904), np.float64(0.1661956677654635), np.float64(0.16433958151872333), np.float64(0.16251316120907341), np.float64(0.16071593237303813), np.float64(0.15894742813602647), np.float64(0.15720718909094877), np.float64(0.15549476317877575), np.float64(0.15380970557100812), np.float64(0.15215157855402572), np.float64(0.15051995141528626), np.float64(0.14891440033134504), np.float64(0.14733450825766511), np.float64(0.14577986482018992), np.float64(0.14425006620865077), np.float64(0

In [6]:
# Model Evaluation - RMSE
def rmse(Y, Y_pred):
  """
  This Function calculates the Root Mean Squres.
  Input Arguments:
  Y: Array of actual(Target) Dependent Varaibles.
  Y_pred: Array of predeicted Dependent Varaibles.
  Output Arguments:
  rmse: Root Mean Square.
  """
  m = len(Y)
  rmse = np.sqrt(1/m*np.sum((Y-Y_pred)**2))
  return rmse


# Model Evaluation - R2
def r2(Y, Y_pred):
  """
  This Function calculates the R Squared Error.
  Input Arguments:
    Y: Array of actual(Target) Dependent Varaibles.
    Y_pred: Array of predeicted Dependent Varaibles.
  Output Arguments:
    rsquared: R Squared Error.
  """
  mean_y = np.mean(Y)
  ss_tot = np.sum(np.square(Y-np.mean(Y)))
  ss_res = np.sum(np.square(Y-Y_pred))
  r2 =  1- (ss_res/ss_tot)
  return r2

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
# Main Function
def main():
  # Step 1: Load the dataset
  data = pd.read_csv('/content/drive/MyDrive/AI/student.csv')

  # Step 2: Split the data into features (X) and target (Y)
  X = data[['Math', 'Reading']].values # Features: Math and Reading marks
  Y = data['Writing'].values # Target: Writing marks
  # Step 3: Split the data into training and test sets (80% train, 20% test)
  X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
  # Step 4: Initialize weights (W) to zeros, learning rate and number of iterations
  W = np.zeros(X_train.shape[1]) # Initialize weights
  alpha = 0.0001 # Learning rate
  iterations = 1000 # Number of iterations for gradient descent
  # Step 5: Perform Gradient Descent
  W_optimal, cost_history = gradient_descent(X_train, Y_train, W, alpha, iterations)

  # Step 6: Make predictions on the test set
  Y_pred = np.dot(X_test, W_optimal)
  # Step 7: Evaluate the model using RMSE and R-Squared
  model_rmse = rmse(Y_test, Y_pred)
  model_r2 = r2(Y_test, Y_pred)
  # Step 8: Output the results
  print("Final Weights:", W_optimal)
  print("Cost History (First 10 iterations):", cost_history[:10])
  print("RMSE on Test Set:", model_rmse)
  print("R-Squared on Test Set:", model_r2)
# Execute the main function
if __name__ == "__main__":
  main()


Final Weights: [0.0894932  0.89504864]
Cost History (First 10 iterations): [np.float64(17.813797177522098), np.float64(16.983149024878305), np.float64(16.925140245010397), np.float64(16.867870818076216), np.float64(16.811093513105355), np.float64(16.754804026075387), np.float64(16.69899816573971), np.float64(16.64367177688582), np.float64(16.588820740001896), np.float64(16.53444097097003)]
RMSE on Test Set: 4.792607360540954
R-Squared on Test Set: 0.908240340333986


The performance is acceptable as the value of R-Squared on the test set is 0.88 which is close to 1

It can be observed that the least error is there when the value of alpha is set to 4 decimal places like 0.0001