In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable
# from this notebook.
# NOTE(review): main() further down reads "/content/student (1).csv", which is
# not under this mount point — confirm whether the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')






In [None]:
#Define the cost function
def cost_function(X, Y, W):
  """
  Compute the halved mean squared error of a linear model.

  The cost is J(W) = (1 / (2 * m)) * sum((X.dot(W) - Y) ** 2), with m = len(Y).

  Parameters:
  X (numpy.ndarray): Feature matrix (m x n).
  Y (numpy.ndarray): Target values, either 1-D (m,) or a column (m x 1).
  W (numpy.ndarray): Weights, either 1-D (n,) or a column (n x 1).

  Returns:
  float: Sum of squared residuals divided by 2 * m.

  Raises:
  ZeroDivisionError: If Y is empty.
  """
  predictions = np.asarray(X.dot(W))
  targets = np.asarray(Y)
  # A 1-D prediction minus an (m, 1) target (or the reverse) would silently
  # broadcast into an (m, m) matrix and inflate the cost. When both hold the
  # same number of values but disagree on shape, compare them element by element.
  if predictions.shape != targets.shape and predictions.size == targets.size:
    predictions = predictions.ravel()
    targets = targets.ravel()
  residuals = predictions - targets
  return (1/(2*(len(Y))))*np.sum(residuals**2)

In [None]:
def gradient_descent(X, Y, W, alpha, iterations):
  """
  Perform batch gradient descent to fit the weights of a linear regression model.

  Every iteration recomputes the predictions from the current weights, takes
  one step against the gradient of the halved mean squared error, and records
  the cost of the updated weights.

  Parameters:
  X (numpy.ndarray): Feature matrix (m x n).
  Y (numpy.ndarray): Target values, 1-D (m,) or a column (m x 1).
  W (numpy.ndarray): Initial guess for the weights, 1-D (n,) or a column (n x 1).
    It is copied and never modified in place.
  alpha (float): Learning rate.
  iterations (int): Number of gradient descent steps.

  Returns:
  tuple: (W_update, cost_history)
    W_update (numpy.ndarray): Weights after the last step, same shape as W.
    cost_history (list): Cost after each step; empty when iterations <= 0.

  Raises:
  ValueError: If Y cannot be reshaped to the shape of X.dot(W).
  """
  # Work on a float copy: the caller's W stays untouched, and there is always a
  # value to return even when no iterations are requested.
  W_update = np.array(W, dtype=float)
  cost_history = []
  # Number of samples
  m = len(Y)
  # Give the targets the same shape as the predictions so that mixing 1-D and
  # column vectors cannot broadcast the residual into an (m, m) matrix.
  targets = np.reshape(Y, np.shape(X.dot(W_update)))
  for _ in range(iterations):
    # Step 1: Hypothesis values from the *current* weights (not the initial W)
    Y_pred = X.dot(W_update)
    # Step 2: Difference between hypothesis and actual Y
    loss = Y_pred - targets
    # Step 3: Gradient X^T (XW - Y) / m; this form is valid for both 1-D and
    # column-vector residuals
    dw = X.T.dot(loss)/m
    # Step 4: Move the weights one step against the gradient
    W_update = W_update - alpha*dw
    # Step 5: Cost of the updated weights
    cost_history.append(cost_function(X, targets, W_update))

  return W_update, cost_history

In [None]:
import numpy as np

# Model Evaluation - RMSE
def rmse(Y, Y_pred):
  """
  Root mean squared error between actual and predicted target values.

  Input Arguments:
  Y: Array of actual (target) dependent variables.
  Y_pred: Array of predicted dependent variables.
  Output Arguments:
  The square root of the summed squared residuals scaled by 1 / len(Y).
  """
  # Scale factor first, so an empty Y fails on the division as before.
  inverse_count = 1/(len(Y))
  residuals = Y - Y_pred
  squared_error_total = np.sum(residuals**2)
  return np.sqrt(inverse_count*squared_error_total)

In [None]:
# Model Evaluation - R2
def r2(Y, Y_pred):
  """
  Coefficient of determination (R squared) of predictions against targets.

  R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals and
  SS_tot is the sum of squared deviations of Y from its mean.

  Input Arguments:
  Y: Array (or sequence) of actual (target) dependent variables.
  Y_pred: Array (or sequence) of predicted dependent variables.
  Output Arguments:
  rsquared: R squared score. When Y is constant (SS_tot == 0) the ratio is
    undefined; 1.0 is returned for perfect predictions and 0.0 otherwise, so
    the score stays finite.
  """
  # Accept plain sequences as well as arrays.
  Y = np.asarray(Y, dtype=float)
  Y_pred = np.asarray(Y_pred, dtype=float)
  mean_y = np.mean(Y)
  ss_tot = np.sum((Y - mean_y)**2)
  ss_res = np.sum((Y - Y_pred)**2)
  # Constant targets make SS_tot zero; avoid dividing by zero (nan / -inf).
  if Y.size > 0 and ss_tot == 0:
    return 1.0 if ss_res == 0 else 0.0
  return 1 - (ss_res / ss_tot)

In [None]:
# Main Function
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

def main(data_path="/content/student (1).csv", alpha=0.00001, iterations=1000, seed=42):
  """
  Train a linear regression model with gradient descent and report test metrics.

  Loads the student marks CSV, predicts 'Writing' from 'Math' and 'Reading',
  fits the weights on an 80% training split, and prints the final weights, the
  first cost values, and RMSE / R-squared on the remaining 20%.

  Parameters:
  data_path (str): Path of the CSV file; it must contain the columns
    'Math', 'Reading' and 'Writing'.
  alpha (float): Learning rate for gradient descent.
  iterations (int): Number of gradient descent iterations.
  seed (int): Seed for the train/test split and the initial weights, so that
    repeated runs print the same numbers.
  """
  # Step 1: Load the dataset
  data = pd.read_csv(data_path)
  # Step 2: Split the data into features (X) and target (Y)
  X = data[['Math', 'Reading']].values # Features: Math and Reading marks
  Y = data['Writing'].values # Target: Writing marks
  # Step 3: Split the data into training and test sets (80% train, 20% test)
  X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=seed)
  # Step 4: Initialize weights (W) to small random values in [0, 0.5) from a
  # seeded generator, keeping the run reproducible
  rng = np.random.default_rng(seed)
  W = 0.5 * rng.random(X_train.shape[1])
  # Step 5: Perform Gradient Descent
  W_optimal, cost_history = gradient_descent(X_train, Y_train, W, alpha, iterations)
  # Step 6: Make predictions on the test set
  Y_pred = np.dot(X_test, W_optimal)
  # Step 7: Evaluate the model using RMSE and R-Squared
  model_rmse = rmse(Y_test, Y_pred)
  model_r2 = r2(Y_test, Y_pred)
  # Step 8: Output the results (the slice matches the "First 10" label)
  print("Final Weights:", W_optimal)
  print("Cost History (First 10 iterations):", cost_history[:10])
  print("RMSE on Test Set:", model_rmse)
  print("R-Squared on Test Set:", model_r2)

# Execute the main function only when this code is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
  main()