In [101]:
# 3.1 Implementation from Scratch Step - by - Step Guide:
# 3.1.1 Step -1- Data Understanding, Analysis and Preparations:
# In this step we will read the data, understand the data, perform some basic data cleaning, and store everything
# in the matrix as shown below.
# • Requirements:
# Dataset → student.csv
# • Decision Process:
# In this step we will define the objective of the task.
# – Objective of the Task -
# To Predict the marks obtained in writing based on the marks of Math and Reading.

In [102]:
# • To - Do - 1:
# 1. Read and Observe the Dataset.
# 2. Print top(5) and bottom(5) of the dataset {Hint: pd.head and pd.tail}.
# 3. Print the Information of Datasets. {Hint: pd.info}.
# 4. Gather the Descriptive info about the Dataset. {Hint: pd.describe}
# 5. Split your data into Feature (X) and Label (Y).

import pandas as pd
import numpy as np

# 1. Read the dataset
df = pd.read_csv("student.csv")

# 2. Print top 5 and bottom 5 rows

print("First 5 Rows:")
print(df.head())

print("\nLast 5 Rows:")
print(df.tail())


First 5 Rows:
   Math  Reading  Writing
0    48       68       63
1    62       81       72
2    79       80       78
3    76       83       79
4    59       64       62

Last 5 Rows:
     Math  Reading  Writing
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72


In [103]:
# 3. Print dataset information
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Math     1000 non-null   int64
 1   Reading  1000 non-null   int64
 2   Writing  1000 non-null   int64
dtypes: int64(3)
memory usage: 23.6 KB
None


In [104]:
# 4. Print descriptive statistics
print(df.describe())

              Math      Reading      Writing
count  1000.000000  1000.000000  1000.000000
mean     67.290000    69.872000    68.616000
std      15.085008    14.657027    15.241287
min      13.000000    19.000000    14.000000
25%      58.000000    60.750000    58.000000
50%      68.000000    70.000000    69.500000
75%      78.000000    81.000000    79.000000
max     100.000000   100.000000   100.000000


In [105]:
# 5. Split into Features (X) and Label (Y)
X = df[['Math', 'Reading']]
Y = df['Writing']

print("\nFeatures (X):")
print(X.head())

print("\nLabel (Y):")
print(Y.head())


Features (X):
   Math  Reading
0    48       68
1    62       81
2    79       80
3    76       83
4    59       64

Label (Y):
0    63
1    72
2    78
3    79
4    62
Name: Writing, dtype: int64


In [106]:
# • To - Do - 2:
# 1. To make the task easier - let’s assume there is no bias or intercept.
# 2. Create the following matrices:

# Y = WTX

# 3. Note: The feature matrix described above does not include a column of 1s, as it assumes the
# absence of a bias term in the model.



# Feature matrix (X)
X = df[['Math', 'Reading']].values   # shape (n, d)

# Label vector (Y)
Y = df['Writing'].values.reshape(-1, 1)  # shape (n, 1)

# Number of features
d = X.shape[1]

# Initialize weight vector W (random)
W = np.random.rand(d, 1)

# Prediction using matrix form
# Y_pred = XW
Y_pred = np.dot(X, W)

print("Feature Matrix X shape:", X.shape)
print("Weight Matrix W shape:", W.shape)
print("Label Matrix Y shape:", Y.shape)
print("\nPredicted Output (first 5 values):")
print(Y_pred[:5])

Feature Matrix X shape: (1000, 2)
Weight Matrix W shape: (2, 1)
Label Matrix Y shape: (1000, 1)

Predicted Output (first 5 values):
[[ 69.43815261]
 [ 87.48819585]
 [103.99516596]
 [101.99234679]
 [ 79.03829979]]


In [107]:
# • To Do - 3:
# 1. Split the dataset into training and test sets.
# 2. You can use an 80-20 or 70-30 split, with 80% (or 70%) of the data used for training and the rest
# for testing.


# Set split ratio (80-20)
split_ratio = 0.8
split_index = int(len(X) * split_ratio)

# Shuffle data
indices = np.random.permutation(len(X))
X = X[indices]
Y = Y[indices]

# Train-Test split
X_train = X[:split_index]
X_test = X[split_index:]

Y_train = Y[:split_index]
Y_test = Y[split_index:]

print("Training Data Shape:", X_train.shape, Y_train.shape)
print("Testing Data Shape:", X_test.shape, Y_test.shape)

Training Data Shape: (800, 2) (800, 1)
Testing Data Shape: (200, 2) (200, 1)


In [108]:
# TODO -4
# Feel free to build your own code or complete the following code:
# Building a Cost Function:

# Define the cost function
def cost_function(X, Y, W):
    """
    This function finds the Mean Squared Error (MSE).

    Parameters:
    X : Feature Matrix (n x d)
    Y : Target Matrix (n x 1)
    W : Weight Matrix (d x 1)

    Returns:
    cost : Mean Squared Error
    """

    Y = Y.reshape(-1, 1)
    W = W.reshape(-1, 1)

    # Number of training examples
    n = len(Y)

    # Predicted output
    Y_pred = np.dot(X, W)

    # Mean Squared Error
    cost = (1 / n) * np.sum((Y_pred - Y) ** 2)

    return cost


In [109]:
#TODO 5
# Make sure your code at To - Do - 4 passed the following test case:
# Testing a Cost Function:

# Test Example (should output 0)
X_test = np.array([[1, 2], [3, 4], [5, 6]])
Y_test = np.array([3, 7, 11])
W_test = np.array([1, 1])

cost = cost_function(X_test, Y_test, W_test)

if cost == 0:
    print("Proceed Further")
else:
    print("Something went wrong!")

print("Cost function output:", cost_function(X, Y, W))


Proceed Further
Cost function output: 510.6554894909655


In [110]:
#TODO 6
# Implement your code for Gradient Descent; Either fill the following code or write your own:
# Gradient Descent from Scratch:

def gradient_descent(X, Y, W, alpha=0.01, iterations=10):
  #Ensure inputs are numpay array and coorect shape
  X = np.array(X,dtype=float)
  Y= np.array(Y,dtype=float).reshape(-1,1)
  W= np.array(W,dtype=float).reshape(-1,1)

  m= len(Y)
  cost_history = [0] * iterations  # To store cost at each iteration
  W_update = W.copy()

  for iteration in range(iterations):
    # Step 1: Hypothesis values
      Y_pred = X @ W_update
    # Step 2: Difference between hypothesis and actual Y
      loss = Y_pred - Y
    # Step 3: Gradient calculation
      dw = (1/m) * (X.T @ loss)

    # Step 4: Update W
      W_update = W_update - alpha * dw

    # Step 5: Compute new cost
      # Cost
      cost = cost_function(X, Y, W_update)
      cost_history.append(cost)
    # PRINT one line per iteration
      print(f"Iteration {iteration+1}:")
      print("  Weights:\n", W_update)
      print("  Cost:", cost)
      print("-" * 30)
  return W_update, cost_history


In [111]:
#TODO 7 
# Make sure following Test Case is passe by your code from To - Do - 6 or your Gradient Descent Implementation:
# Test Code for Gradient Descent function:

# Generate random test data
np.random.seed(0)  # For reproducibility
X = np.random.rand(100, 3)   # 100 samples, 3 features
Y = np.random.rand(100)
W = np.random.rand(3)        # Initial guess

# Hyperparameters
alpha = 0.01
iterations = 10

# Run Gradient Descent
final_params, cost_history = gradient_descent(X, Y, W, alpha, iterations)

print("Final Parameters:")
print(final_params)

print("\nFinal Cost:", cost_history[-1])
print("Cost History Length:", len(cost_history))


Iteration 1:
  Weights:
 [[0.3996496 ]
 [0.92745322]
 [0.09826523]]
  Cost: 0.21422394189320307
------------------------------
Iteration 2:
  Weights:
 [[0.39805338]
 [0.92562822]
 [0.09692923]]
  Cost: 0.21269761199879803
------------------------------
Iteration 3:
  Weights:
 [[0.39647071]
 [0.9238163 ]
 [0.0956068 ]]
  Cost: 0.21119652631361235
------------------------------
Iteration 4:
  Weights:
 [[0.39490147]
 [0.92201736]
 [0.09429783]]
  Cost: 0.20972025896641117
------------------------------
Iteration 5:
  Weights:
 [[0.39334557]
 [0.92023128]
 [0.09300221]]
  Cost: 0.2082683912857068
------------------------------
Iteration 6:
  Weights:
 [[0.39180287]
 [0.91845795]
 [0.09171981]]
  Cost: 0.2068405116780125
------------------------------
Iteration 7:
  Weights:
 [[0.39027327]
 [0.91669728]
 [0.09045053]]
  Cost: 0.2054362155081552
------------------------------
Iteration 8:
  Weights:
 [[0.38875666]
 [0.91494916]
 [0.08919425]]
  Cost: 0.2040551049816124
-------------------

In [112]:
#TODO 8
# Implementation of RMSE in the Code - Complete the following code or write your own:
# Code for RMSE:


# Model Evaluation - RMSE
def rmse(Y, Y_pred):
    """
    This function calculates the Root Mean Squared Error (RMSE).

    Input Arguments:
    Y      : Array of actual (target) dependent variables
    Y_pred : Array of predicted dependent variables

    Output Arguments:
    rmse   : Root Mean Squared Error
    """

    # Ensure proper shapes
    Y = np.array(Y).reshape(-1, 1)
    Y_pred = np.array(Y_pred).reshape(-1, 1)

    # RMSE calculation
    rmse_value = np.sqrt(np.mean((Y - Y_pred) ** 2))

    return rmse_value


In [113]:
#TODO 9
# Complete the following code or write your own for r2 loss:
# Code for R-Squared Error:


# Model Evaluation - R2
def r2(Y, Y_pred):
    """
    This function calculates the R Squared Error.

    Input Arguments:
    Y      : Array of actual (target) dependent variables
    Y_pred : Array of predicted dependent variables

    Output Arguments:
    r2     : R Squared Error
    """

    # Ensure proper shapes
    Y = np.array(Y).reshape(-1, 1)
    Y_pred = np.array(Y_pred).reshape(-1, 1)

    # Mean of actual values
    mean_y = np.mean(Y)

    # Total Sum of Squares (SS_tot)
    ss_tot = np.sum((Y - mean_y) ** 2)

    # Residual Sum of Squares (SS_res)
    ss_res = np.sum((Y - Y_pred) ** 2)

    # R-squared calculation
    r2_value = 1 - (ss_res / ss_tot)

    return r2_value


In [114]:
#TODO 10
# We will define a function that:
# 1. Loads the data and splits it into training and test sets.
# 2. Prepares the feature matrix (X) and target vector (Y).
# 3. Defines the weight matrix (W) and initializes the learning rate and number of iterations.
# 4. Calls the gradient descent function to learn the parameters.
# 5. Evaluates the model using RMSE and R2
# Re-write the following code or Write your own:

# Compiling everything:


#  Main Function
def main():

    # Step 1: Load dataset
    data = pd.read_csv("student.csv")

    # Step 2: Prepare features (X) and target (Y)
    X = data[['Math', 'Reading']].values
    Y = data['Writing'].values

    # Step 3: Train-Test Split (80-20)
    np.random.seed(42)
    indices = np.random.permutation(len(X))
    split_index = int(0.8 * len(X))

    train_idx = indices[:split_index]
    test_idx = indices[split_index:]

    X_train, X_test = X[train_idx], X[test_idx]
    Y_train, Y_test = Y[train_idx], Y[test_idx]

    # Step 4: Initialize weights and hyperparameters
    W = np.zeros(X_train.shape[1])
    alpha = 0.0001
    iterations = 10

    # Step 5: Train using Gradient Descent
    W_optimal, cost_history = gradient_descent(
        X_train, Y_train, W, alpha, iterations
    )

    # Step 6: Make predictions on test data
    Y_pred = np.dot(X_test, W_optimal)

    # Step 7: Evaluate model
    model_rmse = rmse(Y_test, Y_pred)
    model_r2 = r2(Y_test, Y_pred)

    # Step 8: Output results
    print("Final Weights:\n", W_optimal)
    print("\nCost History (First 10 Iterations):\n", cost_history[:10])
    print("\nRMSE on Test Set:", model_rmse)
    print("R-Squared on Test Set:", model_r2)


# Execute
if __name__ == "__main__":
    main()



Iteration 1:
  Weights:
 [[0.48033663]
 [0.49891288]]
  Cost: 35.916694950829864
------------------------------
Iteration 2:
  Weights:
 [[0.48805486]
 [0.51025007]]
  Cost: 33.94509242319125
------------------------------
Iteration 3:
  Weights:
 [[0.48655881]
 [0.51206745]]
  Cost: 33.83518317899331
------------------------------
Iteration 4:
  Weights:
 [[0.4848899 ]
 [0.51369237]]
  Cost: 33.72689862161107
------------------------------
Iteration 5:
  Weights:
 [[0.48322451]
 [0.51530686]]
  Cost: 33.619522977208575
------------------------------
Iteration 6:
  Weights:
 [[0.48156605]
 [0.51691449]]
  Cost: 33.51304835155669
------------------------------
Iteration 7:
  Weights:
 [[0.47991457]
 [0.51851536]]
  Cost: 33.40746718385679
------------------------------
Iteration 8:
  Weights:
 [[0.47827003]
 [0.5201095 ]]
  Cost: 33.30277197685672
------------------------------
Iteration 9:
  Weights:
 [[0.4766324 ]
 [0.52169693]]
  Cost: 33.19895529621589
------------------------------

In [115]:
#TODO 11
# 1. Did your Model Overfitt, Underfitts, or performance is acceptable.
# 2. Experiment with different value of learning rate, making it higher and lower, observe the result.


def evaluate_findings():

    print("\nMODEL FINDINGS\n")

    # Step 1: Load Dataset
    print("Step 1: Loading dataset (student.csv)")
    data = pd.read_csv("student.csv")

    X = data[['Math', 'Reading']].values
    Y = data['Writing'].values

    print("Dataset loaded successfully.")
    print("Features: Math & Reading")
    print("Target: Writing\n")

    # Step 2: Train-Test Split
    print("Step 2: Splitting dataset into Training (80%) and Testing (20%)")
    np.random.seed(42)
    indices = np.random.permutation(len(X))
    split = int(0.8 * len(X))

    X_train, X_test = X[indices[:split]], X[indices[split:]]
    Y_train, Y_test = Y[indices[:split]], Y[indices[split:]]

    print("Training samples:", len(X_train))
    print("Testing samples:", len(X_test), "\n")

    # Step 3: Experiment with Learning Rates
    print("Step 3: Experimenting with different learning rates")
    print("Purpose: Observe convergence speed and model stability\n")

    learning_rates = [0.00001, 0.0001, 0.001, 0.01]
    iterations = 10
    W_init = np.zeros(X_train.shape[1])

    for alpha in learning_rates:

        print(f"--- Learning Rate: {alpha} ---")

        # Train model
        W_final, cost_history = gradient_descent(
            X_train, Y_train, W_init, alpha, iterations
        )

        # Predictions
        Y_train_pred = np.dot(X_train, W_final)
        Y_test_pred = np.dot(X_test, W_final)

        # Evaluation Metrics
        train_rmse = rmse(Y_train, Y_train_pred)
        test_rmse = rmse(Y_test, Y_test_pred)
        test_r2 = r2(Y_test, Y_test_pred)

        print(f"Training RMSE: {train_rmse:.4f}")
        print(f"Testing RMSE:  {test_rmse:.4f}")
        print(f"Testing R²:    {test_r2:.4f}")

        # Step 4: Explain Model Behavior
        if train_rmse < test_rmse:
            print("Explanation:")
            print("- Training error is lower than testing error.")
            print("- This is expected and indicates NO overfitting.")
            print("- Model performance is acceptable.\n")
        else:
            print("Explanation:")
            print("- Training error is higher than testing error.")
            print("- This may indicate overfitting or instability.\n")

        # Step 5: Learning Rate Interpretation
        if alpha < 0.0001:
            print("Learning Rate Observation:")
            print("- Very small learning rate.")
            print("- Converges slowly but stable.\n")
        elif alpha > 0.001:
            print("Learning Rate Observation:")
            print("- Large learning rate.")
            print("- May cause oscillation or divergence.\n")
        else:
            print("Learning Rate Observation:")
            print("- Balanced learning rate.")
            print("- Faster convergence with stability.\n")

        print("-" * 50)


# Execute 
if __name__ == "__main__":
    evaluate_findings()




MODEL FINDINGS

Step 1: Loading dataset (student.csv)
Dataset loaded successfully.
Features: Math & Reading
Target: Writing

Step 2: Splitting dataset into Training (80%) and Testing (20%)
Training samples: 800
Testing samples: 200 

Step 3: Experimenting with different learning rates
Purpose: Observe convergence speed and model stability

--- Learning Rate: 1e-05 ---
Iteration 1:
  Weights:
 [[0.04803366]
 [0.04989129]]
  Cost: 4013.6789601666273
------------------------------
Iteration 2:
  Weights:
 [[0.09134114]
 [0.09490682]]
  Cost: 3271.549326657264
------------------------------
Iteration 3:
  Weights:
 [[0.13038584]
 [0.13552465]]
  Cost: 2667.813093201697
------------------------------
Iteration 4:
  Weights:
 [[0.16558573]
 [0.17217596]]
  Cost: 2176.662076631642
------------------------------
Iteration 5:
  Weights:
 [[0.19731779]
 [0.20524966]]
  Cost: 1777.1009126182087
------------------------------
Iteration 6:
  Weights:
 [[0.22592205]
 [0.23509652]]
  Cost: 1452.0495