In [None]:
import pandas as pd

# Read the dataset
df = pd.read_csv("/content/drive/MyDrive/Dataset2/student (1).csv")

# Top 5 rows
print("Top 5 rows:")
print(df.head())

# Bottom 5 rows
print("\nBottom 5 rows:")
print(df.tail())

# Dataset information
df.info()

# Descriptive statistics
df.describe()

# Feature matrix (inputs)
X = df[['Math', 'Reading']]

# Label (target)
Y = df['Writing']

# Display X and Y
print("Feature Matrix (X):")
print(X.head())

print("\nLabel (Y):")
print(Y.head())

Top 5 rows:
   Math  Reading  Writing
0    48       68       63
1    62       81       72
2    79       80       78
3    76       83       79
4    59       64       62

Bottom 5 rows:
     Math  Reading  Writing
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Math     1000 non-null   int64
 1   Reading  1000 non-null   int64
 2   Writing  1000 non-null   int64
dtypes: int64(3)
memory usage: 23.6 KB
Feature Matrix (X):
   Math  Reading
0    48       68
1    62       81
2    79       80
3    76       83
4    59       64

Label (Y):
0    63
1    72
2    78
3    79
4    62
Name: Writing, dtype: int64


In [None]:
import numpy as np

X = df[['Math', 'Reading']].to_numpy().T

d = X.shape[0]
W = np.zeros(d)

Y = df['Writing'].to_numpy()

Y_pred = np.dot(W.T, X)

print("W shape:", W.shape)        # (d,)
print("X shape:", X.shape)        # (d, n)
print("Y shape:", Y.shape)        # (n,)
print("Y_pred shape:", Y_pred.shape)  # (n,)

W shape: (2,)
X shape: (2, 1000)
Y shape: (1000,)
Y_pred shape: (1000,)


In [None]:
# Number of samples
n = X.shape[1]

# 80% split
train_size = int(0.8 * n)

X_train = X[:, :train_size]
X_test = X[:, train_size:]

Y_train = Y[:train_size]
Y_test = Y[train_size:]

print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)

print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test.shape)

X_train shape: (2, 800)
Y_train shape: (800,)
X_test shape: (2, 200)
Y_test shape: (200,)


In [None]:
import numpy as np

def cost_function(X,Y,W):
    m = len(Y)
    cost = np.sum((X.dot(W)-Y)**2)/(2*m)
    return cost

In [None]:
def gradient_descent(X, Y, W, alpha, iterations):
  """
  Perform gradient descent to optimize the parameters of a linear regression model.
  Parameters:
  X (numpy.ndarray): Feature matrix (m x n).
  Y (numpy.ndarray): Target vector (m x 1).
  W (numpy.ndarray): Initial guess for parameters (n x 1).
  alpha (float): Learning rate.
  iterations (int): Number of iterations for gradient descent.
  Returns:
  tuple: A tuple containing the final optimized parameters (W_update) and the history of cost values
  .
  W_update (numpy.ndarray): Updated parameters (n x 1).
  cost_history (list): History of cost values over iterations.
  """
  # Initialize cost history
  cost_history = [0] * iterations
  # Number of samples
  m = len(Y)
  for iteration in range(iterations):
    # Step 1: Hypothesis Values
    Y_pred = X.dot(W)
    # Step 2: Difference between Hypothesis and Actual Y
    loss = Y_pred - Y
    # Step 3: Gradient Calculation
    dw = (X.T.dot(loss))/m
    # Step 4: Updating Values of W using Gradient
    W_update = W - alpha*dw
    # Step 5: New Cost Value
    cost = cost_function(X, Y, W_update)
    cost_history[iteration] = cost
  return W_update, cost_history
# Generate random test data
np.random.seed(0) # For reproducibility
X = np.random.rand(100, 3) # 100 samples, 3 features
Y = np.random.rand(100)
W = np.random.rand(3) # Initial guess for parameters
# Set hyperparameters
alpha = 0.01
iterations = 1000
# Test the gradient_descent function
final_params, cost_history = gradient_descent(X, Y, W, alpha, iterations)
# Print the final parameters and cost history
print("Final Parameters:", final_params)
print("Cost History:", cost_history)

Final Parameters: [0.3996496  0.92745322 0.09826523]
Cost History: [np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float64(0.10711197094660153), np.float

In [None]:
# Model Evaluation - RMSE
def rmse(Y, Y_pred):
  """
  This Function calculates the Root Mean Squres.
  Input Arguments:
  Y: Array of actual(Target) Dependent Varaibles.
  Y_pred: Array of predeicted Dependent Varaibles.
  Output Arguments:
  rmse: Root Mean Square.
  """
  rmse = np.sqrt(np.mean((Y-Y_pred)**2))
  return rmse

In [None]:
# Model Evaluation - R2
def r2(Y, Y_pred):
  """
  This Function calculates the R Squared Error.
  Input Arguments:
  Y: Array of actual(Target) Dependent Varaibles.
  Y_pred: Array of predeicted Dependent Varaibles.
  Output Arguments:
  rsquared: R Squared Error.
  """
  mean_y = np.mean(Y)
  ss_tot = np.sum((Y-mean_y)**2)
  ss_res = np.sum((Y-Y_pred)**2)
  r2 = 1 - (ss_res / ss_tot)
  return r2

In [None]:
def main():
    # Step 1: Load dataset
    data = pd.read_csv('/content/drive/MyDrive/Dataset2/student (1).csv')

    # Step 2: Feature matrix and target vector
    X = data[['Math', 'Reading']].values    # (samples, features)
    Y = data['Writing'].values              # (samples,)

    # Step 3: Manual 80-20 train-test split
    n_samples = X.shape[0]
    train_size = int(0.8 * n_samples)
    X_train = X[:train_size, :]    # (samples_train, features)
    X_test  = X[train_size:, :]    # (samples_test, features)
    Y_train = Y[:train_size]
    Y_test  = Y[train_size:]

    # Step 4: Initialize weights, learning rate, iterations
    W = np.zeros(X_train.shape[1])  # (features,)
    alpha = 0.00001
    iterations = 1000

    # Step 5: Perform Gradient Descent
    W_optimal, cost_history = gradient_descent(X_train, Y_train, W, alpha, iterations)

    # Step 6: Predictions on the test set
    Y_pred = X_test.dot(W_optimal)  # (samples_test,)

    # Step 7: Evaluate model
    model_rmse = rmse(Y_test, Y_pred)
    model_r2   = r2(Y_test, Y_pred)

    # Step 8: Output results
    print("Final Weights:", W_optimal)
    print("Cost History (First 10 iterations):", cost_history[:10])
    print("RMSE on Test Set:", model_rmse)
    print("R-Squared on Test Set:", model_r2)

# Execute main
if __name__ == "__main__":
    main()


Final Weights: [0.04808488 0.05019283]
Cost History (First 10 iterations): [np.float64(2020.584425936822), np.float64(2020.584425936822), np.float64(2020.584425936822), np.float64(2020.584425936822), np.float64(2020.584425936822), np.float64(2020.584425936822), np.float64(2020.584425936822), np.float64(2020.584425936822), np.float64(2020.584425936822), np.float64(2020.584425936822)]
RMSE on Test Set: 62.86219395708683
R-Squared on Test Set: -18.42370623358185
