In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [41]:
# Load the student exam scores. read_csv already returns a DataFrame, so the
# extra pd.DataFrame(...) wrapper was redundant; `df` is kept as a second name
# for the same frame because later cells refer to both `data` and `df`.
data = pd.read_csv("/content/student.csv")
df = data

In [42]:
# Quick look at the data: first rows, last rows, schema, summary statistics.
print(df.head())
print(df.tail())
# info() writes its report directly and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
df.info()
print(df.describe())

   Math  Reading  Writing
0    48       68       63
1    62       81       72
2    79       80       78
3    76       83       79
4    59       64       62
     Math  Reading  Writing
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Math     1000 non-null   int64
 1   Reading  1000 non-null   int64
 2   Writing  1000 non-null   int64
dtypes: int64(3)
memory usage: 23.6 KB
None
              Math      Reading      Writing
count  1000.000000  1000.000000  1000.000000
mean     67.290000    69.872000    68.616000
std      15.085008    14.657027    15.241287
min      13.000000    19.000000    14.000000
25%      58.000000    60.750000    58.000000
50%      68.000000    70.000000    69.500000
75%      78.000000    81.000000 

In [43]:
# Separate the inputs from the value to predict:
#   X -> Math and Reading scores (one row per student, two columns)
#   Y -> Writing score (one value per student)
X = data[['Math', 'Reading']].to_numpy()
Y = data['Writing'].to_numpy()

In [44]:
#manual train and test data split
def train_test_split(X, Y, test_size=0.2, random_state=None):
    """Randomly split X and Y into train and test parts, keeping rows aligned.

    Parameters
    ----------
    X, Y : array-like supporting integer-array indexing (e.g. numpy arrays)
        Inputs and targets; both must have the same length.
    test_size : float, default 0.2
        Fraction of the rows placed in the test part, between 0 and 1.
        The test row count is ``int(len(X) * test_size)`` (rounded down).
    random_state : int or None, default None
        Seed for the shuffle. ``None`` keeps the original behaviour of drawing
        from NumPy's global random state; any other value makes the split
        reproducible on its own, independent of the global state.

    Returns
    -------
    X_train, X_test, Y_train, Y_test

    Raises
    ------
    ValueError
        If ``test_size`` is outside [0, 1] or X and Y differ in length.
    """
    n = len(X)
    if len(Y) != n:
        raise ValueError(f"X and Y must have the same length, got {n} and {len(Y)}")
    # A negative or >1 fraction would otherwise slice silently into a
    # nonsensical split (e.g. a negative count slices from the end).
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    test_count = int(n * test_size)

    if random_state is None:
        indices = np.random.permutation(n)
    else:
        indices = np.random.default_rng(random_state).permutation(n)

    # The first `test_count` shuffled positions form the test part.
    test_indices = indices[:test_count]
    train_indices = indices[test_count:]

    X_train = X[train_indices]
    X_test = X[test_indices]
    Y_train = Y[train_indices]
    Y_test = Y[test_indices]

    return X_train, X_test, Y_train, Y_test

# Seed NumPy's global random state first: train_test_split shuffles with it,
# so without a seed this split would differ on every run.
np.random.seed(42)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

In [45]:
#Cost Function (MSE)
def cost_function(X, Y, W):
    """Return half the mean squared error of the linear prediction X·W against Y.

    Computes ``(1 / (2 * m)) * sum((X·W - Y) ** 2)`` where ``m = len(Y)``.
    """
    residuals = np.dot(X, W) - Y
    n_samples = len(Y)
    squared_error_total = np.sum(residuals ** 2)
    return (1 / (2 * n_samples)) * squared_error_total


In [46]:
# Sanity-check cost_function on a toy problem with a known answer. The arrays
# get their own names so the real hold-out split (X_test / Y_test) created
# earlier in the notebook is not overwritten.
X_toy = np.array([[1, 2], [3, 4], [5, 6]])
Y_toy = np.array([3, 7, 11])
W_toy = np.array([1, 1])

cost = cost_function(X_toy, Y_toy, W_toy)
# Each row sums to its target (1+2=3, 3+4=7, 5+6=11), so the fit is exact.
assert np.isclose(cost, 0.0), f"expected zero cost, got {cost}"
print("Cost:", cost)


Cost: 0.0


In [47]:
def gradient_descent(X, Y, W, alpha, iterations):
    """Run batch gradient descent for a linear model and record the cost.

    Each step computes the residuals ``X·W - Y``, forms the gradient
    ``(1 / m) * Xᵀ·residuals`` with ``m = len(Y)``, and moves the weights by
    ``alpha`` times that gradient. The caller's ``W`` is not modified; a new
    array is produced on every step.

    Returns the final weights and a list holding the cost after each update
    (one entry per iteration).
    """
    n_samples = len(Y)
    weights = W
    history = []

    for _ in range(iterations):
        residuals = np.dot(X, weights) - Y
        # X.T turns (samples, features) into (features, samples), so the
        # product below yields one gradient component per weight.
        gradient = (1 / n_samples) * np.dot(X.T, residuals)
        weights = weights - alpha * gradient
        history.append(cost_function(X, Y, weights))

    return weights, history


In [48]:
# Smoke-test gradient_descent on random data. Demo-specific names keep the
# real feature matrix X and target Y loaded from the CSV from being replaced
# by random numbers.
np.random.seed(42)
X_demo = np.random.rand(100, 3)
Y_demo = np.random.rand(100)
W_demo = np.random.rand(3)

final_params, cost_history = gradient_descent(X_demo, Y_demo, W_demo, 0.01, 1000)

# The recorded cost should be lower at the end of the run than at the start.
assert cost_history[-1] < cost_history[0], "gradient descent did not reduce the cost"

In [49]:
def rmse(Y, Y_pred):
    """Return the root mean squared error between targets Y and predictions Y_pred."""
    residuals = Y - Y_pred
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)


In [50]:
def r2(Y, Y_pred):
    """Return the coefficient of determination (R²) of Y_pred against Y.

    Computed as ``1 - ss_res / ss_tot`` where ``ss_res`` is the sum of squared
    residuals and ``ss_tot`` is the sum of squared deviations of Y from its
    mean.

    When Y is constant, ``ss_tot`` is zero and the ratio is undefined. In that
    case a finite value is returned instead of nan/-inf: 1.0 if the
    predictions match exactly, otherwise 0.0.
    """
    mean_y = np.mean(Y)
    ss_tot = np.sum((Y - mean_y) ** 2)
    ss_res = np.sum((Y - Y_pred) ** 2)
    if ss_tot == 0:
        # Constant target: avoid dividing by zero.
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)


In [54]:
def main(csv_path="/content/student.csv", alpha=0.00001, iterations=1000, test_size=0.2):
    """Train and evaluate a linear model predicting Writing from Math and Reading.

    Loads the CSV, splits it into train and test parts, fits the weights with
    gradient descent starting from zeros, and prints the learned weights plus
    RMSE and R² on the test part. The model has one weight per input column
    and no intercept term.

    Parameters
    ----------
    csv_path : str
        Location of the CSV file; it must contain the columns
        'Math', 'Reading' and 'Writing'.
    alpha : float
        Learning rate passed to gradient_descent.
    iterations : int
        Number of gradient-descent updates.
    test_size : float
        Fraction of rows held out for evaluation.

    Returns
    -------
    None. Results are printed.
    """
    data = pd.read_csv(csv_path)

    X = data[['Math', 'Reading']].values
    Y = data['Writing'].values

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size)

    # Start from all-zero weights, one per feature column.
    W = np.zeros(X_train.shape[1])

    # The per-iteration cost history is not used here, so it is discarded.
    W_optimal, _ = gradient_descent(X_train, Y_train, W, alpha, iterations)

    Y_pred = np.dot(X_test, W_optimal)

    print("Weights:", W_optimal)
    print("RMSE:", rmse(Y_test, Y_pred))
    print("R2:", r2(Y_test, Y_pred))


In [55]:
# Run the whole pipeline: load the CSV, split, fit with gradient descent,
# then print the weights and the test-set RMSE and R2.
main()

Weights: [0.35227142 0.64426301]
RMSE: 5.239242570054179
R2: 0.8814904082883996
