In [3]:
# Mount Google Drive inside the Colab runtime; the dataset path read later
# in this cell lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Load the student scores dataset (student.csv; the preview below shows the
# columns Math, Reading and Writing).
data = pd.read_csv('/content/drive/MyDrive/Level5/AI/week5/student.csv')

# Preview the first and last rows, separated by a 400-character dashed rule.
print(data.head())
print("----"*100)
print(data.tail())
print("----"*100)
# NOTE(review): DataFrame.info() writes its report to stdout itself and
# returns None, so wrapping it in print() also emits a trailing "None" line.
print(data.info())
print("----"*100)
# Summary statistics (count, mean, std, quartiles, ...) of the numeric columns.
print(data.describe())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
   Math  Reading  Writing
0    48       68       63
1    62       81       72
2    79       80       78
3    76       83       79
4    59       64       62
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
     Math  Reading  Writing
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72
--------------------------------------------------------------------------------------------------------------------------------------------------

In [4]:
# Predictors are the Math and Reading columns; the target is Writing.
X = data[['Math', 'Reading']].to_numpy()
Y = data['Writing'].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)
def cost_function(X, Y, W):
    """Return the halved mean squared error of the linear model ``X . W``.

    Computes ``(1 / (2 * m)) * sum((np.dot(X, W) - Y) ** 2)`` where ``m`` is
    ``len(Y)``.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    Y : array-like
        Target values; must have the same shape as ``np.dot(X, W)``.
    W : array-like
        Model weights.

    Returns
    -------
    numpy.float64
        The cost for the given weights.

    Raises
    ------
    ValueError
        If ``Y`` is empty, or if the predictions and ``Y`` differ in shape.
    """
    Y = np.asarray(Y)
    m = len(Y)
    if m == 0:
        # Without this guard the 1 / (2 * m) below fails with an opaque
        # ZeroDivisionError.
        raise ValueError("cost_function requires at least one sample")

    Y_pred = np.dot(X, W)
    if np.shape(Y_pred) != Y.shape:
        # E.g. Y of shape (m, 1) against predictions of shape (m,) would
        # broadcast to an (m, m) matrix and silently yield a wrong cost.
        raise ValueError(
            f"shape mismatch: predictions have shape {np.shape(Y_pred)} "
            f"but Y has shape {Y.shape}"
        )

    cost = (1 / (2 * m)) * np.sum((Y_pred - Y) ** 2)
    return cost
# Sanity check for cost_function: with W = [1, 1] every row of X sums exactly
# to its target, so the resulting cost must be zero.
X_test_case = np.array([[1, 2], [3, 4], [5, 6]])
Y_test_case = np.array([3, 7, 11])
W_test_case = np.array([1, 1])

cost = cost_function(X_test_case, Y_test_case, W_test_case)

print("Proceed Further" if cost == 0 else "Something went wrong")
print("Cost:", cost)

Proceed Further
Cost: 0.0


In [5]:
def gradient_descent(X, Y, W, alpha, iterations):
    """Fit linear-model weights with batch gradient descent.

    Each iteration computes the residual ``np.dot(X, W) - Y``, forms the
    gradient ``(1 / m) * np.dot(X.T, residual)`` with ``m = len(Y)``, and
    steps the weights by ``alpha`` times that gradient. The cost after each
    step is obtained from ``cost_function``.

    Parameters
    ----------
    X : feature matrix (must support ``.T``).
    Y : target values.
    W : starting weights; the caller's array is not modified in place.
    alpha : learning rate.
    iterations : number of update steps.

    Returns
    -------
    tuple
        ``(W, cost_history)``: the final weights and a list holding the cost
        recorded after every step.
    """
    n_samples = len(Y)
    cost_history = []

    for _ in range(iterations):
        residual = np.dot(X, W) - Y
        gradient = (1 / n_samples) * np.dot(X.T, residual)
        # Rebind rather than update in place so the caller's W stays intact.
        W = W - alpha * gradient
        cost_history.append(cost_function(X, Y, W))

    return W, cost_history

In [6]:
# Smoke-test gradient_descent on synthetic data. The global NumPy seed is
# fixed first, so the three rand() draws below reproduce the printed numbers
# only while they stay in this order.
np.random.seed(0)

# 100 samples with 3 features, targets and starting weights, all drawn from
# np.random.rand (the targets are not generated from X_rand).
X_rand = np.random.rand(100, 3)
Y_rand = np.random.rand(100)
W_rand = np.random.rand(3)

# NOTE: alpha, iterations and cost_history are bound at notebook (module)
# level by this cell.
alpha = 0.01
iterations = 1000

final_params, cost_history = gradient_descent(
    X_rand, Y_rand, W_rand, alpha, iterations
)

print("Final Parameters:", final_params)
print("Final Cost:", cost_history[-1])

Final Parameters: [0.20551667 0.54295081 0.10388027]
Final Cost: 0.05435492255484332


In [7]:
def rmse(Y, Y_pred):
    """Return the root mean squared error between ``Y`` and ``Y_pred``.

    Parameters
    ----------
    Y : observed values.
    Y_pred : predicted values, broadcastable against ``Y``.

    Returns
    -------
    The square root of the mean of the squared differences.
    """
    squared_error = (Y - Y_pred) ** 2
    mean_squared_error = np.mean(squared_error)
    return np.sqrt(mean_squared_error)

In [8]:
def r2(Y, Y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Computes ``1 - ss_res / ss_tot`` where ``ss_res`` is the sum of squared
    residuals ``(Y - Y_pred) ** 2`` and ``ss_tot`` is the sum of squared
    deviations of ``Y`` from its mean.

    Parameters
    ----------
    Y : observed values.
    Y_pred : predicted values.

    Returns
    -------
    float
        The R^2 score. When ``Y`` is constant (``ss_tot == 0``) the ratio is
        undefined; in that case 1.0 is returned for perfect predictions and
        0.0 otherwise, instead of NaN / -inf.
    """
    mean_y = np.mean(Y)
    ss_tot = np.sum((Y - mean_y) ** 2)
    ss_res = np.sum((Y - Y_pred) ** 2)
    if ss_tot == 0:
        # Constant target: avoid dividing by zero (which would give NaN or
        # -inf plus a RuntimeWarning) and return a finite score instead.
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)


In [9]:
def main():
    """Train on the training split and report metrics on the test split.

    Starts from all-zero weights (one per column of ``X_train``), runs
    ``gradient_descent`` for 1000 iterations with a learning rate of 1e-5,
    predicts with ``np.dot(X_test, W_optimal)`` (no intercept term is added),
    and prints the final weights, the first ten recorded costs, the RMSE and
    the R2 score.

    Reads the notebook-level ``X_train``, ``Y_train``, ``X_test`` and
    ``Y_test``; returns nothing.
    """
    learning_rate = 0.00001
    n_iterations = 1000
    initial_weights = np.zeros(X_train.shape[1])

    W_optimal, cost_history = gradient_descent(
        X_train, Y_train, initial_weights, learning_rate, n_iterations
    )

    test_predictions = np.dot(X_test, W_optimal)

    print("Final Weights:", W_optimal)
    print("Cost History (first 10):", cost_history[:10])

    test_rmse = rmse(Y_test, test_predictions)
    print("RMSE:", test_rmse)

    test_r2 = r2(Y_test, test_predictions)
    print("R2 Score:", test_r2)


main()

Final Weights: [0.34811659 0.64614558]
Cost History (first 10): [np.float64(2013.165570783755), np.float64(1640.286832599692), np.float64(1337.0619994901588), np.float64(1090.4794892850578), np.float64(889.9583270083234), np.float64(726.8940993009545), np.float64(594.2897260808594), np.float64(486.4552052951635), np.float64(398.7634463599484), np.float64(327.4517147324688)]
RMSE: 5.2798239764188635
R2 Score: 0.8886354462786421
