## Gradient Descent

## load the train data

In [2]:
import numpy as np
# Location of the training CSV. The last column is used as the target and
# all preceding columns as features (see the slicing below).
TRAIN_CSV_PATH = r"C:\Users\srish\Downloads\0000000000002417_training_boston_x_y_train.csv"

# ndmin=2 keeps the result two-dimensional even when the file contains a
# single row; without it loadtxt returns a 1-D array and the column slicing
# below raises an IndexError.
train_data = np.loadtxt(TRAIN_CSV_PATH, delimiter=',', ndmin=2)

# Split into the feature matrix (every column but the last) and the target
# vector (the last column).
x_train = train_data[:, :-1]
y_train = train_data[:, -1]
print(x_train.shape)

(379, 13)


## load the test data

In [3]:
# Location of the test CSV; it is passed as-is to the scaler later, so it is
# presumably features only (no target column) -- verify against the file.
TEST_CSV_PATH = r"C:\Users\srish\Downloads\0000000000002417_test_boston_x_test.csv"

# ndmin=2 guarantees a 2-D (rows, columns) array even for a single-row file,
# which is the layout the downstream scaler and matrix product operate on.
test_data = np.loadtxt(TEST_CSV_PATH, delimiter=',', ndmin=2)

## feature scaling

In [4]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply that same fitted
# transformation to both sets so the test data is scaled with the training
# statistics rather than its own.
scaler = StandardScaler().fit(x_train)
X_train_scaled = scaler.transform(x_train)
X_test_scaled = scaler.transform(test_data)


## initialize parameters

In [5]:
def initialize_parameters(n):
    """Create the starting parameters for the linear model.

    Args:
        n: Number of weights to create (one per feature).

    Returns:
        A tuple ``(W, b)`` where ``W`` is a length-``n`` array of zeros and
        ``b`` is the integer ``0``.
    """
    return np.zeros(n), 0

## compute cost

In [6]:
def compute_cost(X, Y, W, b):
    """Return the halved mean squared error of the linear model ``X·W + b``.

    Args:
        X: Feature matrix; ``X.shape[0]`` is taken as the number of samples.
        Y: Target values, one per sample.
        W: Weight vector.
        b: Bias term added to every prediction.

    Returns:
        ``sum((X·W + b - Y)^2) / (2 * m)`` where ``m`` is the sample count.
    """
    n_samples = X.shape[0]
    residuals = np.dot(X, W) + b - Y
    squared_error_total = np.sum(residuals ** 2)
    return squared_error_total / (2 * n_samples)

## gradient descent function

In [7]:
def gradient_descent(X, Y, W, b, learning_rate, num_iterations, print_every=100):
    """Fit linear-regression parameters with batch gradient descent.

    Every iteration predicts ``X·W + b`` on the full data set, computes the
    gradients of the halved mean-squared-error cost with respect to ``W`` and
    ``b``, and moves both parameters one step against their gradients.

    Args:
        X: Feature matrix; ``X.shape[0]`` is taken as the number of samples.
        Y: Target values, one per sample.
        W: Initial weight vector. It is NOT modified: the updates are applied
            to a float copy, so the caller's array keeps its original values
            and integer-dtype input is accepted.
        b: Initial bias.
        learning_rate: Step size applied to both gradients.
        num_iterations: Number of update steps to perform.
        print_every: The cost is printed on every iteration whose index is a
            multiple of this value. Pass ``0`` or ``None`` to silence the
            progress output. Defaults to 100.

    Returns:
        A tuple ``(W, b, cost_history)``: the final weights, the final bias,
        and a list holding the cost measured after each update step.
    """
    m = X.shape[0]
    # Work on a private float copy: the in-place update below would otherwise
    # overwrite the caller's array and would fail outright on integer dtypes.
    W = np.array(W, dtype=float)
    cost_history = []

    for i in range(num_iterations):
        Y_pred = np.dot(X, W) + b
        error = Y_pred - Y

        # Gradients of the cost sum(error^2) / (2m) w.r.t. W and b.
        dW = (1/m) * np.dot(X.T, error)
        db = (1/m) * np.sum(error)

        W -= learning_rate * dW
        # Rebind rather than update in place so an array-like b passed by the
        # caller is never mutated either.
        b = b - learning_rate * db

        # The cost is recorded after the step, i.e. for the updated parameters.
        cost = compute_cost(X, Y, W, b)
        cost_history.append(cost)

        if print_every and i % print_every == 0:
            print(f'Iteration {i}: Cost {cost}')

    return W, b, cost_history

## model training 

In [45]:
# Hyperparameters: step size and number of gradient-descent updates
learning_rate = 0.001
num_iterations = 10000

# Start from all-zero weights (one per scaled feature column) and a zero bias
n_features = X_train_scaled.shape[1]
W, b = initialize_parameters(n_features)

# Fit the model; cost_history holds the cost recorded after every update
W, b, cost_history = gradient_descent(
    X=X_train_scaled,
    Y=y_train,
    W=W,
    b=b,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
)

Iteration 0: Cost 298.8272044797119
Iteration 100: Cost 238.58337488295624
Iteration 200: Cost 194.96043348189167
Iteration 300: Cost 160.99141591398828
Iteration 400: Cost 133.73886123407397
Iteration 500: Cost 111.62186496132942
Iteration 600: Cost 93.59324486684422
Iteration 700: Cost 78.87103528561157
Iteration 800: Cost 66.83896816497757
Iteration 900: Cost 57.00079570234554
Iteration 1000: Cost 48.9536030020327
Iteration 1100: Cost 42.36920505569396
Iteration 1200: Cost 36.97993229692506
Iteration 1300: Cost 32.567334889263925
Iteration 1400: Cost 28.953066894375187
Iteration 1500: Cost 25.99148552959303
Iteration 1600: Cost 23.563628327192525
Iteration 1700: Cost 21.572306345726577
Iteration 1800: Cost 19.93810429630061
Iteration 1900: Cost 18.596118695031155
Iteration 2000: Cost 17.49329705405777
Iteration 2100: Cost 16.586266787015134
Iteration 2200: Cost 15.839563273046833
Iteration 2300: Cost 15.224183372572815
Iteration 2400: Cost 14.716404374731608
Iteration 2500: Cost 14.

## prediction

In [43]:
def predict(X, W, b):
    """Evaluate the linear model on ``X``.

    Args:
        X: Feature matrix (or a single feature vector).
        W: Weight vector.
        b: Bias term.

    Returns:
        ``np.dot(X, W) + b``.
    """
    weighted_sum = np.dot(X, W)
    return weighted_sum + b

# Apply the trained weights and bias to the scaled test features
Y_test_pred = predict(X=X_test_scaled, W=W, b=b)
print(Y_test_pred)

[12.42563469 29.00370197 22.37586324 24.45347438 20.64679968  2.73890068
 30.35699344 24.84929752 18.66924766 23.53947243 24.11308202 17.72267047
 17.49445761 21.67612632 42.32785968 23.87245073 24.47450784 27.57001734
 20.25695143 31.18228165 23.83064278 24.99990926 33.95158663 36.42336491
 32.05994495 16.69334724 23.48163516 32.98386153 25.14992138 33.71210506
 16.89819873 26.05586664 23.27834218 25.47225992 15.00813484 29.5994632
 26.24835458 20.41312548 24.40798732  9.44990555  8.36503245 29.00274454
 29.60467485 19.77255048 20.36095518  3.1353675  39.52084142 25.71787804
 30.32731743 16.78773249 17.88054069 40.97489295 17.56568531 20.92938995
 15.60891378 21.391346   18.45366591 23.15805401 13.68374184 17.22540509
 15.03756912 29.15189681 25.20530962 25.49639361 17.19744897 17.42356427
 34.69860784 17.00537975 27.07977108 22.53156239 29.29051228 27.10377132
 17.73756061  5.73819461 36.8382083  25.09787934 30.1232945  27.21600933
 16.2461476  32.65921002 19.26347292 22.69537921 22.

## saving the predictions to a file

In [47]:
# Write one prediction per line, formatted with five decimal places
np.savetxt(fname='predictionstest.csv', X=Y_test_pred, fmt='%.5f')