In [1]:
import pandas as pd
import numpy as np
import copy, math
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

In [2]:
def load_training_data(dataset_path):
    dataset = pd.read_csv(dataset_path, index_col=0)
    y = dataset[['Price 10^3 TMT']].to_numpy().ravel()
    dataset.drop("Pubtime", axis=1, inplace=True)
    dataset.drop("Price 10^3 TMT", axis=1, inplace=True)
    X = dataset.to_numpy()

    return X, y

In [3]:
X, y  = load_training_data("data/Cars_training_data.csv")

In [4]:
def compute_cost(X, y, w, b):
    m = X.shape[0]
    cost = 0.0

    for i in range(m):
        f_wb_i = np.dot(X[i], w) + b
        cost = cost + (f_wb_i - y[i])**2

    cost = cost / (2 * m)
    return cost

In [5]:
def compute_gradient(X, y, w, b):
    m, n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):
        err = float((np.dot(X[i], w) + b) - y[i])
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err * X[i, j]
        dj_db = dj_db + err

    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_db, dj_dw
        

In [6]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in

    for i in tqdm(range(num_iters)):
        dj_db, dj_dw = gradient_function(X, y, w, b)

        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        if i < 100000:
            J_history.append(cost_function(X, y, w, b))

        #if i%math.ceil(num_iters / 10) == 0:
         #   print(f"Iteration {i:4d}: Cost {J_history[-1]:8.2f}    ")

    return w, b, J_history

In [None]:

initial_w = np.zeros_like(X[0])
initial_b = 0.

iterations = 10000
alpha = 0.01

scaler_X = StandardScaler()
X = scaler_X.fit_transform(X)

y = y.reshape(-1, 1)
scaler_y = StandardScaler()
y = scaler_y.fit_transform(y).ravel()

w_final, b_final, J_hist = gradient_descent(X, y, initial_w, initial_b, compute_cost, compute_gradient, alpha, iterations)

print(f"b, w found by gradient descent: {b_final:0.2f}, {w_final}")
m, _ = X.shape
for i in range(m):
    print(f"prediction: {np.dot(X[i], w_final) + b_final:0.2f}, target value: {y[i]}")
    

  1%|▋                                                                            | 95/10000 [01:54<3:16:17,  1.19s/it]

In [70]:
scaler = StandardScaler()

In [119]:
X_norm = scaler.fit_transform(X)
y = y.reshape(-1, 1)
y_norm = scaler.fit_transform(y).ravel()

In [76]:
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X,axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm,axis=0)}")

Peak to Peak range by column in Raw        X:[45  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1

In [120]:
sgdr = SGDRegressor(max_iter=1000000000)

In [121]:
sgdr.fit(X_norm, y_norm)

0,1,2
,loss,'squared_error'
,penalty,'l2'
,alpha,0.0001
,l1_ratio,0.15
,fit_intercept,True
,max_iter,1000000000
,tol,0.001
,shuffle,True
,verbose,0
,epsilon,0.1


In [122]:
print(sgdr)

SGDRegressor(max_iter=1000000000)


In [123]:
y_pred_sgd = sgdr.predict(X_norm)

In [124]:
b_norm = sgdr.intercept_

In [125]:
w_norm = sgdr.coef_

In [130]:
y_pred = np.dot(X_norm, w_norm) + b_norm
y_pred = scaler.inverse_transform(y_norm.reshape(-1, 1)).ravel()

In [135]:
print(f"prediction using np.dot() and sgdr.predict match: {(y_pred == y_pred_sgd).all()}")
print(f"Prediction on training set:\n{y_pred[:-5]}" )
print(f"Target values \n{y[:-5]}")

prediction using np.dot() and sgdr.predict match: False
Prediction on training set:
[695. 650. 625. ...  35. 195.  17.]
Target values 
[[695.]
 [650.]
 [625.]
 ...
 [ 35.]
 [195.]
 [ 17.]]


In [None]:
arr = np.array([])