<a href="https://colab.research.google.com/github/Matrix7043/Machine_learning101/blob/main/Linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from typing import Callable
import numpy as np
from numpy import ndarray
import pandas as pd

In [35]:
df = pd.read_csv('/content/sample_data/diamond.csv')
df1 = df[['carat', 'x', 'y', 'z', 'depth']].head(1000)
df2 = df[['price']].head(1000)
y = np.array(df2['price'])
x = np.transpose(np.array([df1['carat'],df1['depth'], df1['x'], df1['y'], df1['z']]))
y = y.reshape(len(x))

In [5]:
Array_function = Callable[[ndarray, ndarray], ndarray]
Chain = list[Array_function]

In [6]:
def init_weights(X: ndarray,
                 zeros: bool = False,
                 ones: bool = False) -> dict[str, ndarray]:

    weights: dict[str, ndarray] = {}

    if ones:
        weights['W'] = np.ones(X.shape[1])
        weights['B'] = np.ones(X.shape[0])
    elif zeros:
        weights['W'] = np.zeros(X.shape[1])
        weights['B'] = np.zeros(X.shape[0])
    else:
        weights['W'] = np.random.randn(X.shape[1])
        weights['B'] = np.random.randn(X.shape[0])

    return weights

In [7]:
def par_deriv(input: ndarray,
          input_nd: ndarray,
          function: Array_function,
          delta: float = 0.001) -> ndarray:

    return (function(input + delta, input_nd) - function(input - delta, input_nd))/(2*delta)

In [8]:
def deriv(func: Callable[[ndarray], ndarray],
          input_: ndarray,
          delta: float = 0.001) -> ndarray:

    return (func(input_ + delta) - func(input_ - delta)) / (2 * delta)

In [9]:
def mse(P: ndarray,
        Y: ndarray) -> ndarray:

    return np.power((Y - P), 2)


In [10]:
def sigmoid(x: ndarray) -> ndarray:

    return 1 / (1 + np.exp(-x))


In [11]:
def chain_deriv(chain: Chain,
                input_range: ndarray) -> ndarray:

    argument: list[ndarray] = [input_range]
    for j, i in enumerate(chain):
        argument.append(i(argument[j]))

    answer = 1
    argument.pop()
    for i, j in zip(chain, argument):
        answer *= deriv(i, j)

    return answer


In [12]:
def forward_pass(x_batch: ndarray,
                 y_batch: ndarray,
                 weights: dict[str, ndarray]) -> dict[str, ndarray]:

                 assert x_batch.shape[0] == y_batch.shape[0]

                 assert x_batch.shape[1] == weights['W'].shape[0]

                 assert weights['B'].shape[0] == y_batch.shape[0]

                 W = weights['W']

                 N = np.dot(x_batch, W)

                 P = N + weights['B']

                 L = np.mean(np.power((y_batch - P), 2))

                 forward_info: dict[str, ndarray] = {}
                 forward_info['X'] = x_batch
                 forward_info['Y'] = y_batch
                 forward_info['N'] = N
                 forward_info['P'] = P

                 return forward_info, L


In [37]:
def back_propagation(weights: dict[str, ndarray],
                     forward_info: dict[str, ndarray]) -> dict[str, ndarray]:

                     dLdP = par_deriv(forward_info['P'], forward_info['Y'], mse)

                     dPdN = np.ones_like(forward_info['N'])

                     dLdN = dLdP * dPdN

                     dPdB = np.ones_like(weights['B'])

                     dNdW = np.transpose(forward_info['X'])

                     dLdW = np.dot(dNdW, dLdN)

                     dLdB = (dLdP * dPdB).sum(axis=0)

                     loss_gradient: dict[str, ndarray] = {}
                     loss_gradient['W'] = dLdW
                     loss_gradient['B'] = dLdB

                     return loss_gradient


In [14]:
def test(X_test: ndarray,
         Y_test: ndarray,
         weights: dict[str, ndarray]) -> tuple[float, float]:

    def mae(Y: ndarray,
            P: ndarray) -> float:

        return np.mean(Y - P)

    def rmse(Y: ndarray,
             P: ndarray) -> float:

        return np.power(np.mean(np.power(P - Y, 2)), 1/2)

    def predict(X: ndarray,
                weights: dict[str, ndarray]):

        N = np.dot(X, weights['W'])
        return N + weights['B'][:X.shape[0]]

    P = predict(X_test, weights)

    return (rmse(Y_test, P), mae(Y_test, P))



In [61]:
def train(X_batch: ndarray,
          Y_batch: ndarray,
          iteration: float = 1e5,
          learning_rate: float = 0.00000001,
          split: float = 0.7) -> ndarray:

    total = X_batch.shape[0]
    num = int(total*split)

    X_train: ndarray = X_batch[:num:]
    Y_train: ndarray = Y_batch[:num:]
    # print(X_train.shape)
    # print(Y_train.shape)

    X_test: ndarray = X_batch[num::]
    Y_test: ndarray = Y_batch[num::]
    # print(X_test.shape)
    # print(Y_test.shape)

    weights = init_weights(X_train)


    for i in range(iteration):

      forward_info, loss = forward_pass(X_train, Y_train, weights)

    #   if i%10 == 0:

    #      print(loss)

      loss_gradient = back_propagation(weights, forward_info)

      for key in weights.keys():

        weights[key] -= learning_rate * loss_gradient[key]

    theta = test(X_test, Y_test, weights)

    print(theta)


In [62]:
theta = train(x, y)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
267168.4029796596
267124.53706590016
267080.6843473983
267036.84482018155
266993.01848027896
266949.2053237206
266905.40534653777
266861.618544763
266817.84491443005
266774.08445157367
266730.3371522299
266686.60301243607
266642.8820282309
266599.1741956535
266555.47951074527
266511.79796954803
266468.129568105
266424.4743024606
266380.8321686604
266337.20316275134
266293.5872807813
266249.98451879964
266206.3948728565
266162.8183390036
266119.2549132936
266075.7045917805
266032.16737051954
265988.6432455669
265945.13221298007
265901.6342688178
265858.14940914
265814.6776300076
265771.21892748296
265727.77329762944
265684.3407365118
265640.9212401957
265597.5148047482
265554.1214262374
265510.7411007329
265467.3738243048
265424.0195930252
265380.6784029669
265337.350250204
265294.0351308117
265250.73304086644
265207.44397644605
265164.16793362907
265120.9049084956
265077.6548971269
265034.4178956052
264991.1939000141
2649