# Linear Regression
Linear regression finds the line (or, in higher dimensions, the hyperplane) of best fit through multi-dimensional data.

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## 1D-Case
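The error of the $i$-th point is $e_i = y_i - (w \cdot x_i + b)$,
where $(w \cdot x_i + b)$ is the predicted value $y'_i$.

Then, the energy function is
$E(w, b) = \sum_{i=1}^{N} (e_i)^2$

Minimising $E$ with respect to $w$ and $b$ gives the closed-form solution
$w = \frac{\sum_{i=1}^{N} (y_i - \hat{y})(x_i - \hat{x})}{\sum_{i=1}^{N} (x_i - \hat{x})^2}$ and $b = \hat{y} - w \cdot \hat{x}$,
where $\hat{x}$ and $\hat{y}$ are the averages of the $x_i$ and $y_i$.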

In [None]:
# 1D case: closed-form simple linear regression on N random integer points.
N = 10
low = 0
high = 5
# randint's upper bound is exclusive, so every coordinate lies in [low, high - 1].
x = np.random.randint(low=low, high=high, size=N)
y = np.random.randint(low=low, high=high, size=N)

# Averages over x and y.
x_hat: float = float(x.mean())
y_hat: float = float(y.mean())

# Deviations from the averages; both sums below are built from them.
x_dev = x - x_hat
y_dev = y - y_hat

# Finding the weight (gradient):
#   w = sum((y_i - y_hat) * (x_i - x_hat)) / sum((x_i - x_hat) ** 2)
numerator: float = float(np.sum(y_dev * x_dev))
denominator: float = float(np.sum(x_dev**2))
if denominator == 0:
    # Every x_i is identical, so the slope is undefined. Fail loudly instead
    # of dividing by zero and letting NaN flow into the plot below.
    raise ValueError("All x values are identical; the slope is undefined.")
w: float = numerator / denominator

# Finding the bias (y-intercept): the fitted line passes through (x_hat, y_hat).
b: float = y_hat - w * x_hat

print("Points")
for x_i, y_i in zip(x, y):
    print(f"({x_i}, {y_i})")

print(f"Weight: {w}")
print(f"Bias: {b}")

In [None]:
# Evaluate the fitted line y = w * x + b at the integer positions in [low, high).
x_best = list(range(low, high))
y_best = [w * x_i + b for x_i in x_best]
# Tabular copy of the sampled points; it feeds the marker trace below.
df = pd.DataFrame({"x": x, "y": y})

fig = go.Figure()
# Fitted line. The "+" format flag makes the intercept carry its own sign, so a
# negative bias is labelled "y=0.500x-1.250" rather than "y=0.500x+-1.250".
fig.add_trace(
    go.Scatter(
        x=x_best,
        y=y_best,
        mode="lines",
        name=f"y={w:.3f}x{b:+.3f}",
        line=dict(color="red"),
    )
)

# The sampled data points.
fig.add_trace(
    go.Scatter(
        x=df["x"],
        y=df["y"],
        mode="markers",
        name="Points",
        marker=dict(size=10, color="blue"),
    )
)

fig.update_layout(
    title="Linear Regression (1-Dimension)",
    xaxis_title="x",
    yaxis_title="y",
)
fig.show()

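## Multi-dimensional input

Append a column of ones to $X$ to form $\tilde{X}$, so that the bias becomes the last entry of the weight vector. The least-squares solution is then $\tilde{w} = \tilde{X}^{+} y$, where $\tilde{X}^{+} = (\tilde{X}^T \tilde{X})^{-1} \tilde{X}^T$ is the pseudoinverse of $\tilde{X}$ (this formula holds when $\tilde{X}$ has full column rank).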

In [None]:
# Multi-dimensional input, scalar output: fit y ~ w_0 * x1 + w_1 * x2 + b.
input_dimension = 2
points_num = 10
low = 0
high = 5
# One row per point, one column per input dimension; y is a flat vector.
x = np.random.randint(low=low, high=high, size=(points_num, input_dimension))
y = np.random.randint(low=low, high=high, size=points_num)
print(f"x: {x}")
print(f"y: {y}")

# Append a column of ones so the bias is learned as the last weight.
new_column = np.ones((points_num, 1), dtype=int)
x_tilde = np.hstack([x, new_column])
print(f"x_tilde: {x_tilde}")

# Moore-Penrose pseudoinverse. np.linalg.pinv is SVD-based, so unlike
# inv(X^T X) @ X^T it does not raise when the random points happen to make
# x_tilde rank-deficient; it then returns the minimum-norm least-squares
# solution instead.
x_pseudoinverse = np.linalg.pinv(x_tilde)
w_tilde = x_pseudoinverse @ y
print(f"w_tilde: {w_tilde}")

# Unpack: one weight per input dimension, then the bias from the ones column.
w_0 = w_tilde[0]
w_1 = w_tilde[1]
b = w_tilde[-1]

In [None]:
# Grid of (x1, x2) positions over the sampling range on which to draw the plane.
x1_coord = list(range(low, high))
x2_coord = list(range(low, high))
x1_best, x2_best = np.meshgrid(x1_coord, x2_coord)
# Fitted plane: y = w_0 * x1 + w_1 * x2 + b.
y_best = w_0 * x1_best + w_1 * x2_best + b

fig = go.Figure()
fig.add_trace(go.Scatter3d(x=x[:, 0], y=x[:, 1], z=y, mode="markers", name="points"))
fig.add_trace(
    go.Surface(
        x=x1_best,
        y=x2_best,
        z=y_best,
        # The label must pair each coefficient with the term it multiplies in
        # y_best above: w_0 with x1, w_1 with x2, and b as the constant. The "+"
        # format flag prints each term's own sign, avoiding "+-" for negatives.
        name=f"y={w_0:.2f}x1{w_1:+.2f}x2{b:+.2f}",
        opacity=0.6,
    )
)
fig.update_layout(
    scene=dict(xaxis_title="x1", yaxis_title="x2", zaxis_title="y"),
    title="3D points + fitted plane",
)
fig.show()

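## Multi-dimensional input & output

With several outputs, $Y$ has one column per output dimension and the same pseudoinverse gives every output's weights at once: $\tilde{W} = \tilde{X}^{+} Y$, a matrix of shape (input_dimension + 1, output_dimension) whose last row holds the biases.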

In [1]:
# Multi-dimensional input and output: one least-squares fit per output column.
input_dimension = 5
output_dimension = 3
points_num = 10
low = 0
high = 5
# One row per point; x has one column per input, y one column per output.
x = np.random.uniform(low=low, high=high, size=(points_num, input_dimension))
y = np.random.uniform(low=low, high=high, size=(points_num, output_dimension))
print(f"x: {x}")
print(f"y: {y}")

# Append a column of ones so each output's bias is learned as the last weight row.
new_column = np.ones((points_num, 1), dtype=int)
x_tilde = np.hstack([x, new_column])
print(f"x_tilde: {x_tilde}")

# Moore-Penrose pseudoinverse via SVD (np.linalg.pinv). This avoids explicitly
# inverting X^T X, which is less accurate and raises if the matrix is singular.
x_pseudoinverse = np.linalg.pinv(x_tilde)
# Column j of w_tilde holds the weights (and, in the last row, the bias) of output j.
w_tilde = x_pseudoinverse @ y
print(f"w_tilde: {w_tilde}")
print(f"Size (input_dimension + 1, output_dimension): {w_tilde.shape}")

x: [[2.83829043 3.02049789 2.55286417 1.69901205 0.45690156]
 [3.88251296 2.89315455 2.36419662 3.33236424 2.68095383]
 [0.38445027 4.73931318 3.12323169 1.56615063 0.58905862]
 [0.96398737 4.53949298 3.89045842 4.98660771 2.36043569]
 [1.44371073 0.61550565 1.36393276 1.58544162 1.34433204]
 [0.35851998 2.67522764 2.45182309 3.35278666 2.35317183]
 [2.82959509 0.25763946 0.27650598 3.83631321 4.34767831]
 [3.01863005 2.659213   3.75246186 4.2908081  3.76577558]
 [4.17314842 2.41814331 4.61957203 1.07461431 4.6105052 ]
 [0.68949074 0.35506947 3.96096926 3.28733046 0.07728608]]
y: [[4.55603484 2.61339739 4.48088379]
 [4.25164643 3.33262217 4.42517671]
 [0.25201342 4.01566633 3.0275446 ]
 [1.71451038 2.30041889 1.66351181]
 [0.91529699 2.92993454 2.74110487]
 [3.012874   1.01874589 4.32591568]
 [4.5281312  3.85132119 1.79421562]
 [0.37257668 3.97017649 3.51760117]
 [3.36719422 0.8085631  2.77692595]
 [4.86739714 3.06198982 3.25494881]]
x_tilde: [[2.83829043 3.02049789 2.55286417 1.699012

In [None]:
# One subplot per (output, input) pair: rows are outputs, columns are inputs.
# Titles are listed row by row, so index i maps to input i % input_dimension
# and output i // input_dimension.
fig = make_subplots(
    rows=output_dimension,
    cols=input_dimension,
    subplot_titles=[
        f"Input: {i % input_dimension}, Output: {i // input_dimension}"
        for i in range(input_dimension * output_dimension)
    ],
)
fig.update_xaxes(showticklabels=False).update_yaxes(showticklabels=False)
fig.update_layout(
    height=600, width=1200, title="Linear Regression of Multi-dimensional data"
)

# Loop invariants. The fitted line is straight, so its two end points are
# enough; using [low, high] makes it span the whole sampling interval of x
# (np.arange(low, high) would stop at high - 1, short of the data).
x_means = x.mean(axis=0)
x_coord = np.array([low, high])

for output_v in range(output_dimension):
    w_vec = w_tilde[:-1, output_v]
    b0 = w_tilde[-1, output_v]

    for input_v in range(input_dimension):
        w = w_vec[input_v]
        # 1D slice of the fitted hyperplane along input_v: every other input is
        # held at its mean, so their contribution folds into the intercept
        # (b0 + sum over j != input_v of w_vec[j] * x_means[j]).
        b = b0 + (w_vec @ x_means) - w * x_means[input_v]
        y_best = w * x_coord + b
        fig.add_trace(
            go.Scatter(
                x=x_coord,
                y=y_best,
                mode="lines",
                # "+" flag: the intercept prints its own sign (no "+-").
                name=f"y={w:.3f}x{b:+.3f}",
                line=dict(color="red"),
            ),
            row=output_v + 1,
            col=input_v + 1,
        )
        fig.add_trace(
            go.Scatter(
                x=x[:, input_v],
                y=y[:, output_v],
                mode="markers",
                name="Points",
                marker=dict(size=10, color="blue"),
            ),
            row=output_v + 1,
            col=input_v + 1,
        )
fig.show()