
# Visualization of Basic ML training
Model: $$\hat{y} = w_1 x_1 + w_2 x_2$$



In [None]:
import numpy as np #Importing the required libraries
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
import plotly.graph_objects as go

# Seeded NumPy Generator (seed 0); the dataset cell draws the features and the noise from it.
# NOTE(review): `random` is also the name of the standard-library module. It is not
# imported in this cell, but check for a clash before adding `import random`.
random = np.random.default_rng(0)


## 1) Model diagram
Two inputs to one output. Weights $w_1, w_2$.
The next cell only draws the model diagram; the rest of the notebook does not depend on it.


In [None]:
# Draw the two-input / one-output model diagram (purely illustrative).
fig, ax = plt.subplots(figsize=(5.5,4))
ax.axis('off')

# Node centres in data coordinates. They are deliberately NOT called x1 / x2 / y:
# `y` is the dataset target read by loss() and grad(), so re-running this cell
# after the data cell must not rebind it to a tuple.
node_x1 = (0.2, 0.7)
node_x2 = (0.2, 0.3)
node_out = (0.8, 0.5)
radius = 0.06

# One unfilled circle with a centred label per node.
for (cx, cy), label in [(node_x1,'x₁'), (node_x2,'x₂'), (node_out,'ŷ')]:
    circle = plt.Circle((cx, cy), radius, fill=False, linewidth=2)
    ax.add_patch(circle)
    ax.text(cx, cy, label, ha='center', va='center', fontsize=12)

def edge_point(p1, p2, r):
    """Return the point at distance ``r`` from ``p1`` on the segment towards ``p2``.

    Used to start and end the arrows on the circle outlines instead of the
    circle centres. If ``p1`` and ``p2`` coincide there is no direction to
    move in, so ``p1`` is returned unchanged.
    """
    dx, dy = p2[0]-p1[0], p2[1]-p1[1]
    d = np.hypot(dx, dy)
    if d == 0:
        return p1
    return (p1[0] + dx/d*r, p1[1] + dy/d*r)

# Arrows from each input node to the output node, clipped to the circle outlines.
for src, dst in [(node_x1, node_out), (node_x2, node_out)]:
    start = edge_point(src, dst, radius)
    end = edge_point(dst, src, radius)
    ax.annotate('', xy=end, xytext=start, arrowprops=dict(arrowstyle='->', lw=2))

# Weight labels sit at the midpoint of each edge, nudged away from the arrow.
ax.text((node_x1[0]+node_out[0])/2, (node_x1[1]+node_out[1])/2+0.05, 'w₁', ha='center', va='center', fontsize=12)
ax.text((node_x2[0]+node_out[0])/2, (node_x2[1]+node_out[1])/2-0.05, 'w₂', ha='center', va='center', fontsize=12)

ax.set_xlim(0, 1); ax.set_ylim(0, 1)
ax.set_title('ML Model of this example:')
# The formula is placed just below the axes, in axes-fraction coordinates.
ax.text(0.5, -0.08, r'$\hat{y} = w_1 x_1 + w_2 x_2$', ha='center', va='top', fontsize=14, transform=ax.transAxes)
plt.show()



## 2) Synthetic dataset and table
Ground truth: $$y = 2x_1 - 1.5x_2 + \varepsilon$$


In [None]:
# --- Synthetic regression data ---
n = 60                                    # number of samples
w_true = np.array([2.0, -1.5])            # ground-truth (desired) weights
X = random.normal(0, 1.0, size=(n, 2))    # random features; columns are x1 and x2
noise = random.normal(loc=0, scale=3.9, size=n)
y = X @ w_true + noise                    # noisy target variable

# Tabular view of the generated points; only the first 5 rows are shown.
df = pd.DataFrame(np.column_stack((X, y)), columns=['x1', 'x2', 'y'])
display(df.head())

# --- Interactive 3D scatter of the data points ---
x1_values, x2_values = X.T
data_points = go.Scatter3d(
    x=x1_values,
    y=x2_values,
    z=y,
    mode='markers',
    marker={
        'size': 6,
        'color': y,                       # colour encodes the target value
        'colorscale': 'viridis',
        'colorbar': {'title': 'y'},
    },
    name='Data points',
)
fig = go.Figure(data=[data_points])

# Layout: axis labels plus a fixed range for the vertical (target) axis.
scene_settings = {
    'xaxis_title': 'x1',
    'yaxis_title': 'x2',
    'zaxis_title': 'y',
    'aspectmode': 'cube',
    'zaxis': {'range': [-75, 100]},
}
fig.update_layout(title='Synthetic Data Points (3D view)', scene=scene_settings)

fig.show()



## 3) Training loop
We record $(w_1, w_2, \text{loss})$ at every step for visualization purposes.


In [None]:
# Defining the loss and gradient functions
# The @ operator means matrix multiplication
def loss(w, features=None, targets=None):
    """Half mean-squared-error of the linear model ``features @ w``.

    Parameters
    ----------
    w : array of weights, one per feature column.
    features : optional feature matrix; defaults to the notebook-level ``X``.
    targets : optional target vector; defaults to the notebook-level ``y``.

    Returns
    -------
    The scalar ``sum(residual**2) / (2 * number_of_targets)``.
    """
    # The globals are only looked up when no explicit data is given, so the
    # function can be evaluated on any dataset without touching notebook state.
    A = X if features is None else features
    b = y if targets is None else targets
    r = A @ w - b
    return (r @ r) / (2.0 * len(b))

def grad(w, features=None, targets=None):
    """Gradient, with respect to ``w``, of the half mean-squared-error.

    Parameters
    ----------
    w : array of weights, one per feature column.
    features : optional feature matrix; defaults to the notebook-level ``X``.
    targets : optional target vector; defaults to the notebook-level ``y``.

    Returns
    -------
    Array ``features.T @ residual / number_of_targets`` with one entry per weight.
    """
    # The globals are only looked up when no explicit data is given.
    A = X if features is None else features
    b = y if targets is None else targets
    r = A @ w - b
    return (A.T @ r) / len(b)


w0 = np.array([-3.0, 30.0])   # deliberately poor initialisation so the movement is visible
eta = 0.25 # learning rate
steps = 30  # number of gradient descent updates

# Gradient descent (training). `path` holds one row [w1, w2, loss] per state:
# row 0 is the initial state and row k is the state after k updates, so the
# array has steps + 1 rows and its last row is the trained model.
# (Recording only *before* each update would drop the result of the final
# update and make w_final lag one step behind the actual weights.)
w = w0.copy()
path = [[w[0], w[1], loss(w)]]
for _ in range(steps):
    w -= eta * grad(w)
    path.append([w[0], w[1], loss(w)])
path = np.array(path) # Path of weights and losses, kept for the visualizations below
w_final = path[-1,:2]; j_final = path[-1,2]

# Predictions at the initial and at the final weights, kept for later plots
y_hat_init  = X @ w0
y_hat_final = X @ w_final


## 4) Model plane during the training vs data
This 3D plot shows the data points together with the plane predicted by the model at each training step.


In [None]:
# 25 x 25 evaluation grid spanning the observed feature range, padded by 0.5 on each side.
feat_lo = X.min(axis=0) - 0.5
feat_hi = X.max(axis=0) + 0.5
x1_grid = np.linspace(feat_lo[0], feat_hi[0], 25)
x2_grid = np.linspace(feat_lo[1], feat_hi[1], 25)
X1g, X2g = np.meshgrid(x1_grid, x2_grid)

def plane_z(w):
    """Height of the model plane w[0]*x1 + w[1]*x2 at every node of the (X1g, X2g) grid."""
    w1, w2 = w[0], w[1]
    return w1*X1g + w2*X2g

# Planes for every recorded step; their overall extent (together with the data)
# gives a z-range that stays fixed while the slider moves.
all_plane_z = np.array([plane_z(weights) for weights in path[:, :2]])
z_min = min(all_plane_z.min(), y.min())
z_max = max(all_plane_z.max(), y.max())


# Create figure with slider
fig = go.Figure()

# Trace 0: the data points (never changes while the slider moves)
fig.add_trace(go.Scatter3d(x=X[:,0], y=X[:,1], z=y, mode='markers', name='Data'))

# Trace 1: the model plane, initially at step 0.
# `all_plane_z` already holds the plane for every recorded step, so it is
# reused here and in the slider instead of recomputing each plane.
fig.add_trace(go.Surface(x=X1g, y=X2g, z=all_plane_z[0], opacity=0.5, colorscale='viridis', showscale=False))

# One slider step per recorded training step. Each step restyles ONLY the
# surface (trace index 1) and updates the title; the data trace is left alone,
# which avoids embedding a copy of `y` in every slider step.
steps_slider = [
    dict(
        method="update",
        args=[
            {"z": [plane]},                          # new surface heights
            {"title": f"Model Plane at Step {i}"},   # layout update
            [1],                                     # index of the trace to update
        ],
        label=str(i),
    )
    for i, plane in enumerate(all_plane_z)
]

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Step: "},
    pad={"t": 50},
    steps=steps_slider
)]

fig.update_layout(
    sliders=sliders,
    title='Model Plane at Step 0',
    scene=dict(
        xaxis_title='x1',
        yaxis_title='x2',
        zaxis_title='y',
        aspectmode='cube',
        zaxis=dict(range=[z_min, z_max]) # Fixed z-axis range so the view does not rescale between steps
    ),
    width=700,   # Fixed width
    height=600   # Fixed height
)

fig.show()


## 5) Gradient descent on the loss surface
This plot shows the "hill" (loss surface) and how gradient descent walks down it to find the minimum loss.


In [None]:
# Lattice of (w1, w2) values on which the loss surface is evaluated.
# w1 spans the values visited during training with a margin of 1 on each side;
# w2 uses a fixed range.
w1_lo = path[:,0].min() - 1
w1_hi = path[:,0].max() + 1
w1_grid = np.linspace(w1_lo, w1_hi, 30)
w2_grid = np.linspace(-30, 30, 30)
W1g, W2g = np.meshgrid(w1_grid, w2_grid)

# Evaluate loss() at every lattice node, row by row, so the result has the grid's 2-D shape.
J_grid = np.array([
    [loss(np.array([w1, w2])) for w1, w2 in zip(w1_row, w2_row)]
    for w1_row, w2_row in zip(W1g, W2g)
])

# Build the three traces first; their order matters because the slider
# addresses the moving marker by its trace index (2).
surface_trace = go.Surface(
    x=W1g, y=W2g, z=J_grid,
    opacity=0.7, colorscale='cividis', name='Loss Surface', showscale=False,
)
full_path_trace = go.Scatter3d(   # whole trajectory, drawn subtly
    x=path[:,0], y=path[:,1], z=path[:,2],
    mode='lines',
    line=dict(color='lightgrey', width=4),
    name='Full Path',
)
current_step_trace = go.Scatter3d(   # marker for the step selected on the slider
    x=[path[0,0]], y=[path[0,1]], z=[path[0,2]],
    mode='markers',
    marker=dict(color='red', size=5),
    name='Current Step',
)
fig = go.Figure(data=[surface_trace, full_path_trace, current_step_trace])

# One slider step per recorded training step: move only the marker and update the title.
steps_slider = [
    dict(
        method="update",
        args=[
            {"x": [path[i:i+1,0]], "y": [path[i:i+1,1]], "z": [path[i:i+1,2]]},
            {"title": f"Gradient Descent on Loss Surface (Step {i})"},
            [2],  # index of the trace to update
        ],
        label=str(i),
    )
    for i in range(len(path))
]

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Step: "},
    pad={"t": 50},
    steps=steps_slider,
)]

# Fixed axis ranges: the z-range covers both the surface and the recorded losses.
loss_lo = min(J_grid.min(), path[:, 2].min())
loss_hi = max(J_grid.max(), path[:, 2].max())
fig.update_layout(
    sliders=sliders,
    title='Gradient Descent on Loss Surface (Step 0)',
    scene=dict(
        xaxis_title='w1',
        yaxis_title='w2',
        zaxis_title='Loss J',
        aspectmode='cube',
        xaxis=dict(range=[w1_grid.min(), w1_grid.max()]),
        yaxis=dict(range=[30, -30]),
        zaxis=dict(range=[loss_lo, loss_hi]),
    ),
)

fig.show()
