In [25]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import plotly.graph_objects as go

In [26]:
def _noisy_line_sample(n_points):
    """Draw ``n_points`` samples of the line y = 4x - 3 with additive Gaussian noise.

    Features come from a uniform distribution on [-10, 10]; the noise has
    mean 0 and standard deviation 0.2. Uses NumPy's global random state,
    drawing the features first and the noise second.
    """
    features = np.random.uniform(-10, 10, n_points)
    noise = np.random.normal(0, 0.2, n_points)
    return features, 4 * features - 3 + noise


# Seed once so the three datasets are the same on every run.
np.random.seed(3)
sizes = [50, 100, 500]
# One (X, y) pair per requested size, generated in the order listed in `sizes`.
datasets = [_noisy_line_sample(n_points) for n_points in sizes]

In [27]:
# Linear regression expressed as a one-layer PyTorch module
class LinearRegressionModel(nn.Module):
    """Single-feature linear regression: one ``nn.Linear(1, 1)`` layer."""

    def __init__(self):
        super().__init__()
        # One input feature mapped to one output value (weight and bias).
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.linear(x)
        return prediction

In [28]:
# Training routine: gradient descent on the mean squared error
def gradient_descent(model, X_tensor, y_tensor, learning_rate=0.01, num_epochs=100):
    """Fit ``model`` with ``optim.SGD`` on an MSE loss and record the loss per epoch.

    Every epoch runs one forward pass over the whole ``X_tensor`` followed by
    a single optimizer step, so each update uses all samples at once.

    Args:
        model: Module whose parameters are updated in place.
        X_tensor: Inputs passed directly to ``model``.
        y_tensor: Targets compared against the model output by ``nn.MSELoss``.
        learning_rate: Step size handed to the SGD optimizer.
        num_epochs: Number of update steps to perform.

    Returns:
        A list of ``num_epochs`` floats: the loss measured at each epoch
        before that epoch's parameter update is applied.
    """
    mse = nn.MSELoss()
    sgd = optim.SGD(model.parameters(), lr=learning_rate)

    history = []
    for _epoch in range(num_epochs):
        # Clear gradients left over from the previous step before backprop.
        sgd.zero_grad()
        epoch_loss = mse(model(X_tensor), y_tensor)
        epoch_loss.backward()
        sgd.step()
        history.append(epoch_loss.item())

    return history

In [29]:
# Train one fresh model per dataset and keep its per-epoch loss curve.
losses = []
for X, y in datasets:
    # Reshape the 1-D arrays into (n_samples, 1) columns for nn.Linear(1, 1).
    X_tensor = torch.tensor(X, dtype=torch.float32).view(-1, 1)
    y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)
    # nn.Linear draws its initial weights from torch's global RNG, which the
    # NumPy seed does not control. Re-seeding before each construction makes
    # the curves reproducible and starts every model from the same weights,
    # so differences between curves are not caused by the initialisation.
    torch.manual_seed(3)
    model = LinearRegressionModel()
    loss_values = gradient_descent(model, X_tensor, y_tensor, learning_rate=0.01, num_epochs=100)
    losses.append(loss_values)

In [30]:
fig = go.Figure()

# Add one loss curve per dataset size.
for n_samples, loss_curve in zip(sizes, losses):
    # Derive the epoch axis from the recorded losses so x and y always have
    # the same length (a hard-coded 1000 did not match the 100 epochs trained).
    epochs = np.arange(len(loss_curve))
    fig.add_trace(go.Scatter(x=epochs, y=loss_curve, mode='lines', name=f"Dataset size {n_samples}"))

# Update layout with titles and axis labels
fig.update_layout(
    title="Training Loss Comparison for Different Dataset Sizes",
    xaxis_title="Epochs",
    yaxis_title="Loss (MSE)",
    template="ggplot2"
)

# Show the plot
fig.show()