In [5]:
import numpy as np
import torch
import torch.nn as nn
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Synthetic healthcare cohort; the NumPy seed makes every draw below repeatable
np.random.seed(42)
n_samples = 1000

# Feature columns are (age, bmi, blood_pressure). Standard-normal draws are
# rescaled column-wise by broadcasting: std (10, 5, 15), mean (50, 25, 120).
X = np.random.randn(n_samples, 3) * [10, 5, 15] + [50, 25, 120]

# Target: hospital stay duration (convex case) - linear in the features plus
# Gaussian noise with standard deviation 10
y_convex = (
    2 * X[:, 0]
    + 3 * X[:, 1]
    + 1.5 * X[:, 2]
    + np.random.randn(n_samples) * 10
)

# Target: readmission risk (non-convex case) - bounded nonlinear terms plus
# Gaussian noise with standard deviation 0.2
y_nonconvex = (
    np.sin(X[:, 0] / 10)
    + np.cos(X[:, 1] / 5)
    + np.tanh(X[:, 2] / 20)
    + np.random.randn(n_samples) * 0.2
)

# float32 PyTorch copies; targets become column vectors of shape (n_samples, 1)
X_tensor = torch.from_numpy(X).float()
y_convex_tensor = torch.from_numpy(y_convex).float().unsqueeze(1)
y_nonconvex_tensor = torch.from_numpy(y_nonconvex).float().unsqueeze(1)

# Linear model (convex case)
class LinearModel(nn.Module):
    """Single affine layer mapping 3 input features to 1 output."""

    def __init__(self):
        super(LinearModel, self).__init__()
        # Exposed as ``self.linear`` so callers can read its weight and bias
        self.linear = nn.Linear(in_features=3, out_features=1)

    def forward(self, x):
        """Return the affine layer's output for the input batch ``x``."""
        prediction = self.linear(x)
        return prediction

# Neural network (non-convex case)
class NonlinearModel(nn.Module):
    """Small ReLU multilayer perceptron: 3 inputs -> 10 -> 10 -> 1 output."""

    def __init__(self):
        super(NonlinearModel, self).__init__()
        hidden = 10
        # Layers are created in forward order and wrapped in one Sequential
        stack = [
            nn.Linear(3, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        out = self.layers(x)
        return out

# Training function
def train_model(model, X, y, n_epochs=100, lr=0.01):
    """Fit ``model`` to ``(X, y)`` with full-batch Adam on a mean-squared-error loss.

    Args:
        model: ``nn.Module`` whose parameters are optimised in place.
        X: Input tensor; passed to ``model`` unchanged on every epoch.
        y: Target tensor compared against ``model(X)`` by ``nn.MSELoss``.
        n_epochs: Number of optimisation steps (each uses the whole of ``X``).
        lr: Adam learning rate. Defaults to 0.01, the value that used to be
            hard-coded, so existing calls behave as before.

    Returns:
        list[float]: One loss value per epoch, each measured *before* that
        epoch's parameter update. Empty when ``n_epochs`` is 0.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    losses = []

    for _ in range(n_epochs):
        optimizer.zero_grad()
        loss = criterion(model(X), y)
        loss.backward()
        optimizer.step()
        # .item() stores a plain Python float and drops the autograd graph
        losses.append(loss.item())

    return losses

# Train both models
# Seed PyTorch's RNG: nn.Linear initialises its weights randomly, so without a
# torch seed the two loss curves below would differ on every run.
torch.manual_seed(42)
model_convex = LinearModel()
model_nonconvex = NonlinearModel()

losses_convex = train_model(model_convex, X_tensor, y_convex_tensor)
losses_nonconvex = train_model(model_nonconvex, X_tensor, y_nonconvex_tensor)

# Visualization: one panel per problem, side by side
fig = make_subplots(rows=1, cols=2, subplot_titles=('Convex Optimization (Hospital Stay)', 
                                                   'Non-convex Optimization (Readmission Risk)'))

# Plot convex loss (x defaults to the epoch index because only y is given)
fig.add_trace(
    go.Scatter(y=losses_convex, mode='lines', name='Convex Loss',
               line=dict(color='blue')), row=1, col=1)

# Plot non-convex loss
fig.add_trace(
    go.Scatter(y=losses_nonconvex, mode='lines', name='Non-convex Loss',
               line=dict(color='red')), row=1, col=2)

fig.update_layout(height=500, width=1000, title_text="Loss Curves Comparison")
fig.update_xaxes(title_text="Epochs")
# Log-scaled y axis on both panels
fig.update_yaxes(title_text="Loss", type="log")

fig.show()

In [6]:
def visualize_gradients():
    """Plot the MSE loss of ``LinearModel`` over its first two weights.

    Draws three things in one 3-D figure and shows it:

    * the loss surface over a 20 x 20 grid of (weight 1, weight 2) in [-2, 2],
    * cones pointing along the negative gradient at every other grid point,
    * the path taken by 100 Adam steps (lr=0.01) on a freshly built model.

    The surface is a single 2-D slice of the loss: the third weight and the
    bias are held at the tracked model's *initial* values for every grid
    point. The optimiser updates all parameters, so later path points can
    drift off that slice.

    Reads the notebook-level ``X_tensor`` and ``y_convex_tensor``. Returns
    nothing.
    """
    criterion = nn.MSELoss()

    # Model whose optimisation path is drawn
    model = LinearModel()

    # Probe model used only for grid evaluation. It copies the tracked model's
    # initial parameters so the third weight and bias are identical at every
    # grid point; building a new LinearModel() per point would re-randomise
    # them and make the surface and gradient field inconsistent.
    probe = LinearModel()
    probe.load_state_dict(model.state_dict())

    def get_loss_and_grad(w1, w2):
        """Return (loss, dL/dw1, dL/dw2) with the first two weights set to w1, w2."""
        with torch.no_grad():
            probe.linear.weight[0, 0] = float(w1)
            probe.linear.weight[0, 1] = float(w2)
        probe.zero_grad()  # gradients would otherwise accumulate across calls
        loss = criterion(probe(X_tensor), y_convex_tensor)
        loss.backward()
        grad = probe.linear.weight.grad
        return loss.item(), grad[0, 0].item(), grad[0, 1].item()

    # Create grid for visualization
    w1 = np.linspace(-2, 2, 20)
    w2 = np.linspace(-2, 2, 20)
    W1, W2 = np.meshgrid(w1, w2)

    Z = np.zeros_like(W1)
    U = np.zeros_like(W1)  # Gradient x component
    V = np.zeros_like(W1)  # Gradient y component

    # Iterate over the grid's own shape: meshgrid returns (len(w2), len(w1))
    # arrays, so indexing by len(w1)/len(w2) breaks once the axes differ.
    for i, j in np.ndindex(W1.shape):
        loss, grad_x, grad_y = get_loss_and_grad(W1[i, j], W2[i, j])
        Z[i, j] = loss
        U[i, j] = -grad_x  # Negative gradient for descent direction
        V[i, j] = -grad_y

    # Track training path
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    w1_history = []
    w2_history = []
    loss_history = []

    for _ in range(100):
        optimizer.zero_grad()
        loss = criterion(model(X_tensor), y_convex_tensor)
        loss.backward()

        # Record the weights before stepping so that each (w1, w2, loss)
        # triple describes the same model state.
        w1_history.append(model.linear.weight[0, 0].item())
        w2_history.append(model.linear.weight[0, 1].item())
        loss_history.append(loss.item())

        optimizer.step()

    # Create visualization
    fig = go.Figure()

    # Add loss surface
    fig.add_trace(go.Surface(x=W1, y=W2, z=Z, colorscale='viridis', opacity=0.8))

    # Add gradient vectors
    skip = 2  # Plot every nth vector for clarity
    fig.add_trace(go.Cone(
        x=W1[::skip, ::skip].flatten(),
        y=W2[::skip, ::skip].flatten(),
        z=Z[::skip, ::skip].flatten(),
        u=U[::skip, ::skip].flatten(),
        v=V[::skip, ::skip].flatten(),
        # Cones lie in the weight plane: no component along the loss axis
        w=np.zeros_like(U[::skip, ::skip].flatten()),
        colorscale='reds',
        showscale=False,
        sizemode='absolute',
        sizeref=0.5
    ))

    # Add optimization path (marker colour encodes the step index)
    fig.add_trace(go.Scatter3d(
        x=w1_history,
        y=w2_history,
        z=loss_history,
        mode='lines+markers',
        line=dict(color='red', width=4),
        marker=dict(size=4, colorscale='Reds', color=list(range(len(w1_history))))
    ))

    fig.update_layout(
        title='Loss Landscape with Gradients (Convex Case)',
        scene=dict(
            xaxis_title='Weight 1',
            yaxis_title='Weight 2',
            zaxis_title='Loss',
            camera=dict(eye=dict(x=1.5, y=1.5, z=1.2))
        ),
        width=800,
        height=800
    )

    fig.show()

# Build and display the convex-case loss-landscape figure
visualize_gradients()

In [3]:
# Implementation of the Rosenbrock (banana) function - a classic optimization problem
# known for its narrow, parabolic valley which is difficult to traverse

import numpy as np
import torch
import torch.nn as nn
import plotly.graph_objects as go

# Rosenbrock function: f(x,y) = (a-x)^2 + b(y-x^2)^2, classic form a=1, b=100
def rosenbrock(x, y, a=1, b=100):
    """Evaluate the Rosenbrock function ``(a - x)**2 + b * (y - x**2)**2``.

    Args:
        x, y: Scalars or NumPy arrays; arrays are combined element-wise with
            the usual broadcasting rules.
        a: Offset in the first term (default 1).
        b: Weight of the second term (default 100).

    Returns:
        The function value, with the same shape as the broadcast of ``x`` and
        ``y``. It is zero at ``(x, y) = (a, a**2)``.
    """
    return (a - x)**2 + b * (y - x**2)**2

# Training data: evaluate the function on a 100 x 100 grid spanning
# x in [-2, 2] and y in [-1, 3]
x = np.linspace(start=-2, stop=2, num=100)
y = np.linspace(start=-1, stop=3, num=100)
X, Y = np.meshgrid(x, y, indexing='xy')
Z = rosenbrock(X, Y)

# Neural network to learn the mapping
class RosenbrockNet(nn.Module):
    """MLP from a 2-D input to a scalar: 2 -> 32 -> 64 -> 32 -> 1.

    Hidden activations alternate Tanh, ReLU, Tanh; the output layer is linear.
    """

    def __init__(self):
        super(RosenbrockNet, self).__init__()
        # Built in forward order; kept under ``self.net`` so layers stay
        # addressable by position (e.g. ``net[0]`` is the first Linear).
        stack = [
            nn.Linear(2, 32),
            nn.Tanh(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.Tanh(),
            nn.Linear(32, 1),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        out = self.net(x)
        return out

# Training setup
# Seed PyTorch so the random weight initialisation - and therefore the
# recorded parameter path and losses - is the same on every run.
torch.manual_seed(42)
model = RosenbrockNet()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.MSELoss()

# Training with parameter tracking
param_history = []
loss_history = []

# Training data: one (x, y) row per grid point, target is the function value.
# torch.as_tensor with an explicit dtype replaces the legacy FloatTensor
# constructor and yields the same float32 tensors.
inputs = torch.as_tensor(np.column_stack((X.flatten(), Y.flatten())), dtype=torch.float32)
targets = torch.as_tensor(Z.flatten(), dtype=torch.float32).reshape(-1, 1)

# Train (full batch: every grid point is used in each step)
for epoch in range(1500):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    # Track parameters (first layer): the two input weights of hidden unit 0,
    # read after the update. The loss stored alongside was computed before it.
    w1, w2 = model.net[0].weight[0].detach().numpy()
    param_history.append((w1, w2))
    loss_history.append(loss.item())

# Visualization
# Split the recorded (w1, w2) pairs into one sequence per axis
w1_history, w2_history = zip(*param_history)

fig = go.Figure(data=[
    # Surface plot; z is log-scaled for better visualization
    go.Surface(x=X, y=Y, z=np.log(Z), colorscale='viridis', opacity=0.8),
    # Recorded parameter path, placed at the log function value of each pair
    # and coloured by epoch index
    go.Scatter3d(
        x=w1_history,
        y=w2_history,
        z=[np.log(rosenbrock(p, q)) for p, q in param_history],
        mode='lines+markers',
        line={'color': 'red', 'width': 4},
        marker={
            'size': 4,
            'color': list(range(len(w1_history))),
            'colorscale': 'Reds',
            'showscale': True,
            'colorbar': {'title': 'Epoch'},
        },
    ),
])

fig.update_layout(
    title='Rosenbrock Function Optimization Landscape',
    width=1000,
    height=800,
    scene={
        'xaxis_title': 'x',
        'yaxis_title': 'y',
        'zaxis_title': 'log(Loss)',
        'camera': {'eye': {'x': 1.5, 'y': 1.5, 'z': 1.2}},
    },
)

fig.show()