# MAE Loss


$$
\mathcal{L} = \frac{1}{n}\sum_{i=1}^{n}|\hat{y}_i-y_i|
$$

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

# Generate input data: 100 evenly spaced x values in [-2*pi, 2*pi] and their sine
X = torch.linspace(-2 * np.pi, 2 * np.pi, 100).view(-1, 1)
y = torch.sin(X)

# Plot the original sine wave
plt.figure(figsize=(10, 5))
plt.plot(X.numpy(), y.numpy(), label='Original Sine Wave')
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.title('Original Sine Wave')
plt.legend()
plt.grid(True)
plt.show()

# Grid of candidate (weight, bias) pairs for the loss-surface plot.
# With meshgrid's default 'xy' indexing, W and B both have shape
# (len(biases), len(weights)).
weights = np.linspace(-1, 1, 10)  # Range of weights covered by the plot
biases = np.linspace(-1, 1, 10)   # Range of biases covered by the plot
W, B = np.meshgrid(weights, biases)

# Compute the MAE loss of the line y_pred = w * x + b at every grid point
mae_losses = np.zeros_like(W)
grid_criterion = nn.L1Loss()  # built once instead of once per grid point

# Iterate over the grid's own shape so the loop stays correct even when
# `weights` and `biases` are given different lengths.
for idx in np.ndindex(W.shape):
    y_pred = float(W[idx]) * X + float(B[idx])
    mae_losses[idx] = grid_criterion(y_pred, y).item()

# Plot filled contour plot for MAE loss. No plt.show() here: the figure stays
# current so that later plt.plot calls draw on top of the contours.
plt.figure(figsize=(12, 10))
contour = plt.contourf(W, B, mae_losses, levels=50, cmap='BrBG_r')
plt.colorbar(contour, label='Loss')
plt.xlabel('Weight')
plt.ylabel('Bias')
plt.title('MAE Loss Contours')

# Define a simple neural network model
class SimpleModel(nn.Module):
    """Linear regressor made of a single ``nn.Linear(1, 1)`` layer."""

    def __init__(self):
        super().__init__()
        # 1 input feature, 1 output (regression)
        self.linear = nn.Linear(1, 1)
        self._initialize_weights()

    def _initialize_weights(self):
        """Re-draw the weight, then the bias, uniformly between -1.0 and 1.0."""
        for param in (self.linear.weight, self.linear.bias):
            nn.init.uniform_(param, -1.0, 1.0)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        layer = self.linear
        return layer(x)

# Initialize the model, loss function, and optimizer
model = SimpleModel()
criterion = nn.L1Loss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.2)

# Training loop
epochs = 50000
losses = []  # Loss of every epoch, measured before that epoch's update

# Path of the parameters in (weight, bias) space. It is seeded with the freshly
# initialized values so the 'Start' marker shows the true starting point rather
# than the parameters after the first update.
weight_trajectory = [model.linear.weight.item()]
bias_trajectory = [model.linear.bias.item()]

model.train()  # the mode never changes, so set it once outside the loop
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    losses.append(loss_value)

    # Record the updated parameters (.item() yields plain Python floats)
    weight_trajectory.append(model.linear.weight.item())
    bias_trajectory.append(model.linear.bias.item())

    if epoch % 100 == 0:
        print(f'Epoch [{epoch}/{epochs}], Loss: {loss_value}')

# Overlay the parameter trajectory on the figure that is current at this point
# (the loss contour figure when the cell is run top to bottom)
plt.plot(weight_trajectory, bias_trajectory, marker='o', markersize=2, color='red', label='Trajectory')

# Mark the starting point
plt.plot(weight_trajectory[0], bias_trajectory[0], marker='o', markersize=10, color='green', label='Start')

# Mark the ending point
plt.plot(weight_trajectory[-1], bias_trajectory[-1], marker='x', markersize=10, color='blue', label='End')

plt.legend()
plt.show()


# Plot the training loss over epochs
plt.figure(figsize=(10, 5))
plt.plot(losses, label='Training Loss', color='blue')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.legend()
plt.grid(True)
plt.show()



# MSE Loss
$$
\mathcal{L} = \frac{1}{n}\sum_{i=1}^{n}(\hat{y}_i-y_i)^2
$$

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

# Generate input data: 100 evenly spaced x values in [-2*pi, 2*pi] and their sine
X = torch.linspace(-2 * np.pi, 2 * np.pi, 100).view(-1, 1)
y = torch.sin(X)

# Plot the original sine wave
plt.figure(figsize=(10, 5))
plt.plot(X.numpy(), y.numpy(), label='Original Sine Wave')
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.title('Original Sine Wave')
plt.legend()
plt.grid(True)
plt.show()

# Grid of candidate (weight, bias) pairs for the loss-surface plot.
# With meshgrid's default 'xy' indexing, W and B both have shape
# (len(biases), len(weights)).
weights = np.linspace(-1, 1, 100)  # Range of weights covered by the plot
biases = np.linspace(-1, 1, 100)   # Range of biases covered by the plot
W, B = np.meshgrid(weights, biases)

# Compute the MSE loss of the line y_pred = w * x + b at every grid point
mse_losses = np.zeros_like(W)
grid_criterion = nn.MSELoss()  # built once instead of once per grid point

# Iterate over the grid's own shape so the loop stays correct even when
# `weights` and `biases` are given different lengths.
for idx in np.ndindex(W.shape):
    y_pred = float(W[idx]) * X + float(B[idx])
    mse_losses[idx] = grid_criterion(y_pred, y).item()

# Plot filled contour plot for MSE loss. No plt.show() here: the figure stays
# current so that later plt.plot calls draw on top of the contours.
plt.figure(figsize=(12, 10))
contour = plt.contourf(W, B, mse_losses, levels=50, cmap='BrBG_r')
plt.colorbar(contour, label='Loss')
plt.xlabel('Weight')
plt.ylabel('Bias')
plt.title('MSE Loss Contours')

# Define a simple neural network model
class SimpleModel(nn.Module):
    """One-layer linear model mapping a single input feature to a single output."""

    def __init__(self):
        super().__init__()
        # 1 input feature, 1 output (regression)
        self.linear = nn.Linear(in_features=1, out_features=1)
        self._initialize_weights()

    def _initialize_weights(self):
        """Overwrite the layer's weight and bias with uniform draws between the bounds below."""
        low, high = -1.0, 1.0
        nn.init.uniform_(self.linear.weight, a=low, b=high)
        nn.init.uniform_(self.linear.bias, a=low, b=high)

    def forward(self, x):
        """Run ``x`` through the linear layer and return the result."""
        prediction = self.linear(x)
        return prediction

# Initialize the model, loss function, and optimizer
model = SimpleModel()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop
epochs = 50000
losses = []  # Loss of every epoch, measured before that epoch's update

# Path of the parameters in (weight, bias) space. It is seeded with the freshly
# initialized values so the 'Start' marker shows the true starting point rather
# than the parameters after the first update.
weight_trajectory = [model.linear.weight.item()]
bias_trajectory = [model.linear.bias.item()]

model.train()  # the mode never changes, so set it once outside the loop
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    losses.append(loss_value)

    # Record the updated parameters (.item() yields plain Python floats)
    weight_trajectory.append(model.linear.weight.item())
    bias_trajectory.append(model.linear.bias.item())

    if epoch % 100 == 0:
        print(f'Epoch [{epoch}/{epochs}], Loss: {loss_value}')


# Overlay the parameter trajectory on the figure that is current at this point
# (the loss contour figure when the cell is run top to bottom)
plt.plot(weight_trajectory, bias_trajectory, marker='o', markersize=2, color='red', label='Trajectory')

# Mark the starting point
plt.plot(weight_trajectory[0], bias_trajectory[0], marker='o', markersize=10, color='green', label='Start')

# Mark the ending point
plt.plot(weight_trajectory[-1], bias_trajectory[-1], marker='x', markersize=10, color='blue', label='End')

plt.legend()
plt.show()


# Plot the training loss over epochs
plt.figure(figsize=(10, 5))
plt.plot(losses, label='Training Loss', color='blue')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.legend()
plt.grid(True)
plt.show()


# BCE Loss

$$
\mathcal{L} = -\frac{1}{n}\sum_{i=1}^{n}\left(y_i \log(\hat{y}_i) + (1-y_i) \log(1-\hat{y}_i)\right)
$$

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons

# Build the two-moons toy dataset and convert it to float32 tensors
moon_points, moon_labels = make_moons(n_samples=100, noise=0.05, random_state=42)
X = torch.tensor(moon_points, dtype=torch.float32)
y = torch.tensor(moon_labels, dtype=torch.float32).view(-1, 1)  # labels as a column

# Scatter the samples, coloured by their label
coords = X.numpy()
label_colours = y.numpy().reshape(-1)
plt.figure(figsize=(10, 5))
plt.scatter(coords[:, 0], coords[:, 1], c=label_colours, cmap=plt.cm.viridis, edgecolor='k')
plt.title('Moons Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.grid(True)
plt.show()

# Define a simple neural network model
class SimpleModel(nn.Module):
    """Linear classifier head: a single ``nn.Linear(2, 1)`` layer producing one raw score per sample."""

    def __init__(self):
        super().__init__()
        # 2 input features, 1 output (classification)
        self.linear = nn.Linear(2, 1)
        self._initialize_weights()

    def _initialize_weights(self):
        """Re-draw the weight, then the bias, uniformly between -1.0 and 1.0."""
        for param in (self.linear.weight, self.linear.bias):
            nn.init.uniform_(param, -1.0, 1.0)

    def forward(self, x):
        """Return the linear layer's output for ``x`` (no activation is applied)."""
        layer = self.linear
        return layer(x)

# Initialize the model, loss function, and optimizer
model = SimpleModel()
criterion = nn.BCEWithLogitsLoss()  # takes raw logits; the sigmoid is part of the loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Training loop
epochs = 50000
losses = []  # Loss of every epoch, measured before that epoch's update

# Parameter trajectories, seeded with the freshly initialized values so that
# the 'Start' marker shows the true starting point rather than the parameters
# after the first update.
weight_trajectory_0 = [model.linear.weight[0, 0].item()]
weight_trajectory_1 = [model.linear.weight[0, 1].item()]
bias_trajectory = [model.linear.bias[0].item()]

model.train()  # the mode never changes, so set it once outside the loop
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    losses.append(loss_value)

    # Record the updated parameters (.item() yields plain Python floats)
    weight_trajectory_0.append(model.linear.weight[0, 0].item())
    weight_trajectory_1.append(model.linear.weight[0, 1].item())
    bias_trajectory.append(model.linear.bias[0].item())

    if epoch % 100 == 0:
        print(f'Epoch [{epoch}/{epochs}], Loss: {loss_value}')

# Convert trajectories to numpy arrays
weight_trajectory_0 = np.array(weight_trajectory_0)
weight_trajectory_1 = np.array(weight_trajectory_1)
bias_trajectory = np.array(bias_trajectory)

# Grid of (weight, bias) values for the contour plots. With meshgrid's default
# 'xy' indexing, W and B both have shape (len(biases), len(weights)).
weights = np.linspace(-5, 5, 100)  # Range of weights covered by the plots
biases = np.linspace(-5, 5, 100)   # Range of biases covered by the plots
W, B = np.meshgrid(weights, biases)

# Copy of the trained weight row, shape (1, 2). The loss slices are evaluated
# on copies of it, so the trained model itself is never modified.
trained_weight = model.linear.weight.detach().clone()


def _loss_slice(weight_index):
    """Return the loss at every (W, B) grid point for one weight and the bias.

    The weight at ``weight_index`` and the bias take the grid values, while the
    other weight is held at its trained value. Entry ``[r, c]`` of the result
    is the loss at ``(W[r, c], B[r, c])``, which is the layout ``contourf``
    expects.
    """
    surface = np.zeros_like(W)
    probe_weight = trained_weight.clone()
    with torch.no_grad():
        for idx in np.ndindex(W.shape):
            probe_weight[0, weight_index] = float(W[idx])
            # Same affine map as nn.Linear: x @ weight^T + bias
            logits = X @ probe_weight.t() + float(B[idx])
            surface[idx] = criterion(logits, y).item()
    return surface


# Compute BCE losses on the meshgrid
bce_losses_weight_0 = _loss_slice(0)
bce_losses_weight_1 = _loss_slice(1)

# Plot the BCE loss contours and the training trajectory for weight[0,0].
# Each trajectory is a 2-D projection of the 3-parameter path, so the contours
# show the exact training loss only where the fixed weight equals its trained
# value, i.e. at the 'End' marker.
plt.figure(figsize=(8, 20))

plt.subplot(2, 1, 1)
contour_weight_0 = plt.contourf(W, B, bce_losses_weight_0, levels=50, cmap='BrBG_r')
plt.colorbar(contour_weight_0, label='Loss')
plt.plot(weight_trajectory_0, bias_trajectory, marker='o', markersize=2, color='red', label='Trajectory')
plt.plot(weight_trajectory_0[0], bias_trajectory[0], marker='o', markersize=10, color='green', label='Start')
plt.plot(weight_trajectory_0[-1], bias_trajectory[-1], marker='x', markersize=10, color='blue', label='End')
plt.xlabel('Weight for Feature 1')
plt.ylabel('Bias')
plt.title('BCE Loss Contours for Weight[0,0]')
plt.legend()

# Plot the BCE loss contours and the training trajectory for weight[0,1]
plt.subplot(2, 1, 2)
contour_weight_1 = plt.contourf(W, B, bce_losses_weight_1, levels=50, cmap='BrBG_r')
plt.colorbar(contour_weight_1, label='Loss')
plt.plot(weight_trajectory_1, bias_trajectory, marker='o', markersize=2, color='red', label='Trajectory')
plt.plot(weight_trajectory_1[0], bias_trajectory[0], marker='o', markersize=10, color='green', label='Start')
plt.plot(weight_trajectory_1[-1], bias_trajectory[-1], marker='x', markersize=10, color='blue', label='End')
plt.xlabel('Weight for Feature 2')
plt.ylabel('Bias')
plt.title('BCE Loss Contours for Weight[0,1]')
plt.legend()

plt.tight_layout()
plt.show()

# Plot the training loss over epochs
plt.figure(figsize=(10, 5))
plt.plot(losses, label='Training Loss', color='blue')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.legend()
plt.grid(True)
plt.show()


# Focal Loss


$$
\mathcal{L} = -\alpha_t (1-p_t)^{\gamma} \log(p_t)
$$

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons

# Sample the two-moons dataset, then wrap features and labels as float32 tensors
raw_features, raw_labels = make_moons(n_samples=100, noise=0.05, random_state=42)
X = torch.tensor(raw_features, dtype=torch.float32)
y = torch.tensor(raw_labels, dtype=torch.float32).view(-1, 1)  # labels as a column

# Show the samples in feature space, coloured by label
feature_1 = X.numpy()[:, 0]
feature_2 = X.numpy()[:, 1]
plt.figure(figsize=(10, 5))
plt.scatter(feature_1, feature_2, c=y.numpy().reshape(-1), cmap=plt.cm.viridis, edgecolor='k')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Moons Dataset')
plt.grid(True)
plt.show()

# Define a simple neural network model
class SimpleModel(nn.Module):
    """Two-feature linear model that outputs one raw score per sample."""

    def __init__(self):
        super().__init__()
        # 2 input features, 1 output (classification)
        self.linear = nn.Linear(in_features=2, out_features=1)
        self._initialize_weights()

    def _initialize_weights(self):
        """Overwrite the layer's weight and bias with uniform draws between the bounds below."""
        low, high = -1.0, 1.0
        nn.init.uniform_(self.linear.weight, a=low, b=high)
        nn.init.uniform_(self.linear.bias, a=low, b=high)

    def forward(self, x):
        """Run ``x`` through the linear layer and return the result (no activation)."""
        scores = self.linear(x)
        return scores

# Define Focal Loss
class FocalLoss(nn.Module):
    """Binary focal loss computed from raw logits.

    Per element the loss is ``alpha * (1 - p_t) ** gamma * BCE``, where ``BCE``
    is the binary cross-entropy with logits and ``p_t = exp(-BCE)``. For
    targets that are exactly 0 or 1, ``p_t`` is the predicted probability of
    the true class.

    Args:
        alpha: Constant factor applied to every element's loss (it is not
            class-dependent).
        gamma: Exponent applied to ``(1 - p_t)``; ``gamma=0`` gives
            ``alpha * BCE``.
        reduction: ``'mean'``, ``'sum'`` or ``'none'`` (return the per-element
            losses unreduced).

    Raises:
        ValueError: If ``reduction`` is not one of the supported values.
    """

    _REDUCTIONS = ('mean', 'sum', 'none')

    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super().__init__()
        # Fail early: an unrecognised reduction would otherwise silently yield
        # an unreduced tensor, which breaks a later loss.backward() call.
        if reduction not in self._REDUCTIONS:
            raise ValueError(
                f"reduction must be one of {self._REDUCTIONS}, got {reduction!r}"
            )
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Return the focal loss of logits ``inputs`` against ``targets``.

        The result is a scalar tensor for ``'mean'`` / ``'sum'`` and a tensor
        of per-element losses otherwise.
        """
        BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        pt = torch.exp(-BCE_loss)  # BCE = -log(p_t)  =>  p_t = exp(-BCE)
        F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss

        if self.reduction == 'mean':
            return torch.mean(F_loss)
        if self.reduction == 'sum':
            return torch.sum(F_loss)
        return F_loss

# Initialize the model, loss function, and optimizer
model = SimpleModel()
criterion = FocalLoss(alpha=1, gamma=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Training loop
epochs = 50000
losses = []  # Loss of every epoch, measured before that epoch's update

# Parameter trajectories, seeded with the freshly initialized values so that
# the 'Start' marker shows the true starting point rather than the parameters
# after the first update.
weight_trajectory_0 = [model.linear.weight[0, 0].item()]
weight_trajectory_1 = [model.linear.weight[0, 1].item()]
bias_trajectory = [model.linear.bias[0].item()]

model.train()  # the mode never changes, so set it once outside the loop
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    losses.append(loss_value)

    # Record the updated parameters (.item() yields plain Python floats)
    weight_trajectory_0.append(model.linear.weight[0, 0].item())
    weight_trajectory_1.append(model.linear.weight[0, 1].item())
    bias_trajectory.append(model.linear.bias[0].item())

    if epoch % 100 == 0:
        print(f'Epoch [{epoch}/{epochs}], Loss: {loss_value}')

# Convert trajectories to numpy arrays
weight_trajectory_0 = np.array(weight_trajectory_0)
weight_trajectory_1 = np.array(weight_trajectory_1)
bias_trajectory = np.array(bias_trajectory)

# Grid of (weight, bias) values for the contour plots. With meshgrid's default
# 'xy' indexing, W and B both have shape (len(biases), len(weights)).
weights = np.linspace(-2.5, 2.5, 100)  # Range of weights covered by the plots
biases = np.linspace(-2.5, 2.5, 100)   # Range of biases covered by the plots
W, B = np.meshgrid(weights, biases)

# Copy of the trained weight row, shape (1, 2). The loss slices are evaluated
# on copies of it, so the trained model itself is never modified.
trained_weight = model.linear.weight.detach().clone()


def _loss_slice(weight_index):
    """Return the loss at every (W, B) grid point for one weight and the bias.

    The weight at ``weight_index`` and the bias take the grid values, while the
    other weight is held at its trained value. Entry ``[r, c]`` of the result
    is the loss at ``(W[r, c], B[r, c])``, which is the layout ``contourf``
    expects.
    """
    surface = np.zeros_like(W)
    probe_weight = trained_weight.clone()
    with torch.no_grad():
        for idx in np.ndindex(W.shape):
            probe_weight[0, weight_index] = float(W[idx])
            # Same affine map as nn.Linear: x @ weight^T + bias
            logits = X @ probe_weight.t() + float(B[idx])
            surface[idx] = criterion(logits, y).item()
    return surface


# Compute Focal Losses on the meshgrid
focal_losses_weight_0 = _loss_slice(0)
focal_losses_weight_1 = _loss_slice(1)

# Plot the Focal Loss contours and the training trajectory for weight[0,0].
# Each trajectory is a 2-D projection of the 3-parameter path, so the contours
# show the exact training loss only where the fixed weight equals its trained
# value, i.e. at the 'End' marker.
plt.figure(figsize=(10, 20))

plt.subplot(2, 1, 1)
contour_weight_0 = plt.contourf(W, B, focal_losses_weight_0, levels=50, cmap='BrBG_r')
plt.colorbar(contour_weight_0, label='Loss')
plt.plot(weight_trajectory_0, bias_trajectory, marker='o', markersize=2, color='red', label='Trajectory')
plt.plot(weight_trajectory_0[0], bias_trajectory[0], marker='o', markersize=10, color='green', label='Start')
plt.plot(weight_trajectory_0[-1], bias_trajectory[-1], marker='x', markersize=10, color='blue', label='End')
plt.xlabel('Weight for Feature 1')
plt.ylabel('Bias')
plt.title('Focal Loss Contours for Weight[0,0]')
plt.legend()

# Plot the Focal Loss contours and the training trajectory for weight[0,1]
plt.subplot(2, 1, 2)
contour_weight_1 = plt.contourf(W, B, focal_losses_weight_1, levels=50, cmap='BrBG_r')
plt.colorbar(contour_weight_1, label='Loss')
plt.plot(weight_trajectory_1, bias_trajectory, marker='o', markersize=2, color='red', label='Trajectory')
plt.plot(weight_trajectory_1[0], bias_trajectory[0], marker='o', markersize=10, color='green', label='Start')
plt.plot(weight_trajectory_1[-1], bias_trajectory[-1], marker='x', markersize=10, color='blue', label='End')
plt.xlabel('Weight for Feature 2')
plt.ylabel('Bias')
plt.title('Focal Loss Contours for Weight[0,1]')
plt.legend()

plt.tight_layout()
plt.show()

# Plot the training loss over epochs
plt.figure(figsize=(10, 5))
plt.plot(losses, label='Training Loss', color='blue')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.legend()
plt.grid(True)
plt.show()


# Exercise


Experiment with these loss functions along with other gradient descent techniques (momentum, Nesterov, Adam), and visualize the resulting loss surfaces along with the training trajectories.