<a href="https://colab.research.google.com/github/Papa-Panda/Paper_reading/blob/main/toy_self_driving.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Toy example of a self-driving pipeline.
# Next steps: BEV (bird's-eye-view) network, occupancy network, end-to-end training.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Perception: Object detection network (simplified CNN)
class PerceptionNet(nn.Module):
    """Small CNN that regresses one bounding box ``[x, y, w, h]`` per image.

    Two ``conv -> ReLU -> 2x2 max-pool`` stages are followed by a single
    linear layer applied to the flattened feature map.

    Args:
        image_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 32,
            which yields the original ``32 * 8 * 8`` input to the linear head.

    Raises:
        ValueError: If either spatial dimension is smaller than 4, because the
            two pooling stages would then leave an empty feature map.
    """

    def __init__(self, image_size=32):
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size
        if height < 4 or width < 4:
            raise ValueError(
                f"image_size must be at least 4 in each dimension, got {image_size!r}"
            )

        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # The padded 3x3 convolutions keep the spatial size and each
        # MaxPool2d(2) floor-halves it, so the map ends up (H // 4, W // 4)
        # with 32 channels.
        self.fc = nn.Linear(32 * (height // 4) * (width // 4), 4)  # Output: bounding box [x, y, w, h]

    def forward(self, x):
        """Return the four box values for each image in the batch ``x``."""
        features = self.conv(x)
        # flatten (unlike view) does not require a contiguous tensor.
        return self.fc(features.flatten(1))

# Prediction: Trajectory prediction network (RNN)
class PredictionNet(nn.Module):
    """Single-layer LSTM that maps a feature sequence to one offset ``[dx, dy]``.

    Every time step carries 8 features; in this notebook those are the 4
    past-trajectory values concatenated with the 4 detected-box values.
    """

    def __init__(self):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, time, features).
        self.rnn = nn.LSTM(8, 16, num_layers=1, batch_first=True)
        self.fc = nn.Linear(in_features=16, out_features=2)  # Output: predicted position [dx, dy]

    def forward(self, x):
        """Run the LSTM over ``x`` and project its last time step to ``[dx, dy]``."""
        hidden_seq, _final_state = self.rnn(x)
        last_step = hidden_seq[:, -1, :]
        return self.fc(last_step)


# Planning: Path planning network (MLP)
class PlanningNet(nn.Module):
    """Two-layer MLP (8 -> 32 -> 2) that outputs a planned movement ``[dx, dy]``.

    In this notebook the 8 inputs are the car state (2), the predicted
    offset (2) and the detected box (4) concatenated together.
    """

    def __init__(self):
        super().__init__()
        hidden_layer = nn.Linear(in_features=8, out_features=32)
        output_layer = nn.Linear(in_features=32, out_features=2)  # planned movement [dx, dy]
        self.fc = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

    def forward(self, x):
        """Return the planned movement for each row of ``x``."""
        planned = self.fc(x)
        return planned


# Simulated data and pipeline
# Random stand-ins for real sensor data; the tensors are created in a fixed
# order so a given RNG seed always yields the same values.
batch_size = 4
image_data = torch.randn((batch_size, 3, 32, 32))  # Example images: 3 channels, 32x32
past_trajectory = torch.randn((batch_size, 5, 4))  # Example past trajectories: 5 steps x 4 values
car_state = torch.randn((batch_size, 2))  # Example car state [x, y]

def train_self_driving(num_epochs=10, lr=0.001):
    """Jointly train the perception -> prediction -> planning toy pipeline.

    Fresh networks are created on every call and trained on the module-level
    tensors ``image_data``, ``past_trajectory`` and ``car_state``. The only
    loss is the MSE between the planned movement and a dummy all-zero target;
    it is back-propagated through all three networks. The loss is printed
    once per epoch and nothing is returned.

    Args:
        num_epochs: Number of optimisation steps over the fixed batch.
        lr: Learning rate of the shared Adam optimizer.
    """
    perception_net = PerceptionNet()
    prediction_net = PredictionNet()
    planning_net = PlanningNet()

    criterion = nn.MSELoss()
    # One optimizer over every parameter, so a single step updates all stages.
    optimizer = optim.Adam(
        [
            *perception_net.parameters(),
            *prediction_net.parameters(),
            *planning_net.parameters(),
        ],
        lr=lr,
    )

    # Take the history length from the data instead of hard-coding it, so the
    # box features always line up with the trajectory's time dimension.
    num_steps = past_trajectory.size(1)

    for epoch in range(num_epochs):  # Training loop
        # Perception: Detect objects
        detected_boxes = perception_net(image_data)  # Outputs bounding boxes [batch_size, 4]

        # Prediction: Predict future trajectory
        # Broadcast the per-image box to every time step; expand creates a
        # view and torch.cat below materialises the combined tensor.
        boxes_per_step = detected_boxes.unsqueeze(1).expand(-1, num_steps, -1)
        prediction_input = torch.cat((past_trajectory, boxes_per_step), dim=2)
        predicted_trajectory = prediction_net(prediction_input)

        # Planning: Generate path to avoid collision
        planning_input = torch.cat((car_state, predicted_trajectory, detected_boxes), dim=1)
        planned_path = planning_net(planning_input)

        # Compute loss (dummy target for illustration)
        target_path = torch.zeros_like(planned_path)  # Assume the goal is [0, 0]
        loss = criterion(planned_path, target_path)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch + 1}, Loss: {loss.item():.4f}")

train_self_driving()

Epoch 1, Loss: 0.0659
Epoch 2, Loss: 0.0352
Epoch 3, Loss: 0.0183
Epoch 4, Loss: 0.0082
Epoch 5, Loss: 0.0055
Epoch 6, Loss: 0.0033
Epoch 7, Loss: 0.0022
Epoch 8, Loss: 0.0010
Epoch 9, Loss: 0.0016
Epoch 10, Loss: 0.0022


In [None]:
# Version 2: perception -> decision -> control, each module trained separately on synthetic data

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.models import resnet18
import numpy as np

# Perception Module: ImageNet-pretrained ResNet-18 used as an image classifier
class PerceptionNet(nn.Module):
    """ResNet-18 backbone whose final layer is replaced by a ``num_classes`` head.

    Args:
        num_classes: Number of output classes of the new linear head.
    """

    def __init__(self, num_classes=3):  # Example: Car, Pedestrian, Traffic Light
        super().__init__()
        try:
            # torchvision >= 0.13 selects checkpoints through weight enums;
            # the boolean `pretrained` flag is deprecated there.
            from torchvision.models import ResNet18_Weights
        except ImportError:
            # Older torchvision only understands the legacy flag.
            self.backbone = resnet18(pretrained=True)
        else:
            # IMAGENET1K_V1 is the checkpoint that `pretrained=True` loads.
            self.backbone = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
        # Swap the 1000-way ImageNet classifier for a freshly initialised head.
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's class scores for the image batch ``x``."""
        return self.backbone(x)

# Decision-Making Module: Fully Connected Network for action selection
class DecisionNet(nn.Module):
    """Two-layer MLP producing one score per action.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of action scores produced.
    """

    def __init__(self, input_size=3, hidden_size=32, output_size=4):  # Example actions: Stop, Slow, Turn, Accelerate
        super().__init__()
        hidden_layer = nn.Linear(in_features=input_size, out_features=hidden_size)
        action_head = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.fc = nn.Sequential(hidden_layer, nn.ReLU(), action_head)

    def forward(self, x):
        """Return the action scores for each row of ``x``."""
        action_scores = self.fc(x)
        return action_scores

# Control Module: small learned MLP standing in for a controller
class ControlNet(nn.Module):
    """Two-layer MLP with a fixed 16-unit hidden layer producing control values.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of control values produced.
    """

    def __init__(self, input_size=4, output_size=2):  # Example outputs: Steering, Throttle
        super().__init__()
        hidden_layer = nn.Linear(in_features=input_size, out_features=16)
        control_head = nn.Linear(in_features=16, out_features=output_size)
        self.fc = nn.Sequential(hidden_layer, nn.ReLU(), control_head)

    def forward(self, x):
        """Return the control values for each row of ``x``."""
        control_values = self.fc(x)
        return control_values

# Synthetic Training Data
def generate_synthetic_data(batch_size=32):
    """Draw one batch of random tensors for the three training stages.

    Args:
        batch_size: Number of samples in every returned tensor.

    Returns:
        Tuple ``(images, labels, decisions, actions, controls)``: uniform
        random images of shape ``(batch_size, 3, 224, 224)``, integer labels
        in ``[0, 3)``, and uniform random tensors with 3, 4 and 2 columns.
    """
    n = batch_size
    images = torch.rand((n, 3, 224, 224))  # Random image data
    labels = torch.randint(low=0, high=3, size=(n,))  # Perception labels
    # Drawn in this order (decisions, actions, controls) to keep the RNG
    # stream stable.
    decisions, actions, controls = (torch.rand(n, width) for width in (3, 4, 2))
    return images, labels, decisions, actions, controls

# Initialize models
perception_net = PerceptionNet()
decision_net = DecisionNet()
control_net = ControlNet()

# Optimizers and Losses: cross-entropy for the classifier, one Adam per network
criterion = nn.CrossEntropyLoss()
optimizer_perception, optimizer_decision, optimizer_control = (
    optim.Adam(module.parameters(), lr=0.001)
    for module in (perception_net, decision_net, control_net)
)


def _apply_update(optimizer, loss):
    """Clear old gradients, back-propagate ``loss`` and take one optimizer step."""
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


# Training Loop
# Each stage is fitted on its own synthetic inputs and targets; no stage
# consumes another stage's output here.
for epoch in range(5):  # Toy example, training for 5 epochs
    images, labels, decisions, actions, controls = generate_synthetic_data()

    # Perception Training
    preds = perception_net(images)
    loss_perception = criterion(preds, labels)
    _apply_update(optimizer_perception, loss_perception)

    # Decision Training (mean squared error against the random actions)
    decision_preds = decision_net(decisions)
    loss_decision = (decision_preds - actions).pow(2).mean()
    _apply_update(optimizer_decision, loss_decision)

    # Control Training (mean squared error against the random controls)
    control_preds = control_net(actions)
    loss_control = (control_preds - controls).pow(2).mean()
    _apply_update(optimizer_control, loss_control)

    print(f"Epoch {epoch+1}: Losses -> Perception: {loss_perception.item()}, Decision: {loss_decision.item()}, Control: {loss_control.item()}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 86.2MB/s]


Epoch 1: Losses -> Perception: 1.4048140048980713, Decision: 0.3863893151283264, Control: 0.23576077818870544
Epoch 2: Losses -> Perception: 1.141972541809082, Decision: 0.32080045342445374, Control: 0.23125159740447998
Epoch 3: Losses -> Perception: 1.0794557332992554, Decision: 0.34922540187835693, Control: 0.22460663318634033
Epoch 4: Losses -> Perception: 1.2529215812683105, Decision: 0.35754597187042236, Control: 0.23803827166557312
Epoch 5: Losses -> Perception: 1.7396715879440308, Decision: 0.28207042813301086, Control: 0.2812086343765259


In [None]:
# Version 3: perception -> prediction -> planning -> control

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.models import resnet18
import numpy as np

# Perception Module: ImageNet-pretrained ResNet-18 used as an image classifier
class PerceptionNet(nn.Module):
    """ResNet-18 backbone whose final layer is replaced by a ``num_classes`` head.

    Args:
        num_classes: Number of output classes of the new linear head.
    """

    def __init__(self, num_classes=3):  # Example: Car, Pedestrian, Traffic Light
        super().__init__()
        try:
            # torchvision >= 0.13 selects checkpoints through weight enums;
            # the boolean `pretrained` flag is deprecated there.
            from torchvision.models import ResNet18_Weights
        except ImportError:
            # Older torchvision only understands the legacy flag.
            self.backbone = resnet18(pretrained=True)
        else:
            # IMAGENET1K_V1 is the checkpoint that `pretrained=True` loads.
            self.backbone = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
        # Swap the 1000-way ImageNet classifier for a freshly initialised head.
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's class scores for the image batch ``x``."""
        return self.backbone(x)

# Prediction Module: Predict future trajectories of detected objects
class PredictionNet(nn.Module):
    """Two-layer MLP mapping an object state vector to a vector of predictions.

    Args:
        input_size: Number of state features per object.
        hidden_size: Width of the hidden layer.
        output_size: Number of predicted values per object.
    """

    def __init__(self, input_size=4, hidden_size=32, output_size=10):  # Example: Predict 10 future steps
        super().__init__()
        hidden_layer = nn.Linear(in_features=input_size, out_features=hidden_size)
        trajectory_head = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.fc = nn.Sequential(hidden_layer, nn.ReLU(), trajectory_head)

    def forward(self, x):
        """Return the predicted future positions (simplified) for each row of ``x``."""
        future_positions = self.fc(x)
        return future_positions

# Planning Module: Generate a trajectory for the car
class PlanningNet(nn.Module):
    """Two-layer MLP mapping predicted positions to planned waypoints.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of waypoint values produced.
    """

    def __init__(self, input_size=10, hidden_size=32, output_size=5):  # Example: Generate 5 control waypoints
        super().__init__()
        hidden_layer = nn.Linear(in_features=input_size, out_features=hidden_size)
        waypoint_head = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.fc = nn.Sequential(hidden_layer, nn.ReLU(), waypoint_head)

    def forward(self, x):
        """Return the planned waypoints (simplified) for each row of ``x``."""
        waypoints = self.fc(x)
        return waypoints

# Control Module: small learned MLP standing in for a controller
class ControlNet(nn.Module):
    """Two-layer MLP with a fixed 16-unit hidden layer producing control values.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of control values produced.
    """

    def __init__(self, input_size=5, output_size=2):  # Example outputs: Steering, Throttle
        super().__init__()
        hidden_layer = nn.Linear(in_features=input_size, out_features=16)
        control_head = nn.Linear(in_features=16, out_features=output_size)
        self.fc = nn.Sequential(hidden_layer, nn.ReLU(), control_head)

    def forward(self, x):
        """Return the control values for each row of ``x``."""
        control_values = self.fc(x)
        return control_values

# Synthetic Training Data
def generate_synthetic_data(batch_size=32):
    """Draw one batch of random tensors for the four training stages.

    Args:
        batch_size: Number of samples in every returned tensor.

    Returns:
        Tuple ``(images, labels, object_states, predicted_trajectories,
        planned_waypoints, controls)``: uniform random images of shape
        ``(batch_size, 3, 224, 224)``, integer labels in ``[0, 3)``, and
        uniform random tensors with 4, 10, 5 and 2 columns.
    """
    n = batch_size
    images = torch.rand((n, 3, 224, 224))  # Random image data
    labels = torch.randint(low=0, high=3, size=(n,))  # Perception labels
    # Drawn in this order (object states [x, y, vx, vy], predicted positions,
    # planned waypoints, steering/throttle) to keep the RNG stream stable.
    object_states, predicted_trajectories, planned_waypoints, controls = (
        torch.rand(n, width) for width in (4, 10, 5, 2)
    )
    return images, labels, object_states, predicted_trajectories, planned_waypoints, controls

# Initialize models
perception_net = PerceptionNet()
prediction_net = PredictionNet()
planning_net = PlanningNet()
control_net = ControlNet()

# Optimizers and Losses: one Adam per network, so every stage is updated
# only by its own loss.
criterion_classification = nn.CrossEntropyLoss()
criterion_regression = nn.MSELoss()
(
    optimizer_perception,
    optimizer_prediction,
    optimizer_planning,
    optimizer_control,
) = (
    optim.Adam(module.parameters(), lr=0.001)
    for module in (perception_net, prediction_net, planning_net, control_net)
)


def _apply_update(optimizer, loss):
    """Clear old gradients, back-propagate ``loss`` and take one optimizer step."""
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


# Training Loop
for epoch in range(5):  # Toy example, training for 5 epochs
    (
        images,
        labels,
        object_states,
        predicted_trajectories,
        planned_waypoints,
        controls,
    ) = generate_synthetic_data()

    # --- Perception: classify the random images ---
    perception_net.train()
    preds = perception_net(images)
    loss_perception = criterion_classification(preds, labels)
    _apply_update(optimizer_perception, loss_perception)

    # --- Prediction: regress trajectories from object states ---
    prediction_net.train()
    trajectory_preds = prediction_net(object_states)
    loss_prediction = criterion_regression(trajectory_preds, predicted_trajectories)
    _apply_update(optimizer_prediction, loss_prediction)

    # Cut the graph here so the planning loss cannot reach PredictionNet.
    trajectory_preds_detached = trajectory_preds.detach()

    # --- Planning: regress waypoints from the detached predictions ---
    planning_net.train()
    planning_preds = planning_net(trajectory_preds_detached)
    loss_planning = criterion_regression(planning_preds, planned_waypoints)
    _apply_update(optimizer_planning, loss_planning)

    # Cut the graph again so the control loss cannot reach PlanningNet.
    planning_preds_detached = planning_preds.detach()

    # --- Control: regress steering/throttle from the detached waypoints ---
    control_net.train()
    control_preds = control_net(planning_preds_detached)
    loss_control = criterion_regression(control_preds, controls)
    _apply_update(optimizer_control, loss_control)

    print(f"Epoch {epoch+1}: Losses -> Perception: {loss_perception.item():.4f}, "
          f"Prediction: {loss_prediction.item():.4f}, Planning: {loss_planning.item():.4f}, "
          f"Control: {loss_control.item():.4f}")


Epoch 1: Losses -> Perception: 1.1259, Prediction: 0.3441, Planning: 0.2727, Control: 0.6104
Epoch 2: Losses -> Perception: 1.2355, Prediction: 0.3691, Planning: 0.2864, Control: 0.5011
Epoch 3: Losses -> Perception: 1.3155, Prediction: 0.3598, Planning: 0.3078, Control: 0.5565
Epoch 4: Losses -> Perception: 1.2894, Prediction: 0.3650, Planning: 0.2952, Control: 0.3646
Epoch 5: Losses -> Perception: 1.4151, Prediction: 0.3580, Planning: 0.2768, Control: 0.4983


In [1]:
1

1