In [None]:
import os
import cv2
import json
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader, random_split

In [None]:
class ImitationDataset(Dataset):
    def __init__(self, rgb_dir, seg_dir, log_path, transforms=None):
        self.rgb_dir = rgb_dir
        self.seg_dir = seg_dir
        self.log_path = log_path
        self.transform = transforms
        
        with open(log_path, 'r') as f:
            self.log_data = json.load(f)
            
    def __len__(self):
        return len(self.log_data)
    
    def __getitem__(self, index):
        rgb_image_path = os.path.join(self.rgb_dir, f"{index:05d}.png")
        seg_image_path = os.path.join(self.seg_dir, f"{index:05d}.png")
        
        rgb_image = Image.open(rgb_image_path)
        
        if self.transform:
            rgb_tensor = self.transform(rgb_image)
        else:
            rgb_tensor = transforms.ToTensor()(rgb_image)
            
        seg_image = np.array(Image.open(seg_image_path))
        
        lane_mask = np.all(seg_image == [0, 255, 0], axis=2).astype(np.uint8)
        obs_mask = np.all(seg_image == [255, 0, 0], axis=2).astype(np.uint8)
        
        seg_tensor = torch.tensor(np.stack([lane_mask, obs_mask], axis=0), dtype=torch.float32)
        
        if seg_tensor.shape[1:] != rgb_tensor.shape[1:]:
            seg_tensor = F.interpolate(
                seg_tensor.unsqueeze(0),
                size=rgb_tensor.shape[1:],
                mode='nearest'
            ).squeeze(0)
        
        input_tensor = torch.cat([rgb_tensor, seg_tensor], dim=0)
                        
        control = self.log_data[index]
        control_tensor = torch.tensor([
            control['steer'],
            control['throttle'],
            control['brake']
        ], dtype=torch.float32)
        
        return input_tensor, control_tensor
        


In [48]:
class ImitationCNN(nn.Module):
    def __init__(self):
        super(ImitationCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(5, 32, kernel_size=5, stride=2, padding=2)  # [B, 32, 180, 320]
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)  # [B, 64, 90, 160]
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1) # [B, 128, 45, 80]
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1) # [B, 256, 23, 40]

        # Fully connected layers
        self.fc1 = nn.Linear(256 * 23 * 40, 512)  # Flattened output from conv4 (keep the 23)
        self.fc2 = nn.Linear(512, 3)  # Output: steer, throttle, brake

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        out = self.fc2(x)
        
        return out


In [None]:
rgb_dir = 'rgb_image'
seg_dir = 'seg_image'
log_path = 'logs/logs.json'

dataset = ImitationDataset(rgb_dir, seg_dir, log_path, None)
train_loader = DataLoader(dataset, 32, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ImitationCNN().to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for i, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        optimizer.zero_grad()
        
        outputs= model(inputs)
        loss = criterion(outputs, targets)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

        

In [None]:
model.eval()  # Set the model to evaluation mode
steer_error = 0
throttle_error = 0
brake_error = 0

with torch.no_grad():  # No gradient calculation during evaluation
    for inputs, targets in train_loader:  # Using train_loader for demonstration
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)

        steer_error += F.mse_loss(outputs[:, 0], targets[:, 0], reduction='sum').item()
        throttle_error += F.mse_loss(outputs[:, 1], targets[:, 1], reduction='sum').item()
        brake_error += F.mse_loss(outputs[:, 2], targets[:, 2], reduction='sum').item()

n = len(train_loader.dataset)  # Total number of samples in the dataset

print(f"Steer MSE: {steer_error/n:.4f}, Throttle MSE: {throttle_error/n:.4f}, Brake MSE: {brake_error/n:.4f}")




In [15]:

log_path = r'logs/logs.json'
with open(log_path, 'r') as f:
    log_data = json.load(f)

In [112]:
x = int(0000 + 9999)
print(log_data[x])

{'img': '09999.png', 'steer': -0.002173948334529996, 'throttle': 0.7269210815429688, 'brake': 0.0, 'imu': {'orientation': {'w': 0.13457519332538814, 'x': 0.0, 'y': 0.0, 'z': -0.9909033844636087}, 'orientation_covariance': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 'angular_velocity': {'x': -0.0005980819696560502, 'y': -5.1960101700387895e-05, 'z': -0.005580318626016378}, 'angular_velocity_covariance': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 'linear_acceleration': {'x': -0.002654331037774682, 'y': -0.014474133029580116, 'z': 9.810001373291016}, 'linear_acceleration_covariance': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, 'gnss': {'latitude': 0.0005778344940239322, 'longitude': 0.0005181960624935497}, 'timestamp': 1744467318.1344137}


In [102]:
len(log_data)

10000

In [20]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

img = Image.open("seg_image/00000.png")
img_np = np.array(img)
lane = np.all(img_np == [0, 255, 0], axis=-1).astype(np.uint8)
obstacle = np.all(img_np == [255, 0, 0], axis=-1).astype(np.uint8)
H, W = lane.shape
color_mask = np.zeros((H, W, 3), dtype=np.uint8)
color_mask[obstacle == 1] = [255, 0, 0]
color_mask[lane == 1] = [0, 255, 0]

# seg_mask = np.array(Image.open(seg_image_path))
seg_mask = img_np

lane_mask = np.all(seg_mask == [0, 255, 0], axis=2).astype(np.uint8)  # H x W
obs_mask = np.all(seg_mask == [255, 0, 0], axis=2).astype(np.uint8)   # H x W

multi_channel_mask = np.stack([lane_mask, obs_mask], axis=0)  # Shape: (2, H, W)
seg_tensor = torch.tensor(multi_channel_mask, dtype=torch.float32)

# plt.imshow(seg_tensor)
# plt.title("Combined Segmentation")
# plt.axis('off')
# plt.show()



In [25]:
seg_image_path = Image.open("seg_image/00000.png")
rgb_image_path = Image.open("rgb_image/00000.png")

# rgb_image = cv2.cvtColor(cv2.imread(rgb_image_path), cv2.COLOR_BGR2RGB)
# rgb_image = Image.fromarray(rgb_image)


# rgb_tensor = transforms.ToTensor()(rgb_image)
rgb_image = np.array(rgb_image_path)

# Convert RGB image to RGB format (if needed)
rgb_image = cv2.cvtColor(rgb_image, cv2.COLOR_BGR2RGB)

# Convert the numpy array to a PIL Image (if needed)
rgb_image = Image.fromarray(rgb_image)

# Convert RGB image to tensor
rgb_tensor = transforms.ToTensor()(rgb_image)

seg_image = np.array(seg_image_path)
lane_mask = np.all(seg_image == [0, 255, 0], axis=2).astype(np.uint8)
obs_mask = np.all(seg_image == [255, 0, 0], axis=2).astype(np.uint8)
seg_tensor = torch.tensor(np.stack([lane_mask, obs_mask], axis=0), dtype=torch.float32)

if seg_tensor.shape[1:] != rgb_tensor.shape[1:]:
    seg_tensor = F.interpolate(
        seg_tensor.unsqueeze(0),
        size=rgb_tensor.shape[1:],
        mode='nearest'
    ).squeeze(0)

input_tensor = torch.cat([rgb_tensor, seg_tensor], dim=0)

In [27]:
print(input_tensor.shape)


torch.Size([5, 360, 640])


In [51]:
import os
import json
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

class ImitationDataset(Dataset):
    def __init__(self, rgb_dir, seg_dir, log_path):
        self.rgb_dir = rgb_dir
        self.seg_dir = seg_dir
        self.log_path = log_path
        with open(log_path, 'r') as f:
            self.log_data = json.load(f)
        
        self.rgb_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.augmentation = transforms.RandomHorizontalFlip(p=0.5)

    def __len__(self):
        return len(self.log_data)

    def __getitem__(self, index):
        rgb_image_path = os.path.join(self.rgb_dir, f"{index:05d}.png")
        seg_image_path = os.path.join(self.seg_dir, f"{index:05d}.png")

        rgb_image = Image.open(rgb_image_path)
        seg_image = Image.open(seg_image_path)

        # Apply augmentation consistently
        seed = torch.randint(0, 2**32, (1,)).item()
        torch.manual_seed(seed)
        rgb_image = self.augmentation(rgb_image)
        torch.manual_seed(seed)
        seg_image = self.augmentation(seg_image)

        rgb_tensor = self.rgb_transform(rgb_image)

        seg_image = np.array(seg_image)
        lane_mask = np.all(seg_image == [0, 255, 0], axis=2).astype(np.uint8)
        obs_mask = np.all(seg_image == [255, 0, 0], axis=2).astype(np.uint8)
        seg_tensor = torch.tensor(np.stack([lane_mask, obs_mask], axis=0), dtype=torch.float32)

        if seg_tensor.shape[1:] != rgb_tensor.shape[1:]:
            seg_tensor = F.interpolate(seg_tensor.unsqueeze(0), size=rgb_tensor.shape[1:], mode='nearest').squeeze(0)

        input_tensor = torch.cat([rgb_tensor, seg_tensor], dim=0)

        control = self.log_data[index]
        control_tensor = torch.tensor([control['steer'], control['throttle'], control['brake']], dtype=torch.float32)

        # Adjust steering if flipped
        if torch.rand(1).item() < 0.5:  # Matches p=0.5 from RandomHorizontalFlip
            control_tensor[0] = -control_tensor[0]

        return input_tensor, control_tensor

class ImitationCNN(nn.Module):
    def __init__(self):
        super(ImitationCNN, self).__init__()
        self.conv1 = nn.Conv2d(5, 32, kernel_size=5, stride=2, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)

        self.fc1 = nn.Linear(256 * 23 * 40, 512)  # Adjust based on input size (e.g., 368x640)
        self.fc2 = nn.Linear(512, 3)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        out = self.fc2(x)

        steer = torch.tanh(out[:, 0])           # [-1, 1]
        throttle = torch.sigmoid(out[:, 1])     # [0, 1]
        brake = torch.sigmoid(out[:, 2])        # [0, 1]
        return torch.stack([steer, throttle, brake], dim=1)

# Dataset and DataLoader
rgb_dir = 'rgb_image'
seg_dir = 'seg_image'
log_path = 'logs/logs.json'

dataset = ImitationDataset(rgb_dir, seg_dir, log_path)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model, loss, optimizer, scheduler
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ImitationCNN().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5, verbose=True)

# Training loop with validation and early stopping
num_epochs = 50
best_val_loss = float('inf')
patience = 3
trigger_times = 0

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}")

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Val Loss: {val_loss:.4f}")
    scheduler.step(val_loss)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        trigger_times = 0
        # Optionally save model
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        trigger_times += 1
        if trigger_times >= patience:
            print("Early stopping triggered")
            break

Epoch [1/50], Train Loss: 0.4547
Epoch [1/50], Val Loss: 0.4452
Epoch [2/50], Train Loss: 0.4557
Epoch [2/50], Val Loss: 0.4452
Epoch [3/50], Train Loss: 0.4552
Epoch [3/50], Val Loss: 0.4456
Epoch [4/50], Train Loss: 0.4549
Epoch [4/50], Val Loss: 0.4462
Early stopping triggered
