In [25]:
!git clone https://github.com/JacobFV/calib_challenge

Cloning into 'calib_challenge'...
remote: Enumerating objects: 80, done.[K
remote: Counting objects: 100% (29/29), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 80 (delta 17), reused 23 (delta 15), pack-reused 51[K
Receiving objects: 100% (80/80), 358.09 MiB | 16.38 MiB/s, done.
Resolving deltas: 100% (21/21), done.


In [27]:
cd calib_challenge

/content/calib_challenge


In [28]:
!git rm commavq

rm 'commavq'


In [29]:
!git submodule add https://github.com/commaai/commavq

Cloning into '/content/calib_challenge/commavq'...
remote: Enumerating objects: 173, done.[K
remote: Counting objects: 100% (82/82), done.[K
remote: Compressing objects: 100% (49/49), done.[K
remote: Total 173 (delta 41), reused 61 (delta 32), pack-reused 91[K
Receiving objects: 100% (173/173), 78.60 MiB | 10.84 MiB/s, done.
Resolving deltas: 100% (66/66), done.


In [30]:
pip install einops torch



In [34]:
!python notebook.py

Traceback (most recent call last):
  File "/content/calib_challenge/notebook.py", line 78, in <module>
    train_loader = load_ds()
  File "/content/calib_challenge/notebook.py", line 54, in load_ds
    video_frames = extract_frames(f"labeled/{i}.hevc")
  File "/content/calib_challenge/notebook.py", line 38, in extract_frames
    ret, frame = cap.read()
KeyboardInterrupt
^C


In [35]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import cv2

# Adjust the import paths for custom modules
from commavq.utils.vqvae import Encoder, CompressorConfig

In [36]:
# Small regression head stacked on a pretrained (frozen) encoder
class MLP(nn.Module):
    """Regression head that consumes the output of a pretrained encoder.

    The encoder is run under ``torch.no_grad()``, so only the linear layers
    defined here receive gradients. The encoder output is cast to float and
    divided by ``vocab_size`` before entering the head.

    Head layout: ``input_dim -> 128``, then two parallel ``128 -> 128``
    branches (one purely linear, one followed by ReLU) that are concatenated,
    merged ``256 -> 128`` and finally projected to ``output_dim``.

    Args:
        input_dim: Size of the last dimension of the encoder output.
        output_dim: Number of regression targets.
        encoder: Callable module applied to the raw input batch.
        vocab_size: Divisor used to normalise the encoder output. When omitted
            it is read from ``encoder.config.vocab_size`` if that attribute
            exists; otherwise the module-level ``config`` object is consulted
            at call time (the original notebook behaviour).
    """

    def __init__(self, input_dim, output_dim, encoder, vocab_size=None):
        super().__init__()
        self.encoder = encoder  # Pretrained encoder
        if vocab_size is None:
            # Prefer the encoder's own configuration over a notebook global.
            encoder_config = getattr(encoder, "config", None)
            vocab_size = getattr(encoder_config, "vocab_size", None)
        self.vocab_size = vocab_size
        self.layer1 = nn.Linear(input_dim, 128)  # First hidden layer
        self.relu = nn.ReLU()
        self.layer2_res = nn.Linear(128, 128)  # Linear branch
        self.layer2_relu = nn.Linear(128, 128)  # ReLU branch
        self.merge = nn.Linear(256, 128)  # Merges the two concatenated branches
        self.downproject = nn.Linear(128, output_dim)  # Output projection

    def _token_scale(self):
        """Return the divisor used to normalise the encoder output."""
        if self.vocab_size is not None:
            return self.vocab_size
        # Legacy fallback: relies on a global named `config` existing in the
        # notebook namespace, exactly as the original implementation did.
        return config.vocab_size

    def forward(self, x):
        """Encode ``x`` without gradients and run the regression head."""
        with torch.no_grad():
            encoded = self.encoder(x)
            # presumably integer token ids; scaled into a small float range
            features = encoded.float() / self._token_scale()
        hidden = self.layer1(features)
        linear_branch = self.layer2_res(hidden)
        relu_branch = self.relu(self.layer2_relu(hidden))
        # Concatenate on the feature axis so extra leading dims are tolerated.
        merged = self.merge(torch.cat((linear_branch, relu_branch), dim=-1))
        return self.downproject(merged)

In [37]:
def extract_frames(video_path):
    """Decode every frame of a video into a normalised float32 array.

    Each frame is resized to 224x224 and scaled by 1/255. No colour
    conversion is applied, so the channel order is whatever ``cv2`` delivers
    (OpenCV decodes to BGR by default).

    Args:
        video_path: Path of the video file to decode.

    Returns:
        ``np.ndarray`` of dtype float32 laid out as (frames, channels, 224, 224).

    Raises:
        ValueError: If the video cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    raw_frames = []
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Keep the resized frame in its native dtype for now; converting
            # once at the end avoids holding a float copy of every frame.
            raw_frames.append(cv2.resize(frame, (224, 224)))
    finally:
        # Always hand the decoder back, even if decoding raised.
        cap.release()

    if not raw_frames:
        raise ValueError(f"No frames decoded from video: {video_path}")

    frames = np.stack(raw_frames).astype(np.float32)
    frames /= 255.0
    return frames.transpose((0, 3, 1, 2))  # Change from B H W C to B C H W

def load_ds(data_dir="labeled", num_videos=5, batch_size=64):
    """Build a shuffled training ``DataLoader`` from the labeled videos.

    For each index ``i`` in ``range(num_videos)`` the frames of
    ``{data_dir}/{i}.hevc`` are paired row-by-row with the labels in
    ``{data_dir}/{i}.txt``. Rows whose label contains NaN are dropped.

    Args:
        data_dir: Directory holding the ``<i>.hevc`` / ``<i>.txt`` pairs.
        num_videos: Number of consecutive video indices to load, starting at 0.
        batch_size: Batch size of the returned loader.

    Returns:
        ``DataLoader`` over a ``TensorDataset`` of float32 (frames, labels).

    Raises:
        ValueError: If ``num_videos`` selects no videos.
        AssertionError: If a video's frame count differs from its label count.
    """
    kept_frames = []
    kept_angles = []
    for i in range(num_videos):
        video_frames = extract_frames(f"{data_dir}/{i}.hevc")
        # ndmin=2 keeps a single-row label file two-dimensional.
        angles = np.loadtxt(f"{data_dir}/{i}.txt", ndmin=2)
        assert len(video_frames) == len(angles), f"Mismatch in frames and angles count for video {i}!"

        # Drop NaN-labelled rows per video (the original notebook notes these
        # mark samples with speed below 4 m/s) so the unfiltered frames can be
        # freed before the next video is decoded.
        valid_indices = ~np.isnan(angles).any(axis=1)
        kept_frames.append(video_frames[valid_indices])
        kept_angles.append(angles[valid_indices])

    if not kept_frames:
        raise ValueError("num_videos must be at least 1 to build a dataset.")

    # Concatenate all data
    X = np.concatenate(kept_frames, axis=0)
    y = np.concatenate(kept_angles, axis=0)

    # as_tensor avoids a second copy when the array is already float32.
    train_tensor = TensorDataset(
        torch.as_tensor(X, dtype=torch.float32),
        torch.as_tensor(y, dtype=torch.float32),
    )
    return DataLoader(train_tensor, batch_size=batch_size, shuffle=True)

In [38]:
# Seed torch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable across Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

DEVICE_NAME = "cuda" if torch.cuda.is_available() else "cpu"

# Load data
train_loader = load_ds()

# Load and configure the encoder
config = CompressorConfig()
encoder = Encoder(config)
encoder.load_state_dict_from_url(
    "https://huggingface.co/commaai/commavq-gpt2m/resolve/main/encoder_pytorch_model.bin",
    assign=True,
)
encoder = encoder.eval().to(device=DEVICE_NAME)
# The encoder is only ever used under torch.no_grad() inside MLP.forward, so
# mark its weights as non-trainable explicitly.
encoder.requires_grad_(False)

# Initialize the Predictor with the integrated encoder.
# NOTE(review): input_dim=196 must equal the last-dimension size of the
# encoder output for 224x224 frames — confirm against the encoder.
predictor = MLP(input_dim=196, output_dim=2, encoder=encoder).to(DEVICE_NAME)

# Loss and optimizer; only parameters that can receive gradients are optimised,
# which keeps the frozen encoder weights out of Adam's bookkeeping.
criterion = nn.MSELoss()
trainable_params = [p for p in predictor.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=0.001)

num_epochs = 10  # You can adjust this

In [39]:
# Train the predictor head; the reported loss is the per-sample mean over the
# whole epoch, so a short final batch is not over-weighted.
for epoch in range(num_epochs):
    loss_sum = 0.0
    sample_count = 0
    for inputs, targets in train_loader:
        inputs = inputs.to(DEVICE_NAME)
        targets = targets.to(DEVICE_NAME)

        optimizer.zero_grad()
        predictions = predictor(inputs)
        loss = criterion(predictions, targets)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; re-weight it by the batch size.
        batch_len = targets.size(0)
        loss_sum += loss.item() * batch_len
        sample_count += batch_len

    if sample_count == 0:
        raise RuntimeError("train_loader yielded no samples; nothing to train on.")

    avg_loss = loss_sum / sample_count
    print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.8f}")

# Save the trained model (the state dict also contains the encoder weights,
# because the encoder is a submodule of the predictor)
torch.save(predictor.state_dict(), "predictor_model.pth")

print("Training completed and model saved.")

Epoch 1/10, Average Loss: 0.0005
Epoch 2/10, Average Loss: 0.0001
Epoch 3/10, Average Loss: 0.0001
Epoch 4/10, Average Loss: 0.0000
Epoch 5/10, Average Loss: 0.0000
Epoch 6/10, Average Loss: 0.0000
Epoch 7/10, Average Loss: 0.0000
Epoch 8/10, Average Loss: 0.0000
Epoch 9/10, Average Loss: 0.0000
Epoch 10/10, Average Loss: 0.0000
Training completed and model saved.


In [40]:
# Load the trained model. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on GPU also loads on a CPU runtime.
predictor.load_state_dict(torch.load("predictor_model.pth", map_location=DEVICE_NAME))
predictor.eval()

def process_unlabeled_videos(model, device, video_ids=range(5, 10),
                             video_dir="unlabeled", output_dir=".", batch_size=64):
    """Run the model over unlabeled videos and write one text file per video.

    For every id ``i`` in ``video_ids`` the frames of ``{video_dir}/{i}.hevc``
    are fed through ``model`` in batches under ``torch.no_grad()`` and the
    outputs are written to ``{output_dir}/{i}.txt`` with ``np.savetxt``.
    The model's train/eval mode is not changed here; callers are expected to
    call ``model.eval()`` beforehand.

    Args:
        model: Callable mapping a float32 frame batch tensor to a tensor with
            one output row per frame.
        device: Device the frame batches are moved to before inference.
        video_ids: Iterable of video indices (defaults to 5..9).
        video_dir: Directory containing the ``<i>.hevc`` files.
        output_dir: Directory for the prediction files; created if missing.
        batch_size: Number of frames per forward pass.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    import os  # local import: keeps this cell independent of the import cell

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1.")

    os.makedirs(output_dir, exist_ok=True)

    for i in video_ids:
        video_frames = extract_frames(f"{video_dir}/{i}.hevc")
        batch_outputs = []

        with torch.no_grad():
            # Batched forward passes instead of one call per frame.
            for start in range(0, len(video_frames), batch_size):
                chunk = video_frames[start:start + batch_size]
                chunk_tensor = torch.as_tensor(chunk, dtype=torch.float32).to(device)
                batch_outputs.append(model(chunk_tensor).cpu().numpy())

        if batch_outputs:
            predictions = np.concatenate(batch_outputs, axis=0)
        else:
            predictions = np.array([])

        # Save predictions
        np.savetxt(os.path.join(output_dir, f"{i}.txt"), predictions, fmt='%.18e')

    print("Predictions for unlabeled videos generated and saved.")

# Process unlabeled videos and generate predictions.
# Relies on `predictor` (reloaded and switched to eval mode earlier in this
# cell) and on `DEVICE_NAME` from the setup cell.
process_unlabeled_videos(predictor, DEVICE_NAME)

Predictions for unlabeled videos generated and saved.


In [43]:
!python eval.py unlabeled/

Traceback (most recent call last):
  File "/content/calib_challenge/eval.py", line 23, in <module>
    test = np.loadtxt(TEST_DIR + str(i) + '.txt')
  File "/usr/local/lib/python3.10/dist-packages/numpy/lib/npyio.py", line 1373, in loadtxt
    arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter,
  File "/usr/local/lib/python3.10/dist-packages/numpy/lib/npyio.py", line 992, in _read
    fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
  File "/usr/local/lib/python3.10/dist-packages/numpy/lib/_datasource.py", line 193, in open
    return ds.open(path, mode, encoding=encoding, newline=newline)
  File "/usr/local/lib/python3.10/dist-packages/numpy/lib/_datasource.py", line 533, in open
    raise FileNotFoundError(f"{path} not found.")
FileNotFoundError: unlabeled/0.txt not found.


In [51]:
# prompt: zip labels/* into labels.zip

!zip -r labels.zip labels/*


  adding: labels/5.txt (deflated 87%)
  adding: labels/6.txt (deflated 75%)
  adding: labels/7.txt (deflated 60%)
  adding: labels/8.txt (deflated 90%)
  adding: labels/9.txt (deflated 63%)
