Link to the output videos:
https://drive.google.com/drive/folders/1Rx1B4Yh8mU1Xba17BvnsSSCxJgRauAid?usp=share_link

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import os
from tqdm import tqdm
from torchvision import transforms
import pandas as pd
from sklearn.metrics import mean_absolute_error, r2_score

In [2]:
def extract_optical_flow_and_save(video_path, save_path):
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    ret, prev_frame = cap.read()
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    optical_flows = []

    for frame_idx in tqdm(range(total_frames), desc=f'Processing {video_path}'):
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        hsv = np.zeros_like(frame)
        hsv[..., 1] = 255
        hsv[..., 0] = angle * 180 / np.pi / 2
        hsv[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
        optical_flow_frame = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        # optical_flows.append(optical_flow_frame)

        output_filename = os.path.join(save_path, f'optical_flow_frame_{frame_idx:04d}.png')
        cv2.imwrite(output_filename, optical_flow_frame)

        prev_gray = gray

    cap.release()
    return optical_flows

save_path = '/kaggle/working/train_optical'
extract_optical_flow_and_save('/kaggle/input/optical-flow-dataset/train.mp4', save_path)

Processing /kaggle/input/optical-flow-dataset/train.mp4: 100%|█████████▉| 20399/20400 [37:01<00:00,  9.18it/s]


[]

In [4]:
def read_speed_data(file_path):
    with open(file_path, 'r') as file:
        speeds = [float(line.strip()) for line in file.readlines()]
    return speeds

train_speeds = read_speed_data('/kaggle/input/optical-flow-dataset/train.txt')

In [8]:

class OpticalFlowDatasetFromImages(Dataset):
    def __init__(self, image_dir, speed_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.speed_labels = self._load_speed_labels(speed_file)
        self.image_files = sorted(os.listdir(image_dir))

    def _load_speed_labels(self, speed_file):
        speeds = pd.read_csv(speed_file, header=None)
        #remove the first one since the optical flow is not computed for first frame
        speeds = speeds[1:]
        return speeds.values.flatten()

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.image_files[idx])
        image = cv2.imread(img_name)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (0,0), fx=0.5, fy=0.5)
        speed = self.speed_labels[idx]

        if self.transform:
            image = self.transform(image)

        return {'optical_flow': image, 'speed': speed}

transform = transforms.Compose([
    transforms.ToTensor() , 
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 
])

train_image_dir = '/kaggle/working/train_optical'
train_speed_file = '/kaggle/input/optical-flow-dataset/train.txt'
# test_image_dir = '/content/drive/My Drive/test_optical'
# test_speed_file = '/content/drive/My Drive/test.txt'

train_dataset = OpticalFlowDatasetFromImages(train_image_dir, train_speed_file, transform=transform)
# test_dataset = OpticalFlowDatasetFromImages(test_image_dir, test_speed_file, transform=transform)

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
# test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SpeedPredictorCNN(nn.Module):
    def __init__(self):
        super(SpeedPredictorCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 24, kernel_size=5, stride=2, padding=2)  
        self.conv2 = nn.Conv2d(24, 36, kernel_size=5, stride=2, padding=2) 
        self.conv3 = nn.Conv2d(36, 48, kernel_size=5, stride=2, padding=2) 
        self.conv4 = nn.Conv2d(48, 64, kernel_size=3, stride=1, padding=1)  
        self.fc1 = nn.Linear(64 * 30 * 40, 100) 
        self.fc2 = nn.Linear(100, 50) 
        self.fc3 = nn.Linear(50, 10)  
        self.fc4 = nn.Linear(10, 1) 
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))  
        x = self.fc4(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SpeedPredictorCNN().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

print(model)

SpeedPredictorCNN(
  (conv1): Conv2d(3, 24, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (conv2): Conv2d(24, 36, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (conv3): Conv2d(36, 48, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (conv4): Conv2d(48, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=76800, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=10, bias=True)
  (fc4): Linear(in_features=10, out_features=1, bias=True)
  (relu): ReLU()
)


In [10]:
num_epochs = 5

from tqdm import tqdm

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    with tqdm(total=len(train_dataloader), desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as pbar:
        for batch in train_dataloader:
            images = batch['optical_flow'].to(device)
            speeds = batch['speed'].float().to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, speeds.unsqueeze(1))
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)

            pbar.update(1)

    epoch_loss = running_loss / len(train_dataloader.dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')


Epoch 1/5: 100%|██████████| 160/160 [02:52<00:00,  1.08s/batch]


Epoch 1/5, Loss: 80.5129


Epoch 2/5: 100%|██████████| 160/160 [02:51<00:00,  1.07s/batch]


Epoch 2/5, Loss: 62.3347


Epoch 3/5: 100%|██████████| 160/160 [02:51<00:00,  1.07s/batch]


Epoch 3/5, Loss: 47.2574


Epoch 4/5: 100%|██████████| 160/160 [02:51<00:00,  1.07s/batch]


Epoch 4/5, Loss: 40.0253


Epoch 5/5: 100%|██████████| 160/160 [02:50<00:00,  1.07s/batch]

Epoch 5/5, Loss: 34.2183





In [11]:
save_path = '/kaggle/working/test_optical'
extract_optical_flow_and_save('/kaggle/input/optical-flow-dataset/test.mp4', save_path)

Processing /kaggle/input/optical-flow-dataset/test.mp4: 100%|█████████▉| 10797/10798 [18:17<00:00,  9.84it/s]


[]

In [30]:
test_image_dir = '/kaggle/working/test_optical'
test_speed_file = '/kaggle/input/optical-flow-dataset/test.txt'

test_dataset = OpticalFlowDatasetFromImages(test_image_dir, test_speed_file, transform=transform)

test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

In [37]:

def create_videos_with_predictions(model, test_video_path, optical_flow_dir, output_path_original, output_path_combined, transform):
    cap = cv2.VideoCapture(test_video_path)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out_original = cv2.VideoWriter(output_path_original, fourcc, 20.0, (640, 480))
    out_combined = cv2.VideoWriter(output_path_combined, fourcc, 20.0, (1280, 480))

    model.eval()
    frame_idx = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    for frame_idx in tqdm(range(total_frames-1), desc='Processing video frames'):
        ret, frame = cap.read()
        if not ret:
            break

        optical_flow_path = os.path.join(optical_flow_dir, f'optical_flow_frame_{frame_idx:04d}.png')
        optical_flow_frame = cv2.imread(optical_flow_path)
        optical_flow_frame2 = cv2.cvtColor(optical_flow_frame, cv2.COLOR_BGR2RGB)
        optical_flow_frame2 = cv2.normalize(optical_flow_frame2, None, alpha=-1, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        optical_flow_frame2 = cv2.resize(optical_flow_frame2, (0,0), fx=0.5, fy=0.5)
        input_tensor = transform(optical_flow_frame2).unsqueeze(0).float().to(device)
        
        with torch.no_grad():
            speed_prediction = model(input_tensor).item()

        original_frame_with_speed = frame.copy()
        cv2.putText(original_frame_with_speed, f'Speed: {speed_prediction:.2f}', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        out_original.write(original_frame_with_speed)

        combined_frame = cv2.hconcat([frame, optical_flow_frame])
        combined_frame_with_speed = combined_frame.copy()
        cv2.putText(combined_frame_with_speed, f'Speed: {speed_prediction:.2f}', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        out_combined.write(combined_frame_with_speed)

    cap.release()
    out_original.release()
    out_combined.release()


test_video_path = '/kaggle/input/optical-flow-dataset/test.mp4'
output_video_path_original = '/kaggle/working/test_with_predictions_original.avi'
output_video_path_combined = '/kaggle/working/test_with_predictions_combined.avi'
test_image_dir='/kaggle/working/test_optical'
create_videos_with_predictions(model, test_video_path, test_image_dir, output_video_path_original, output_video_path_combined, transform)

Processing video frames: 100%|██████████| 10797/10797 [03:06<00:00, 57.87it/s]


In [28]:
from IPython.display import FileLink

print("Original video with predictions:")
linke = display(FileLink(r'test_with_predictions_original.avi'))

print("Combined video with predictions:")
display(FileLink(r'test_with_predictions_combined.avi'))

Original video with predictions:


Combined video with predictions:


In [36]:
model.eval()
model.to(device)
actuals = []
predictions = []
with torch.no_grad():
    for data in test_dataloader:
        inputs, labels = data['optical_flow'].to(device), data['speed'].to(device)
        inputs = inputs.float()
        labels = labels.float().view(-1, 1)

        outputs = model(inputs)
        actuals.extend(labels.cpu().numpy())
        predictions.extend(outputs.cpu().numpy())

mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)
print(f'Mean Absolute Error: {mae:.3f}')

Mean Absolute Error: 4.822
