In [63]:
import csv
import os
import re

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import Dataset

from torchvision import transforms

In [64]:
def extract_frames(video_path, output_folder, output_csv):
    """Dump every frame of a video to JPEG files and record per-frame metadata.

    Frames are written as ``frame_NNNN.jpg`` (1-based counter, zero-padded to at
    least four digits) inside ``output_folder``. A table with the columns
    ``Frame`` and ``Dimensions`` (``frame.shape[:2]`` of each frame) is written
    to ``frame_info.csv`` inside the directory ``output_csv``.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the frame images (created if missing).
        output_csv: Directory that receives ``frame_info.csv`` (created if missing).

    Returns:
        The number of frames extracted, or ``None`` if the video could not be opened.

    Raises:
        IOError: If a frame image cannot be written to disk.
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error opening video file")
        cap.release()
        return

    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(output_csv, exist_ok=True)

    frame_count = 0
    frames_info = []

    # try/finally guarantees the capture handle is released even if writing a
    # frame fails or the cell is interrupted mid-extraction.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            frames_info.append({'Frame': frame_count, 'Dimensions': frame.shape[:2]})

            frame_filename = os.path.join(output_folder, f"frame_{frame_count:04d}.jpg")
            # cv2.imwrite signals failure through its return value rather than
            # an exception; surface it so later stages never see missing frames.
            if not cv2.imwrite(frame_filename, frame):
                raise IOError(f"Failed to write frame image: {frame_filename}")
    finally:
        cap.release()

    # Explicit columns keep the CSV header present even when no frame was read.
    df = pd.DataFrame(frames_info, columns=['Frame', 'Dimensions'])
    df.to_csv(os.path.join(output_csv, "frame_info.csv"), index=False)

    return frame_count

In [65]:
# Inputs/outputs for extracting frames from train.mp4.
# NOTE: these module-level names are rebound to the test.mp4 paths in a later
# cell, so the extraction cells depend on execution order.
video_path = '/kaggle/input/q3-computer-vision/train.mp4'
output_folder = '/kaggle/working/frames_train'
output_csv = '/kaggle/working/'

In [66]:
# Write every frame of `video_path` into `output_folder` and frame_info.csv into
# `output_csv`. extract_frames returns None when the video cannot be opened.
total_frames = extract_frames(video_path, output_folder, output_csv)
print(f"Total frames extracted: {total_frames}")

KeyboardInterrupt: 

In [None]:
# Inputs/outputs for extracting frames from test.mp4 (rebinds the names used for
# the training video above).
# NOTE: `output_csv` is the same directory as for the training video, so the
# frame_info.csv written there is overwritten by the test extraction.
video_path = '/kaggle/input/q3-computer-vision/test.mp4'
output_folder = '/kaggle/working/frames_test'
output_csv = '/kaggle/working/'

In [None]:
# Same extraction as for the training video, now using the test.mp4 paths bound
# in the previous cell.
total_frames = extract_frames(video_path, output_folder, output_csv)
print(f"Total frames extracted test: {total_frames}")

In [None]:
def convertToOptical(prev_image, curr_image):
    """Render the dense optical flow between two BGR frames as a BGR image.

    Both frames are converted to grayscale, Farneback dense flow is computed
    from ``prev_image`` to ``curr_image``, and the flow field is encoded in HSV
    (hue from flow direction, full saturation, value from min-max normalised
    flow magnitude) before being converted back to BGR.

    Args:
        prev_image: Earlier frame as a BGR image array.
        curr_image: Later frame as a BGR image array.

    Returns:
        BGR image with the same shape and dtype as ``prev_image``.
    """
    gray_prev, gray_curr = (
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (prev_image, curr_image)
    )

    # Positional arguments after the flow placeholder: pyr_scale=0.5, levels=3,
    # winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0.
    flow_field = cv2.calcOpticalFlowFarneback(gray_prev, gray_curr, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    magnitude, angle = cv2.cartToPolar(flow_field[..., 0], flow_field[..., 1])

    encoded = np.zeros_like(prev_image)
    encoded[..., 0] = angle * 180 / np.pi / 2  # radians -> half-degrees for the hue channel
    encoded[..., 1] = 255
    encoded[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

    return cv2.cvtColor(encoded, cv2.COLOR_HSV2BGR)

In [None]:
def compute_optical_flow(input_folder, output_folder):
    """Compute an optical-flow image for every pair of consecutive frames.

    Image files in ``input_folder`` are processed in frame-number order. For
    each frame after the first, the flow from the previous frame is rendered
    with ``convertToOptical`` and saved as ``optical_flow_NNNN.jpg`` in
    ``output_folder``, where ``NNNN`` is the 0-based position of the later frame
    (so numbering starts at 0001).

    Args:
        input_folder: Directory containing the frame images (.png/.jpg/.jpeg).
        output_folder: Directory that receives the flow images (created if missing).

    Raises:
        IOError: If a frame image cannot be read.
    """
    def _frame_order(filename):
        # Sort by the number directly before the extension. A plain string sort
        # would put "frame_10000.jpg" before "frame_1001.jpg" once the counter
        # outgrows its four-digit zero padding, pairing non-consecutive frames.
        match = re.search(r'(\d+)\.\w+$', filename)
        return (int(match.group(1)) if match else -1, filename)

    os.makedirs(output_folder, exist_ok=True)

    frame_files = sorted(
        (f for f in os.listdir(input_folder) if f.endswith(('.png', '.jpg', '.jpeg'))),
        key=_frame_order,
    )

    prev_frame = None
    for i, frame_file in enumerate(tqdm(frame_files, desc="Processing frames")):
        frame_path = os.path.join(input_folder, frame_file)
        curr_frame = cv2.imread(frame_path)
        # cv2.imread returns None instead of raising; fail loudly so a bad file
        # cannot silently shift the frame pairing.
        if curr_frame is None:
            raise IOError(f"Could not read frame image: {frame_path}")

        if prev_frame is not None:
            optical_flow_image = convertToOptical(prev_frame, curr_frame)
            optical_flow_filename = os.path.join(output_folder, f"optical_flow_{i:04d}.jpg")
            cv2.imwrite(optical_flow_filename, optical_flow_image)

        prev_frame = curr_frame

In [None]:
# Optical-flow stage for the training frames: read the extracted frames and write
# flow images to a separate folder. `output_folder` is rebound here (it previously
# held the frame-extraction target).
input_folder = '/kaggle/working/frames_train'
output_folder = '/kaggle/working/optical_flows_train'

In [None]:
# One flow image is produced per consecutive frame pair, i.e. one fewer than the
# number of frames in `input_folder`.
compute_optical_flow(input_folder, output_folder)
print(f"Optical flow images saved to: {output_folder}")

In [None]:
# Optical-flow stage for the test frames (rebinds the folder names used for the
# training frames above).
input_folder = '/kaggle/working/frames_test'
output_folder = '/kaggle/working/optical_flows_test'

In [None]:
# Same flow computation as for the training frames, now on the test folders bound
# in the previous cell.
compute_optical_flow(input_folder, output_folder)
print(f"Optical flow images saved to: {output_folder}")

In [None]:
class OpticalFlowDataset(Dataset):
    """Pairs optical-flow images with min-max scaled velocity labels.

    Image ``i`` (in frame-number order) is paired with the ``i``-th velocity read
    from ``velocity_file`` after its first line has been skipped (presumably
    because the first frame has no preceding frame to compute flow from).

    Each item is ``(image, velocity)`` where ``image`` is a ``3 x 224 x 224``
    float tensor in RGB channel order and ``velocity`` is a 1-element float32
    tensor scaled to ``[0, 1]`` using the min and max of this file's velocities.
    """

    def __init__(self, optical_flow_dir, velocity_file):
        """Index the images, load the velocities and cache the scaling range.

        Args:
            optical_flow_dir: Directory containing the flow images (.png/.jpg/.jpeg).
            velocity_file: Text file with one velocity per line; the first line
                is skipped and blank lines are ignored.

        Raises:
            AssertionError: If the number of images and velocities differ.
        """
        self.optical_flow_dir = optical_flow_dir

        # Order by frame number, not by string: lexicographic order breaks once
        # the index outgrows its four-digit padding and would misalign labels.
        self.optical_flow_files = sorted(
            (f for f in os.listdir(optical_flow_dir) if f.endswith(('.png', '.jpg', '.jpeg'))),
            key=self._frame_sort_key,
        )

        # Read velocities from the text file, skipping the first line
        with open(velocity_file, 'r') as file:
            file.readline()
            self.velocities = [float(line.strip()) for line in file if line.strip()]

        # Ensure there are the same number of images and velocity values
        assert len(self.optical_flow_files) == len(self.velocities), "Mismatch between optical flow files and velocity values count."

        # Cache the scaling range once instead of rescanning every velocity on
        # each __getitem__ call.
        self.velocity_min = min(self.velocities) if self.velocities else 0.0
        self.velocity_range = (max(self.velocities) - self.velocity_min) if self.velocities else 0.0

        # Define transformations (resize and convert to a [0, 1] float tensor)
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ])

    @staticmethod
    def _frame_sort_key(filename):
        """Sort key: the integer directly before the extension, then the name."""
        match = re.search(r'(\d+)\.\w+$', filename)
        return (int(match.group(1)) if match else -1, filename)

    def _scale_velocity(self, velocity):
        """Min-max scale ``velocity``; returns 0.0 when all velocities are equal."""
        if self.velocity_range == 0:
            return 0.0
        return (velocity - self.velocity_min) / self.velocity_range

    def __len__(self):
        """Number of (image, velocity) pairs."""
        return len(self.optical_flow_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and return it with its scaled velocity.

        Raises:
            IOError: If the image file cannot be read.
        """
        optical_flow_path = os.path.join(self.optical_flow_dir, self.optical_flow_files[idx])

        # cv2.imread returns None on failure instead of raising.
        optical_flow = cv2.imread(optical_flow_path)
        if optical_flow is None:
            raise IOError(f"Could not read optical flow image: {optical_flow_path}")

        # OpenCV loads BGR; convert to RGB before handing the array to PIL.
        optical_flow_pil = Image.fromarray(cv2.cvtColor(optical_flow, cv2.COLOR_BGR2RGB))
        optical_flow_resized = self.transform(optical_flow_pil)

        velocity_scaled = self._scale_velocity(self.velocities[idx])
        velocity_tensor = torch.tensor([velocity_scaled], dtype=torch.float32)

        return optical_flow_resized, velocity_tensor

In [None]:
# Flow-image folders produced by the optical-flow cells above, plus the velocity
# text files from the Kaggle input dataset (one value per line).
train_optical_flow_dir = '/kaggle/working/optical_flows_train'
train_velocity_file = '/kaggle/input/q3-computer-vision/train.txt'
test_optical_flow_dir = '/kaggle/working/optical_flows_test'
test_velocity_file = '/kaggle/input/q3-computer-vision/test.txt'

In [None]:
# NOTE: each dataset scales velocities with the min/max of its own velocity file,
# so train and test labels are not on a shared scale.
train_dataset = OpticalFlowDataset(train_optical_flow_dir, train_velocity_file)
test_dataset = OpticalFlowDataset(test_optical_flow_dir, test_velocity_file)

In [None]:
# Training batches are shuffled; the test loader keeps dataset order so that the
# per-sample ids written during evaluation line up with frame order.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# Access an element from train dataset
# The dataset returns (image tensor, 1-element velocity tensor); the velocity
# printed here is the min-max scaled value, not the raw one from train.txt.
train_optical_flow, train_velocity = train_dataset[100]
print("Train velocity:", train_velocity.item())
print("Train optical flow shape:", train_optical_flow.shape)

In [None]:
# Plot the optical flow image
# The dataset already returns an RGB, channel-first float tensor (it converts
# BGR -> RGB before ToTensor), so it only needs reordering to height x width x
# channel. Applying another BGR -> RGB conversion would swap red and blue.
fig, ax = plt.subplots(figsize=(8, 6))
ax.imshow(train_optical_flow.permute(1, 2, 0).numpy())
ax.axis('off')
ax.set_title('Optical Flow')
plt.show()

In [None]:
# Define the enhanced CNN + MLP model
class OpticalFlowCNN(nn.Module):
    """CNN backbone followed by an MLP head that regresses one scalar per image.

    The backbone stacks four ``Conv2d(3x3, padding=1) -> ReLU -> BatchNorm2d ->
    MaxPool2d(2)`` stages with 32, 64, 128 and 256 output channels. The head
    expects 50176 flattened features, which is ``256 * 14 * 14`` — the backbone
    output for a ``3 x 224 x 224`` input after four 2x poolings.
    """

    def __init__(self):
        super().__init__()

        # CNN backbone: consecutive channel widths define the four conv stages.
        channel_widths = (3, 32, 64, 128, 256)
        backbone_layers = []
        for in_channels, out_channels in zip(channel_widths[:-1], channel_widths[1:]):
            backbone_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            backbone_layers.append(nn.ReLU())
            backbone_layers.append(nn.BatchNorm2d(out_channels))
            backbone_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        # Kept as one flat Sequential so layer indices (and state-dict keys) are
        # cnn.0 ... cnn.15.
        self.cnn = nn.Sequential(*backbone_layers)

        # MLP head: 50176 -> 2048 -> 128 -> 1, with dropout after each hidden layer.
        self.mlp = nn.Sequential(
            nn.Linear(50176, 2048),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(2048, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 1),  # single scalar output (velocity)
        )

    def forward(self, x):
        """Map a batch of images to a ``(batch, 1)`` tensor of predictions."""
        feature_maps = self.cnn(x)
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.mlp(flattened)

In [None]:
# Initialize model, optimizer, and criterion
# Adam with lr=0.001 over all model parameters; mean-squared error is computed
# against the min-max scaled velocity targets returned by the dataset.
model = OpticalFlowCNN()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [None]:
def train_and_evaluate(model, train_loader, val_loader, optimizer, criterion, num_epochs=5, device='cuda', output_csv='eval_outputs.csv'):
    """Train ``model`` and evaluate it after every epoch, logging predictions to CSV.

    For each epoch the model is trained on ``train_loader``, then run on
    ``val_loader`` without gradients. Every validation prediction is appended to
    ``output_csv`` as a row with the columns ``epoch``, ``input_id``,
    ``prediction`` and ``actual``. ``input_id`` is the running position of the
    sample within the validation pass, so it only matches dataset order when
    ``val_loader`` is not shuffled. Average per-batch losses are printed.

    Args:
        model: Module producing one scalar prediction per sample.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, targets)``.
        num_epochs: Number of train/evaluate rounds.
        device: Device to run on. ``None`` means ``'cuda'``; a CUDA request falls
            back to CPU when CUDA is unavailable.
        output_csv: Path of the CSV file; it is overwritten at the start.
    """
    # Honour the caller's device choice; only fall back when CUDA is requested
    # but not available.
    device = torch.device('cuda' if device is None else device)
    if device.type == 'cuda' and not torch.cuda.is_available():
        device = torch.device('cpu')
    model.to(device)

    fieldnames = ['epoch', 'input_id', 'prediction', 'actual']

    # Start a fresh file containing only the header; rows are appended per epoch.
    with open(output_csv, 'w', newline='') as csvfile:
        csv.DictWriter(csvfile, fieldnames=fieldnames).writeheader()

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        epoch_train_loss = 0.0
        with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Train)", unit="batch") as t:
            for inputs, targets in t:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                epoch_train_loss += loss.item()
                t.set_postfix(train_loss=loss.item())

        # max(..., 1) avoids a ZeroDivisionError for an empty loader.
        avg_train_loss = epoch_train_loss / max(len(train_loader), 1)
        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}")

        # Evaluation phase
        model.eval()
        epoch_val_loss = 0.0
        all_eval_outputs = []
        with torch.no_grad(), tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Val)", unit="batch") as t:
            for inputs, targets in t:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                epoch_val_loss += loss.item()
                t.set_postfix(val_loss=loss.item())

                # A running counter gives the sample id without relying on
                # val_loader.batch_size, which is None for custom batch samplers.
                for prediction, actual in zip(outputs.flatten().tolist(), targets.flatten().tolist()):
                    all_eval_outputs.append({
                        'epoch': epoch + 1,
                        'input_id': len(all_eval_outputs),
                        'prediction': prediction,
                        'actual': actual
                    })

        avg_val_loss = epoch_val_loss / max(len(val_loader), 1)
        print(f"Epoch {epoch + 1}/{num_epochs}, Val Loss: {avg_val_loss:.4f}")

        # Save results to the CSV file after each epoch
        with open(output_csv, 'a', newline='') as csvfile:
            csv.DictWriter(csvfile, fieldnames=fieldnames).writerows(all_eval_outputs)

In [None]:
# Train the model with evaluation
# NOTE: the test loader is passed as `val_loader`, so the per-epoch "Val" loss and
# the rows written to the default 'eval_outputs.csv' come from the test set.
train_and_evaluate(model, train_loader, test_loader, optimizer, criterion, num_epochs=10)

In [70]:
# Function to read the original min and max velocities from the dataset
def get_velocity_scale_params(velocity_file):
    """Return ``(min, max)`` of the velocities stored in a text file.

    The first line of the file is discarded; every remaining line is stripped
    and parsed as a float.
    """
    with open(velocity_file, 'r') as handle:
        next(handle, None)  # discard the first line
        values = [float(raw_line.strip()) for raw_line in handle]
    lowest = min(values)
    highest = max(values)
    return lowest, highest

In [71]:
def read_results(results_file, epoch=1):
    """Read the predictions and targets logged for one epoch.

    The file is expected to have the columns ``epoch, input_id, prediction,
    actual`` as written by ``train_and_evaluate``. Header rows (first cell equal
    to ``'epoch'``) and blank rows are skipped.

    Args:
        results_file: Path of the CSV file to read.
        epoch: Epoch number whose rows are returned. Defaults to 1.

    Returns:
        ``(predictions, actuals)``: two lists of floats in file order; both are
        empty when the file has no rows for ``epoch``.
    """
    predictions = []
    actuals = []
    with open(results_file, 'r', newline='') as file:
        for row in csv.reader(file):
            # csv.reader yields [] for blank lines; skip those and the header.
            if not row or row[0] == 'epoch':
                continue
            if int(row[0]) == epoch:
                predictions.append(float(row[2]))
                actuals.append(float(row[3]))
    return predictions, actuals

In [75]:
# Function to create video from optical flow frames and velocity results
def create_video_from_results(optical_flow_dir, results_file, velocity_file, output_video_path):
    """Write a video of frames annotated with predicted and target velocities.

    Images in ``optical_flow_dir`` are taken in frame-number order. The first
    image is used only to size the video and is not written; the image at
    position ``i`` (``i >= 1``) is annotated with prediction/target ``i - 1`` from
    ``results_file``. Values are converted back from the ``[0, 1]`` scale using
    the min and max of ``velocity_file``. The video is XVID-encoded at 20 fps.

    Args:
        optical_flow_dir: Directory with the images to render (.png/.jpg/.jpeg).
        results_file: CSV of evaluation outputs readable by ``read_results``.
        velocity_file: Velocity text file used to obtain the scaling range.
        output_video_path: Path of the video file to create.

    Raises:
        ValueError: If the directory has no images or there are fewer
            predictions than images to annotate.
        IOError: If an image cannot be read.
    """
    def _frame_order(filename):
        # Sort by the number directly before the extension; a plain string sort
        # misorders names once the index outgrows its four-digit zero padding.
        match = re.search(r'(\d+)\.\w+$', filename)
        return (int(match.group(1)) if match else -1, filename)

    # Read predictions and actuals from results file
    predictions, actuals = read_results(results_file)

    optical_flow_files = sorted(
        (f for f in os.listdir(optical_flow_dir) if f.endswith(('.png', '.jpg', '.jpeg'))),
        key=_frame_order,
    )
    if not optical_flow_files:
        raise ValueError(f"No image files found in: {optical_flow_dir}")
    if len(predictions) < len(optical_flow_files) - 1:
        raise ValueError(
            f"Need {len(optical_flow_files) - 1} predictions but {results_file} provides {len(predictions)}"
        )

    # Get original min and max velocities
    min_velocity, max_velocity = get_velocity_scale_params(velocity_file)
    velocity_span = max_velocity - min_velocity

    first_frame_path = os.path.join(optical_flow_dir, optical_flow_files[0])
    first_frame = cv2.imread(first_frame_path)
    if first_frame is None:
        raise IOError(f"Could not read image: {first_frame_path}")
    height, width = first_frame.shape[:2]

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_video_path, fourcc, 20.0, (width, height))

    # try/finally so the writer is released (and the file finalised) even when
    # rendering fails part-way through.
    try:
        for i in tqdm(range(1, len(optical_flow_files)), desc="Creating video"):
            optical_flow_path = os.path.join(optical_flow_dir, optical_flow_files[i])
            optical_flow = cv2.imread(optical_flow_path)
            if optical_flow is None:
                raise IOError(f"Could not read image: {optical_flow_path}")

            # De-scale velocities; index i - 1 because the first image has no result.
            prediction = predictions[i - 1] * velocity_span + min_velocity
            target = actuals[i - 1] * velocity_span + min_velocity

            # Add text to frame
            cv2.putText(optical_flow, f'Predicted Velocity: {prediction:.2f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
            cv2.putText(optical_flow, f'Target Velocity: {target:.2f}', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

            out.write(optical_flow)
    finally:
        out.release()

In [76]:
# NOTE: the first argument is the raw test frames folder (not the optical-flow
# folder), and the results CSV is read from a Kaggle input dataset rather than
# from the 'eval_outputs.csv' written by the training cell in this notebook.
create_video_from_results('/kaggle/working/frames_test', '/kaggle/input/test-frames-velocities/eval_outputs.csv', '/kaggle/input/q3-computer-vision/test.txt', 'output_video_origin.avi')

Creating video: 100%|██████████| 10797/10797 [00:50<00:00, 213.81it/s]


In [None]:
# google drive links
# https://drive.google.com/drive/folders/1Z8poyzyLqpcpfrdRJIKhL-pzEHalxnX6?usp=drive_link