<a href="https://colab.research.google.com/github/AbdulrhmnGhanem/bdd100k-trajectories/blob/main/nth_frame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this notebook we are going to design the algorithm to get the frame at the `nth` displacement. We haven't settled on the `SAMPLING_DISTANCE` value yet, so I will prototype with `SAMPLING_DISTANCE = 10`.

In [None]:
# Reproducible-ish dependencies
!pip install \
    pandas==1.3.5 \
    requests==2.23.0 \
    geopy==1.17.0 \
    imageio==2.4.1 \
    Pillow==7.1.2

# Mount Google Drive; the pickle files used in this notebook are read from
# and written to /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from getpass import getpass
# Prompt for the key interactively so it is never stored in the notebook;
# `mark_trajectory` reads API_KEY when it requests a static map.
API_KEY = getpass('Enter the google maps API key: ')

Enter the google maps API key: ··········


## Read the stored `selected_trajectories` file

> The selection criterion was
> 
> $  50m \le total \ displacement \ along \ the \ trajectory \le 2500m$

In [None]:
import pickle

# Load the previously selected trajectories from Drive.
# NOTE: unpickling executes arbitrary code, so only load files you produced yourself.
with open('/content/drive/MyDrive/selected_trajectories.pkl', 'rb') as f:
  selected_trajectories = pickle.load(f) 

In [None]:
from typing import List, Tuple
from itertools import accumulate
from functools import lru_cache
import io

import geopy.distance
import PIL
import imageio
import requests
from numba import njit, typed, types



# Number of decimal places kept when rounding distances and speeds.
ROUNDING_NDIGITS = 2


def trajectory_to_points(trajectory: Tuple[str, List]) -> List[Tuple[float, float]]:
  """
  Extract the GPS position of every sample of a trajectory.

  :param trajectory: pair whose second element is the list of samples;
                     each sample is indexed by "latitude" and "longitude"
  :return: list of (latitude, longitude) tuples, in sample order
  """
  samples = trajectory[1]
  coordinates = []
  for sample in samples:
    coordinates.append((sample["latitude"], sample["longitude"]))
  return coordinates

def get_trajectory_running_distance(trajectory: Tuple[str, List]) -> List[float]:
  """
  Get the running (cumulative) distance of the trajectory at each GPS point.

  :param trajectory: pair whose second element is the list of samples; it is
                     turned into GPS points with `trajectory_to_points`
  :return: cumulative distance in meters at every point, each value rounded to
           ROUNDING_NDIGITS digits; the first entry is 0 because nothing has
           been travelled at the start
  """
  points = trajectory_to_points(trajectory)
  running_distance = [0]
  travelled = 0
  # Walk over every pair of adjacent points and keep adding the distance between them.
  for previous_point, current_point in zip(points, points[1:]):
    travelled += geopy.distance.distance(previous_point, current_point).meters
    running_distance.append(round(travelled, ROUNDING_NDIGITS))
  return running_distance


def mark_trajectory(points, *,zoom, scale, size):
  """
  Fetch a satellite static map from Google Maps with a marker on every point.

  :param points: iterable of (latitude, longitude) pairs to mark
  :param zoom: value sent as the `zoom` query parameter
  :param scale: value sent as the `scale` query parameter
  :param size: value sent as the `size` query parameter
  :return: the returned PNG decoded into an RGB PIL image
  :raises requests.HTTPError: if the API answers with an error status
  """
  # `import PIL` alone does not guarantee that the `PIL.Image` submodule is
  # loaded, so import it explicitly where it is used.
  from PIL import Image

  base_url = "https://maps.googleapis.com/maps/api/staticmap"
  # Let requests build and URL-encode the query string (the markers contain
  # '|', ',' and spaces) instead of concatenating it by hand.
  params = {
      "key": API_KEY,
      "zoom": zoom,
      "scale": scale,
      "size": size,
      "maptype": "satellite",
      "format": "png",
      "visual_refresh": "true",
      "markers": '|'.join(f"{x}, {y}" for x, y in points),
  }
  response = requests.get(base_url, params=params, timeout=30)
  # Fail loudly on a rejected request (bad key, quota, ...) rather than
  # handing an error body to the image decoder.
  response.raise_for_status()
  return Image.open(io.BytesIO(response.content)).convert('RGB')

@lru_cache(maxsize=1)
def load_total_distance_df():
  """
  Load the pickled total-distance table from Drive.

  The result is cached, so the file is only read on the first call.
  """
  pickle_path = '/content/drive/MyDrive/total_distance_df.pkl'
  with open(pickle_path, 'rb') as pickle_file:
    return pickle.load(pickle_file)

@lru_cache()
def get_trajectory_total_distance(trajectory_name: str):
  """
  Look up the pre-computed total distance of a trajectory.

  :param trajectory_name: index label of the trajectory in the total-distance table
  :return: the stored distance rounded to ROUNDING_NDIGITS digits
  """
  trajectory_row = load_total_distance_df().loc[trajectory_name]
  total_distance = trajectory_row[0]
  return round(total_distance, ROUNDING_NDIGITS)

def get_average_speeds(trajectory):
  """
  Get the speed between every two adjacent points, v(n -> n+1).

  The speed over a segment is taken as the mean of the speeds recorded at
  its two end points.

  :param trajectory: pair whose second element is the list of samples;
                     each sample has a "speed" entry
  :return: one average speed per pair of adjacent points (one element fewer
           than the number of samples), rounded to ROUNDING_NDIGITS digits
  """
  # Read the speeds from the argument itself so that every trajectory gets
  # its own speeds rather than those of a notebook-level variable.
  points_speed = [p["speed"] for p in trajectory[1]]
  return [round((s1+s2)/2, ROUNDING_NDIGITS) for s1, s2 in zip(points_speed, points_speed[1:])]

In [None]:
from numpy.testing import assert_almost_equal

# Use the first selected trajectory as the worked example.
sample_trajectory = selected_trajectories[0]

trajectory_running_distance = get_trajectory_running_distance(sample_trajectory)
# The last running-distance entry is the distance accumulated over the whole trajectory.
total_distance_using_running_distance = trajectory_running_distance[-1]
total_distance_pre_computed = get_trajectory_total_distance(sample_trajectory[0])

# Sanity check: the freshly accumulated total must agree with the stored one.
assert_almost_equal(total_distance_using_running_distance, total_distance_pre_computed)

So we can conclude that the running distance calculations are right! Now let's figure out how to get the time of the frame.

> ### Division
* Assuming we managed to divide the trajectories into $50m$ sub-trajectories.
* We have the speed at each point, and we can assume a constant speed between adjacent points, equal to the average of the speeds at these points: $v_{n \to n+1} = {{v_{n} + v_{n+1} \over 2}, \ n \in \{1, \ 2, \ 3, \ \ldots, \lfloor{trajectory \ distance \over SAMPLING\_DISTANCE}\rfloor \}}$.
* The time between adjacent points is $1s$.
* To get the time of the corresponding frame for the $n$th point we can use this formula: $t_n={d \times n \over v_{n \to n+1}} , \ n \in \{1, \  2, \  3, \  \ldots, \lfloor{trajectory \ distance \over SAMPLING\_DISTANCE}\rfloor \land t_n \gt t_{n-1}\}$.
* Then pass the list of $t_n$s to `ffmpeg` to extract the frames. 

> https://colab.research.google.com/drive/1RVCX9pociA3bU9IEeJNz0xaboCKWqYoq#scrollTo=zsx25ZJ-8PTK&line=9&uniqifier=1

In [None]:
import numpy as np

# Spacing between consecutive sampling points along a trajectory, in the same
# unit as the running distance (meters). 10 is a prototyping value.
SAMPLING_DISTANCE = 10


@njit
def get_frames_timestamps(running_distance, average_speeds, sampling_points):
  """
  Compute the time, in milliseconds, at which each sampling distance is reached.

  For every sampling point the first index whose running distance is greater
  than or equal to it is located; the time is the index of the previous point
  (taken as seconds) plus the remaining distance divided by the speed at
  `average_speeds[idx - 1]`.

  A sampling point is skipped when that speed is 0 or when the computed time
  is not later than the last stored time.

  :param running_distance: cumulative distance at each point
  :param average_speeds: speeds indexed so that `average_speeds[idx - 1]`
                         belongs to the segment ending at point `idx`
  :param sampling_points: distances at which a frame is wanted
  :return: list of times in ms; it always starts with 0
  :raises ValueError: if no running distance reaches a sampling point

  NOTE(review): if the match happens at idx == 0, `idx - 1` is -1 and would
  refer to the last element -- confirm sampling points are always greater
  than the first running distance.
  """
  frames_times = [0]
  for sampling_distance in sampling_points:
    for idx, d in enumerate(running_distance):
      if d >= sampling_distance:
        # If the running distance is greater than or equal to the current sampling
        # point then the frame is between the current point and the previous point.
        distance_at_the_previos_point = running_distance[idx - 1]
        speed_at_the_previos_point    = average_speeds[idx - 1]
        if speed_at_the_previos_point == 0:
          # Dividing by a zero speed is impossible; skip this sampling point.
          break
        # The time equals the time at the previous point + the time to reach the sampling_distance.
        # We can use the idx of the previous point as the time at the previous point; the time between adjacent points is 1s.
        time = (idx - 1) + ((sampling_distance - distance_at_the_previos_point) / speed_at_the_previos_point)
        time_ms = int(time * 1000)
        if time_ms <= frames_times[-1]:
          # Keep the stored timestamps strictly increasing; skip this sampling point.
          break
        frames_times.append(time_ms)
        break
    else:
        # Reached only when the inner loop finishes without finding a match.
        raise ValueError("Unreachable")
  
  return frames_times

In [None]:
# Prepare the inputs as NumPy arrays before passing them to the jitted function.
trajectory_running_distance = np.asarray(get_trajectory_running_distance(sample_trajectory))
# NOTE: this recomputes the running distance; its last entry is the total distance.
trajectory_total_distance = int(get_trajectory_running_distance(sample_trajectory)[-1])
# Sampling points are the multiples of SAMPLING_DISTANCE below the total distance; [1:] drops the 0.
trajectory_sampling_points = np.asarray(range(0, trajectory_total_distance, SAMPLING_DISTANCE)[1:])
trajectory_average_speeds = np.asarray(get_average_speeds(sample_trajectory))
get_frames_timestamps(trajectory_running_distance, trajectory_average_speeds, trajectory_sampling_points)

[0,
 1146,
 2199,
 3114,
 3953,
 4731,
 5717,
 6628,
 7514,
 8445,
 9416,
 10395,
 11449,
 12823,
 14205,
 15575,
 17143,
 18916,
 24121,
 28031,
 28561,
 30427,
 31631,
 32612,
 33532,
 34262,
 35149,
 36021,
 36807,
 37594,
 38314]

# Generate the sampling timestamps for all trajectories

In [None]:
from datetime import datetime

def format_timestamps(timestamps):
  """
  Format millisecond offsets as ffmpeg timestamps (HH:MM:SS.mmm).

  :param timestamps: iterable of non-negative offsets in milliseconds
  :return: list of strings such as "00:00:01.146"
  """
  formatted = []
  for t in timestamps:
    # Split with integer arithmetic so that offsets of a minute or more carry
    # into the minutes/hours fields, and the result does not depend on the
    # local timezone.
    total_seconds, millis = divmod(int(t), 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    formatted.append(f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}")
  return formatted


def trajectory_with_timestamps(trajectory):
  """
  Compute the formatted sampling timestamps of one trajectory.

  :param trajectory: pair of (name, samples)
  :return: tuple of the trajectory name and the list of formatted timestamps,
           one per sampling point kept by `get_frames_timestamps`
  """
  # Compute the running distance once; it requires a geodesic calculation
  # for every pair of adjacent points.
  running_distance = get_trajectory_running_distance(trajectory)
  total_distance = int(running_distance[-1])
  # Multiples of SAMPLING_DISTANCE strictly below the total distance, excluding 0.
  sampling_points = np.arange(SAMPLING_DISTANCE, total_distance, SAMPLING_DISTANCE)
  average_speeds = np.asarray(get_average_speeds(trajectory))
  timestamps = get_frames_timestamps(np.asarray(running_distance), average_speeds, sampling_points)
  return trajectory[0], format_timestamps(timestamps)

In [None]:
# One (name, formatted timestamps) pair per selected trajectory.
selected_trajectories_with_timestamps = [trajectory_with_timestamps(trajectory) for trajectory in selected_trajectories]

### Store the results

In [None]:
# Store the results as a dict built from the (name, timestamps) pairs.
with open('/content/drive/MyDrive/trajectories_with_timestamps.pkl', 'wb') as f:
  pickle.dump(dict(selected_trajectories_with_timestamps), f)