In [None]:
%env DATA_PATH=../../../data
from db import *
from sqlalchemy import insert, select
import glob
import json
import os
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm
from lib.fit import load_fit_file, FIT_EPOCH_S, get_camera_ends, get_camera_starts, get_gps_data
import xml.etree.ElementTree as ET

# Root of the data directory, relative to this notebook. This repeats the value
# exported as the DATA_PATH environment variable by the %env magic at the top of
# this cell; keep the two in sync.
DATA_PATH = '../../../data'

# Files

### Load

In [None]:
# Directories holding the FIT telemetry files and the preview videos.
FIT_PATH = f"{DATA_PATH}/virbs/all"
VID_PATH = f"{DATA_PATH}/videos/previews"


def _basenames(pattern):
    """Return the last '/'-separated component of every path matching ``pattern``."""
    return [match.split('/')[-1] for match in glob.glob(pattern)]


fit_files = _basenames(f"{FIT_PATH}/*.fit")
vid_files = _basenames(f"{VID_PATH}/*.MP4")

In [None]:
# Locations of the VIRB Edit database folders (absolute paths on the author's machine).
PROJECTS_PATH = '/mnt/c/Users/yusuf/AppData/Roaming/Garmin/VIRB Edit/Database/7/MovieProjects'
RAW_MOVIE_PATH = "/mnt/c/Users/yusuf/AppData/Roaming/Garmin/VIRB Edit/Database/7/RawMovies"


def _parent_and_name(pattern):
    """Return '<parent dir>/<file name>' for every path matching ``pattern``."""
    return ['/'.join(match.split('/')[-2:]) for match in glob.glob(pattern)]


project_files = _parent_and_name(f"{PROJECTS_PATH}/*/edited_movie.xml")
raw_movie_files = _parent_and_name(f"{RAW_MOVIE_PATH}/*/video.xml")

In [None]:
# Map each raw-movie id (the directory part of the entries in raw_movie_files)
# to the FIT source path and the preview video file name read from its video.xml.
raw_movies = {}
for f in raw_movie_files:
    root = ET.parse(f"{RAW_MOVIE_PATH}/{f}").getroot()
    fit = root.find('./TelemetryTypeAssociations/TelemetryTypeAssociation_t/SourceFilePath').text
    # The preview path is backslash-separated; keep only its last component.
    preview = root.find('./SourceFiles/MediaSourceFile_t/LowResolutionFilePath').text.split('\\')[-1]
    if preview == "7de890f8-fb7a-49aa-97a7-96eaf53a7a44.MP4": # TODO: script to split large video files from broken rolls
        print("Skipping known double video")
        # Really skip it: previously the entry was still registered below even
        # though the message announced that it was being skipped.
        continue
    raw_movies[f.split('/')[0]] = dict(fit=fit, preview=preview)

In [None]:
# Join every edited project with the raw movie it references, then store the
# resulting list (sorted by project name) as JSON for later inspection.
files = []
for f in project_files:
    root = ET.parse(f"{PROJECTS_PATH}/{f}").getroot()
    raw_movie_id = root.find('./VideoClips/VideoClip_t/RawMovies/RawMovieDisplay_t/RawMovieId').text
    name = root.find('Name').text

    raw_movie = raw_movies.get(raw_movie_id)
    if raw_movie is None:
        print(f"Skipping {name}, raw movie id {raw_movie_id} not found")
        continue

    files.append({
        'name': name,
        'project_id': f.split('/')[0],
        'raw_movie_id': raw_movie_id,
        'fit_file': raw_movie['fit'],
        'preview_file': raw_movie['preview'],
    })

files = sorted(files, key=lambda entry: entry['name'])
with open("../../../tmp/ids.json", "w") as ids_file:
    json.dump(files, ids_file, indent=2)

## Mapping

In [None]:
def get_data(file):
    """Split one project entry into the fields needed to create a roll.

    Args:
        file: dict with the keys 'name', 'fit_file' and 'preview_file'.
            'fit_file' has the form '<sensor>/<Y>-<M>-<D>-<h>-<m>-<s>.<ext>'.
            'name' is '_'-separated: the first three parts form the date,
            followed by the driver and the buggy; the last part is the roll
            number, optionally followed by a final 'crotch' part.

    Returns:
        dict with the keys driver, buggy, date, start_time, num, vid_type,
        preview, fit_file and sensor (all values are strings).
    """
    sensor, fit_file = file['fit_file'].split('/')
    stamp = fit_file.split('.')[0]
    year, month, day, hour, minute, second = stamp.split('-')

    parts = file['name'].split('_')
    driver, buggy = parts[3], parts[4]
    is_crotch = parts[-1] == "crotch"
    # The roll number is the last part once an optional "crotch" suffix is dropped.
    num = parts[-2] if is_crotch else parts[-1]

    return {
        'driver': driver,
        'buggy': buggy,
        'date': '_'.join(parts[:3]),
        'start_time': f"{year}-{month}-{day}T{hour}:{minute}:{second}Z",
        'num': num,
        'vid_type': "video_preview_c" if is_crotch else "video_preview",
        'preview': file['preview_file'],
        'fit_file': fit_file,
        'sensor': sensor,
    }
# Parse every project entry; show the first result as a sanity check.
file_data = list(map(get_data, files))
file_data[0]

In [None]:
# Distinct values seen for each parsed field, to check them against the lookup maps.
{
    label: {fd[field] for fd in file_data}
    for label, field in [
        ('sensors', 'sensor'),
        ('dates', 'date'),
        ('drivers', 'driver'),
        ('buggies', 'buggy'),
        ('vid_types', 'vid_type'),
        ('nums', 'num'),
    ]
}

In [None]:
# Short sensor names keyed by Virb serial number.
# Three of the four entries used to share the key 3309634073, so the dict
# collapsed to two entries and only 'ir' survived for that serial. The serials
# below are the ones this notebook's sensor_map assigns to the matching names.
# NOTE(review): confirm the abbreviation-to-sensor pairing.
sensor_names = {
    3937722707: 'zr',   # "Zuke R"
    3957747616: 'sr',   # "Seraph R"
    3953097982: 'kpf',  # "Kingpin F"
    3309634073: 'ir',   # "Inviscid R"
}

# DB

In [None]:
session = Session(engine)

# Driver token used in project names -> name stored on the Driver row.
_driver_display_names = [
    ('Alani', 'Alani'),
    ('Audrey', 'Audrey'),
    ('Cadence', 'Cadence'),
    ('Daisy', 'Daisy'),
    ('Meixi', "Mei Xi"),
]
driver_map = {token: Driver(name=display) for token, display in _driver_display_names}

In [None]:
# Buggy token used in project names -> Buggy row to insert.
buggy_map = {
    'Inviscid': Buggy(name='Inviscid', abbreviation='inviscid'),
    # Was 'Kingping II'; the sensor for this buggy is named "Kingpin F" in sensor_map.
    'KP': Buggy(name='Kingpin II', abbreviation='kp'),
    'Seraph': Buggy(name='Seraph', abbreviation='seraph'),
    'Zuke': Buggy(name='Zuke', abbreviation='zuke'),
}

In [None]:
def _weekend_roll_date(key):
    """Build the weekend RollDate described by a 'YYYY_MM_DD' key."""
    year, month, day = (int(part) for part in key.split('_'))
    return RollDate(year=year, month=month, day=day, type=RollType.WEEKEND)


# Date token used in project names -> RollDate row to insert.
dates_map = {
    key: _weekend_roll_date(key)
    for key in ('2025_09_20', '2025_09_21', '2025_09_27', '2025_11_02', '2025_11_08')
}

In [None]:
# Serial number (the directory part of a FIT path) -> human-readable sensor name.
_virb_labels = [
    ('3309634073', "Inviscid R"),
    ('3937722707', "Zuke R"),
    ('3953097982', "Kingpin F"),
    ('3957747616', "Seraph R"),
    ('3993910820', "Unknown Virb 1"),
]
# Every sensor is a Virb whose abbreviation is simply its serial number.
sensor_map = {
    serial: Sensor(type="virb", name=label, abbreviation=serial)
    for serial, label in _virb_labels
}

In [None]:
# Stage every lookup row, then flush without committing. add_roll later reads
# buggy.id and date.id from these objects.
for lookup in (driver_map, buggy_map, dates_map, sensor_map):
    session.add_all(lookup.values())
session.flush()

In [None]:
# rolls: cache of created Roll objects, keyed by (buggy id, date id, roll number).
# fit_files: rebound here from the earlier list of .fit file names to an empty dict.
rolls, fit_files = {}, {}

In [None]:
def add_roll(file):
    """Create (or reuse) the Roll for one parsed project entry and attach its files.

    Rolls are cached in the module-level ``rolls`` dict under the key
    ``(buggy.id, date.id, roll number)``. When an entry maps to an existing
    key, the cached Roll is reused and the new files are appended to it.

    Args:
        file: dict as returned by ``get_data`` (keys driver, buggy, date, num,
            start_time, vid_type, preview, fit_file, sensor).

    Returns:
        The Roll the files were attached to.

    Raises:
        KeyError: if the driver, buggy, date or sensor has no entry in the
            corresponding lookup map.
        ValueError: if ``num`` is not an integer or ``start_time`` is not an
            ISO 8601 timestamp.
    """
    driver = driver_map[file['driver']]
    buggy = buggy_map[file['buggy']]
    date = dates_map[file['date']]
    num = int(file['num'])

    # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11 on.
    # Spelling out the UTC offset yields the same aware datetime on every version.
    iso_start = file['start_time']
    if iso_start.endswith('Z'):
        iso_start = iso_start[:-1] + '+00:00'
    start_time = datetime.fromisoformat(iso_start)

    key = (buggy.id, date.id, num)
    if key in rolls:
        print(f"Duplicate roll for {file['fit_file']} {key}")
        roll = rolls[key]
    else:
        roll = Roll(driver=driver, buggy=buggy, roll_date=date, roll_number=num, start_time=start_time)
        rolls[key] = roll

    # Each entry contributes one preview video and one FIT file from the same sensor.
    sensor = sensor_map[file['sensor']]
    roll.roll_files.append(
        RollFile(type=file['vid_type'], uri=f"%videos%/previews/{file['preview']}", sensor=sensor)
    )
    roll.roll_files.append(
        RollFile(type="fit", uri=f"%fit%/all/{file['fit_file']}", sensor=sensor)
    )

    session.add(roll)
    session.add_all(roll.roll_files)
    return roll

In [None]:
# Register every parsed entry, then persist everything in a single commit.
for entry in file_data:
    add_roll(entry)
session.commit()

# Events

In [None]:
import numpy as np
import geopandas as gpd
import shapely
from shapely import Point
from shapely.ops import nearest_points

def _first_geometry_2d(frame):
    """Return the geometry of the first row of ``frame``, passed through shapely.force_2d."""
    return shapely.force_2d(frame.iloc[0].geometry)


hills = gpd.read_file(f"{DATA_PATH}/geo/hills.kml", crs="EPSG:4326")
hills_utm = hills.to_crs(hills.estimate_utm_crs())

# The first feature of hills.kml unpacks into seven parts: hills 1-2, the
# freeroll, hills 3-5 and the end. They are kept both in the file's own
# coordinates and in the estimated UTM projection.
(hill1, hill2, freeroll,
 hill3, hill4, hill5, _end) = _first_geometry_2d(hills).geoms
(hill1_utm, hill2_utm, freeroll_utm,
 hill3_utm, hill4_utm, hill5_utm, end_utm) = _first_geometry_2d(hills_utm).geoms

# Single reference geometries used to locate the end and the middle of a roll.
end_offset = _first_geometry_2d(gpd.read_file(f"{DATA_PATH}/geo/end_offset.kml"))
freeroll_point = _first_geometry_2d(gpd.read_file(f"{DATA_PATH}/geo/freeroll_point.kml"))

In [None]:
# Open a fresh session for the event extraction below; this rebinds the
# module-level `session` that add_roll_events reads.
session = Session(engine)

In [None]:
def add_roll_events(fit_file):
    """Derive timing events from one FIT file and attach them to its roll.

    Looks up the Roll for ``fit_file.roll_id``, loads the FIT messages and GPS
    track, estimates where the roll starts and ends, finds the recorded points
    closest to the hill / freeroll reference geometries, and appends the
    matching RollEvent objects (roll_start, hill_start 1-5, freeroll_start,
    roll_end) to ``roll.roll_events``. The events are added to the module-level
    ``session`` but not committed here.

    Args:
        fit_file: object exposing ``roll_id`` and ``uri`` (the caller passes
            RollFile rows whose type is "fit").

    Returns:
        None. Files without exactly one camera start, or without GPS data, are
        reported with a print and skipped.

    Raises:
        ValueError: if no Roll exists for ``fit_file.roll_id``.
    """
    roll = session.get(Roll, fit_file.roll_id)
    if not roll: raise ValueError(f"Roll not found for roll file id {fit_file.roll_id}")
    
    # Stored URIs start with a "%fit%" placeholder; swap it for "virbs" before loading.
    messages = load_fit_file(fit_file.uri.replace('%fit%', "virbs"))
    camera_starts = get_camera_starts(messages)
    # Only files with exactly one camera start are processed.
    if len(camera_starts) != 1:
        print(f"Skipping {fit_file} with {len(camera_starts)} camera starts")
        return 
    
    gps_data = get_gps_data(messages)
    if gps_data is None:
        print(f"Skipping {fit_file} with no gps_data")
        return
    # Scalar speed = Euclidean norm of each row's velocity vector.
    gps_data['speed'] = np.linalg.norm(np.array(gps_data.velocity.to_list()), axis=1)
    
    # set up roll points
    roll_points = shapely.points(gps_data[['position_long', 'position_lat']])
    point_utm = gpd.GeoSeries(roll_points, index=gps_data.index, crs="EPSG:4326")
    point_utm = point_utm.to_crs(point_utm.estimate_utm_crs()) # converts to coordinate systems where distances are nice
    # Reverse lookup from a point to the index label of its GPS row. When several
    # rows produce equal points, the label of the last such row is kept.
    # NOTE(review): the index labels are used as millisecond timestamps below
    # (stored as timestamp_ms, offset by 1000 * seconds) — confirm against get_gps_data.
    roll_point_timestamp = {p: gps_data.index[i] for i, p in enumerate(roll_points)} # type: ignore
    
    # look at a point in the middle of the freeroll, then find soonest time stopped before that as start of roll
    roll_point_in_freeroll, _ = nearest_points(shapely.union_all(roll_points), freeroll_point)
    before_points = gps_data.loc[:roll_point_timestamp[roll_point_in_freeroll]]
    # Rows before the freeroll point whose speed is below 0.5 (presumably m/s — confirm units).
    pre_start_points = before_points[before_points.speed < 0.5]
    # Use the `timestamp` column of the 10th-from-last slow row, or the first
    # index label when there are fewer than 10 slow rows.
    roll_start = gps_data.index[0] if len(pre_start_points) < 10 else pre_start_points.timestamp.iloc[-10] # default to start of roll
    
    # buggy might stop at end, so look at a point 10 meters before end and extrapolate time from speed there
    roll_points = shapely.points(gps_data[['position_long', 'position_lat']].loc[roll_start:])
    roll_end_offset_point, _ = nearest_points(shapely.union_all(roll_points), end_offset)
    roll_end_offset = roll_point_timestamp[roll_end_offset_point]
    if gps_data.speed.loc[roll_end_offset] < 0.1:
        # Practically stopped at the offset point: fall back to the last GPS row.
        roll_end = gps_data.index[-1]
    else:
        # Add the time needed to cover 10 units of distance at the current speed
        # (scaled by 1000), then snap to the nearest existing index label.
        roll_end = roll_end_offset + 1000 * (10 / gps_data.speed.loc[roll_end_offset])
        roll_end = gps_data.index[gps_data.index.get_indexer([roll_end], method='nearest')[0]] # type: ignore

    # Restrict the track to [roll_start, roll_end]. Despite its name, roll_line
    # is the union of the individual points, not a line.
    roll_points = shapely.points(gps_data[['position_long', 'position_lat']].loc[roll_start:roll_end])
    roll_line = shapely.union_all(roll_points)
    
    # get nearest points to hill lines
    hill1_point, _ = nearest_points(roll_line, hill1)
    hill2_point, _ = nearest_points(roll_line, hill2)
    freeroll_point_on_roll, _ = nearest_points(roll_line, freeroll)
    hill3_point, _ = nearest_points(roll_line, hill3)
    hill4_point, _ = nearest_points(roll_line, hill4)
    hill5_point, _ = nearest_points(roll_line, hill5)
    
    # get timestamps
    hill1_start = roll_point_timestamp[hill1_point]
    hill2_start = roll_point_timestamp[hill2_point]
    freeroll_start = roll_point_timestamp[freeroll_point_on_roll]
    hill3_start = roll_point_timestamp[hill3_point]
    hill4_start = roll_point_timestamp[hill4_point]
    hill5_start = roll_point_timestamp[hill5_point]
    
    
    roll.roll_events.append(RollEvent(type="roll_start", timestamp_ms=roll_start))
    
    # Distances in meters because of utm coordinate system
    # NOTE(review): point_utm and the *_utm hill geometries each use their own
    # estimate_utm_crs() result; this presumably resolves to the same zone — confirm.
    # if the start was close to hill1, make hill 1 start that
    if roll_start != gps_data.index[0] and shapely.distance(point_utm.loc[roll_start], hill1_utm) < 10:
        roll.roll_events.append(RollEvent(type="hill_start", tag="1", timestamp_ms=roll_start))
    elif shapely.distance(point_utm.loc[hill1_start], hill1_utm) < 10:
        roll.roll_events.append(RollEvent(type="hill_start", tag="1", timestamp_ms=hill1_start))     
    # if start was close to hill 2, make hill 2 start that
    if roll_start != gps_data.index[0] and shapely.distance(point_utm.loc[roll_start], hill2_utm) < 10:
        roll.roll_events.append(RollEvent(type="hill_start", tag="2", timestamp_ms=roll_start))
    elif shapely.distance(point_utm.loc[hill2_start], hill2_utm) < 10:
        roll.roll_events.append(RollEvent(type="hill_start", tag="2", timestamp_ms=hill2_start))
    # include starts if close
    if shapely.distance(point_utm.loc[freeroll_start], freeroll_utm) < 10:
        roll.roll_events.append(RollEvent(type="freeroll_start", timestamp_ms=freeroll_start))
    if shapely.distance(point_utm.loc[hill3_start], hill3_utm) < 10:
        roll.roll_events.append(RollEvent(type="hill_start", tag="3", timestamp_ms=hill3_start))
    if shapely.distance(point_utm.loc[hill4_start], hill4_utm) < 10:
        roll.roll_events.append(RollEvent(type="hill_start", tag="4", timestamp_ms=hill4_start))
    if shapely.distance(point_utm.loc[hill5_start], hill5_utm) < 10:
        roll.roll_events.append(RollEvent(type="hill_start", tag="5", timestamp_ms=hill5_start))
    
    
    roll.roll_events.append(RollEvent(type="roll_end", timestamp_ms=roll_end))
    # events at very start or end are probably wrong
    # This rebuilds the whole roll.roll_events list, so events that were already
    # on the roll before this call are filtered by the same test.
    roll.roll_events = [e for e in roll.roll_events if gps_data.index[0] < e.timestamp_ms < gps_data.index[-1]]
    # Store plain Python ints rather than whatever scalar type the index yields.
    for e in roll.roll_events: e.timestamp_ms = int(e.timestamp_ms)
    print(roll.roll_events)
    session.add_all(roll.roll_events)
    # print(fit_file, roll.roll_events)
    

In [None]:
# Every RollFile row of type "fit"; each one is fed to add_roll_events below.
fit_file_query = select(RollFile).where(RollFile.type == "fit")
fit_files = session.execute(fit_file_query).scalars().all()

In [None]:
# All-or-nothing: any failure rolls the session back and re-raises; the commit
# only happens when every FIT file was processed.
try:
    for fit_file in tqdm(fit_files):
        add_roll_events(fit_file)
except Exception as err:
    session.rollback()
    raise err
else:
    session.commit()

# Download

In [None]:
import json
import requests
from tqdm.notebook import tqdm

# Read the media list and drop its first four entries.
with open('./data/ir_11_2/medialist.json') as medialist_file:
    medialist = json.load(medialist_file)
media_list = medialist['media'][4:]
media_list

In [None]:
# Download the low-resolution video of every media entry.
for media in tqdm(media_list):
    url = media['lowResVideoPath']
    filename = url.split('/')[-1].replace('GLV', 'mp4')
    target = f'../../../videos/ir_11_2/{filename}'
    partial = f'{target}.part'
    # Stream the body in chunks instead of holding the whole video in memory.
    # The timeout keeps a stalled connection from hanging the loop forever.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail loudly on HTTP errors instead of saving an error page as a video.
        response.raise_for_status()
        with open(partial, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    # Only expose the final name once the download completed, so an interrupted
    # transfer never leaves a truncated .mp4 behind.
    os.replace(partial, target)

# OLD

#### Tests

In [None]:
# First raw-movie entry, used as a sample for the XPath probes below.
tmp = raw_movie_files[0]
def fix_path(p):
    """Rewrite ``p`` by replacing every 'C:' with '/mnt/c' and every backslash with '/'."""
    remounted = p.replace('C:', '/mnt/c')
    return '/'.join(remounted.split('\\'))
# Probe the sample video.xml: the FIT source path, then the converted preview path
# (the cell displays the latter).
raw_movie_root = ET.parse(f"{RAW_MOVIE_PATH}/{tmp}").getroot()
raw_movie_root.find('./TelemetryTypeAssociations/TelemetryTypeAssociation_t/SourceFilePath').text
fix_path(raw_movie_root.find('./SourceFiles/MediaSourceFile_t/LowResolutionFilePath').text)

In [None]:
# Probe the first project file: its name, then the raw movie id it references
# (the cell displays the latter).
tmp = project_files[0]
project_root = ET.parse(f"{PROJECTS_PATH}/{tmp}").getroot()

project_root.find('Name').text
project_root.find('./VideoClips/VideoClip_t/RawMovies/RawMovieDisplay_t/RawMovieId').text

### Extract times
(not needed if above works)

In [None]:
# Load every FIT file once, keyed by the entry of fit_files it was loaded for.
message_objs = {
    fit_file: load_fit_file(f"{FIT_PATH}/{fit_file}")
    for fit_file in tqdm(fit_files)
}

In [None]:
# Creation time (time_created + FIT_EPOCH_S) of every FIT file that has exactly
# one camera end, collected into a dict ordered by that time.
creation_times_list = []
for fit_file, messages in message_objs.items():
    ends = get_camera_ends(messages)
    if len(ends) == 1:
        starts = get_camera_starts(messages)  # fetched but not used below
        created = messages['file_id_mesgs'][0]['time_created']
        # the end offset (ends[0] // 1000) is intentionally not added
        creation_times_list.append((fit_file, created + FIT_EPOCH_S))
    else:
        print(f"Skipping {fit_file} with {len(ends)} camera ends")
creation_times = dict(sorted(creation_times_list, key=lambda pair: pair[1]))

In [None]:

# Creation time of every FIT file as an ISO 8601 string, keyed by file.
# Two fixes: the format no longer has a stray ':' after the 'T', and the
# conversion is done in UTC so that the trailing "Z" is truthful (previously
# local time was labelled as UTC).
# NOTE(review): assumes the values are Unix epoch seconds — confirm FIT_EPOCH_S.
from datetime import timezone
{k: datetime.fromtimestamp(v, tz=timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') for k, v in creation_times.items()}

In [None]:
# The creation times alone, as ISO 8601 strings in dict order.
# Two fixes: the format no longer has a stray ':' after the 'T', and the
# conversion is done in UTC so that the trailing "Z" is truthful (previously
# local time was labelled as UTC).
# NOTE(review): assumes the values are Unix epoch seconds — confirm FIT_EPOCH_S.
from datetime import timezone
[datetime.fromtimestamp(v, tz=timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') for v in creation_times.values()]

In [None]:
# # Extract creation time from each video file
# import ffmpeg
# vid_creation_times_list = []
# for vid_file in tqdm(vid_files):
#     try:
#         probe = ffmpeg.probe(f'{VID_PATH}/{vid_file}')
#         creation_time = probe.get('format', {}).get('tags', {}).get('creation_time')
#         vid_creation_times_list.append((vid_file, creation_time))
#     except ffmpeg.Error as e:
#         vid_creation_times_list.append((vid_file, None))
#         print(f"Error probing {vid_file}: {e}")

# vid_creation_times = dict(sorted(vid_creation_times_list, key=lambda x: x[1]))

In [None]:
# list(vid_creation_times.values())

## Load from medialist

In [None]:
def add_roll(session: Session, media: dict,
             roll_date: RollDate, driver: Driver, buggy: Buggy, sensor: Sensor,
             roll_num: int, use_thm: bool = False):
    """Create a Roll from one medialist entry and register its files.

    Note: this redefines the ``add_roll(file)`` function declared earlier in
    the notebook.

    Args:
        session: session the new rows are added to (flushed, not committed).
        media: medialist entry; the keys 'date', 'lowResVideoPath' and 'fitURL'
            are read, plus 'thumbUrl' when ``use_thm`` is true.
        roll_date, driver, buggy, sensor: rows whose ids are stored on the roll
            and its files.
        roll_num: roll number stored on the Roll.
        use_thm: also register a thumbnail file.

    Returns:
        The newly created Roll.
    """
    def stem(url):
        # last '/'-separated component, cut at its first '.'
        return url.split('/')[-1].split('.')[0]

    def attach(file_type, uri):
        session.add(RollFile(roll_id=roll.id,
                             sensor_id=sensor.id,
                             type=file_type,
                             uri=uri))

    roll = Roll(roll_date_id=roll_date.id,
                driver_id=driver.id,
                buggy_id=buggy.id,
                roll_number=roll_num,
                start_time=datetime.fromtimestamp(media['date']))
    session.add(roll)
    session.flush() # populates roll.id

    vid = stem(media['lowResVideoPath'])
    attach('video_preview', f"%videos%/{sensor.abbreviation}/{vid}.mp4")

    fit = stem(media['fitURL'])
    attach('fit', f"%fit%/{sensor.abbreviation}/{fit}.fit")

    if use_thm:
        thm = stem(media['thumbUrl'])
        attach('thumbnail', f"%thumbnails%/{sensor.abbreviation}/{thm}.jpg")

    return roll

In [None]:
roll_nums = list(range(1, 11))

# medialist file -> ((year, month, day), sensor abbreviation, driver name,
#                    buggy abbreviation, register thumbnails?)
medialist_settings = {
    "zr/medialist.json": ((2025, 9, 21), "zr", "Mei Xi", "inviscid", False),
    "ir/medialist.json": ((2025, 11, 2), "ir", "Cadence", "seraph", True),
}

with Session(engine) as session:
    for file in file_list:
        media_list = media_lists[file]

        # Files without an entry keep whatever the previous iteration resolved.
        if file in medialist_settings:
            (year, month, day), sensor_abbr, driver_name, buggy_abbr, thm = medialist_settings[file]
            roll_date = session.scalars(
                select(RollDate).where(
                    RollDate.year == year,
                    RollDate.month == month,
                    RollDate.day == day
                )
            ).one()
            sensor = session.scalars(
                select(Sensor).where(Sensor.abbreviation == sensor_abbr)
            ).one()
            driver = session.scalars(
                select(Driver).where(Driver.name == driver_name)
            ).one()
            buggy = session.scalars(
                select(Buggy).where(Buggy.abbreviation == buggy_abbr)
            ).one()

        # Roll numbers cycle through roll_nums, restarting for every file.
        for i, media in enumerate(media_list):
            r = add_roll(session, media, roll_date, driver, buggy, sensor, roll_nums[i % len(roll_nums)], thm)
            print(r)
    # swap the commit for session.rollback() to do a dry run
    session.commit()

In [None]:
# Print the video and FIT path derived from every media entry (thumbnails are not printed).
for roll in media_list:
    vid, fit = (
        roll[field].split('/')[-1].split('.')[0]
        for field in ('lowResVideoPath', 'fitURL')
    )
    vid, fit = f"videos/zuke_r/{vid}.GLV", f"backend/notebooks/data/zr/{fit}.fit"
    print(vid, fit)

In [None]:
# Fetch the single Driver named "Alani" and display it.
alani_query = select(Driver).where(Driver.name == "Alani")
with Session(engine) as session:
    d = session.scalars(alani_query).one()
d

In [None]:
# with get_connection() as conn:
#     conn.execute(insert(driver_table).values(name="Alani"))
#     conn.execute(insert(driver_table).values(name="Audrey"))
#     conn.execute(insert(driver_table).values(name="Cadence"))
#     conn.execute(insert(driver_table).values(name="Daisy"))
#     conn.execute(insert(driver_table).values(name="Mei Xi"))
#     conn.commit()

In [None]:
# with get_connection() as conn:
#     result = conn.execute(driver_table.select())
#     for row in result:
#         print(row)