In [1]:
import os
import json
import pandas as pd
import numpy as np
from geopy.distance import geodesic
from datetime import datetime
from cog_analysis import load_boat_data
from IPython.display import display

def load_summary_intervals(summary_file="summary.json"):
    with open(summary_file, "r") as f:
        summary = json.load(f)
    return {r["run"]: r["intervals"] for r in summary}

def filter_interval(df, start, end):
    return df[(df["SecondsSince1970"] >= start) & (df["SecondsSince1970"] <= end)].reset_index(drop=True)

def compute_stats(df, columns):
    stats = {}
    for col in columns:
        data = df[col].dropna()
        if col == "COG":
            angles = np.deg2rad(data)
            mean = np.rad2deg(np.arctan2(np.mean(np.sin(angles)), np.mean(np.cos(angles)))) % 360
            stats[col] = {"Avg": mean, "Min": data.min(), "Max": data.max(), "Count": len(data), "StdDev": data.std()}
        else:
            stats[col] = {"Avg": data.mean(), "Min": data.min(), "Max": data.max(), "Count": len(data), "StdDev": data.std()}
    return pd.DataFrame(stats).T

def compute_distance(lat, lon):
    if len(lat) < 2:
        return 0
    return sum(geodesic((lat.iloc[i - 1], lon.iloc[i - 1]), (lat.iloc[i], lon.iloc[i])).meters 
               for i in range(1, len(lat)))

def compute_straight_line(lat, lon):
    if len(lat) < 2:
        return 0
    return geodesic((lat.iloc[0], lon.iloc[0]), (lat.iloc[-1], lon.iloc[-1])).meters

def compute_vmg_gain(df1, df2):
    if df1.empty or df2.empty or len(df1) < 10 or len(df2) < 10:
        return pd.DataFrame([["N/A", "N/A", "N/A", "N/A"]],
                          columns=["Begin", "End", "Gain (total)", "Gain (per Minute)"], 
                          index=["VMG"])
    
    ktomps = 0.51444
    vmg1_start = df1["VMG"].head(10).mean() * ktomps
    vmg2_start = df2["VMG"].head(10).mean() * ktomps
    vmg1_end = df1["VMG"].tail(10).mean() * ktomps
    vmg2_end = df2["VMG"].tail(10).mean() * ktomps
    duration = df1["SecondsSince1970"].iloc[-1] - df1["SecondsSince1970"].iloc[0]
    gain = (vmg2_end - vmg1_end) - (vmg2_start - vmg1_start)
    gain_per_min = gain / (duration / 60) if duration else float("nan")
    return pd.DataFrame([[f"{vmg2_start - vmg1_start:+.1f} m", f"{vmg2_end - vmg1_end:+.1f} m", f"{gain:+.1f} m", f"{gain_per_min:+.2f} m/min"]],
                      columns=["Begin", "End", "Gain (total)", "Gain (per Minute)"], index=["VMG"])

"""
def merge_stats(stats1, stats2, label1, label2):
    stats1.columns = [f"{c} ({label1})" for c in stats1.columns]
    stats2.columns = [f"{c} ({label2})" for c in stats2.columns]
    return pd.concat([stats1, stats2], axis=1)
"""
def merge_stats(stats1, stats2, label1, label2):
    stats1 = stats1.rename(columns=lambda x: f"{x} ({label1})")
    stats2 = stats2.rename(columns=lambda x: f"{x} ({label2})")
    combined = pd.concat([stats1, stats2], axis=1)
    order = ["Avg", "Min", "Max", "Count", "StdDev"]
    cols = [f"{stat} ({label})" for stat in order for label in (label1, label2) if f"{stat} ({label})" in combined.columns]
    return combined[cols]


def display_all(merged_stats, summary_df, gain_table, boat1_name, boat2_name):
    display(
        merged_stats.style
        .format("{:.4g}")
        .set_caption("Run tot Statistics")
    )
    display(
        summary_df.style
        .format("{:.4g}")
        .set_caption("Résumé des distances")
    )
    display(
        gain_table.style
        .format({"VMG": "{:+.4g} m"})
        .set_caption(f"{boat1_name} gains relative to {boat2_name} in Meters")
    )
def load_and_reduce_boat_data(run_path, summary_dict):
    csv_files = sorted([f for f in os.listdir(run_path) if f.endswith(".csv")])
    if len(csv_files) < 2:
        raise ValueError("At least two CSV files are required.")

    boat1_df, boat2_df, boat1_name, boat2_name = load_boat_data(
        os.path.join(run_path, csv_files[0]),
        os.path.join(run_path, csv_files[1])
    )

    # Check if DataFrames are empty
    if boat1_df.empty or boat2_df.empty:
        raise ValueError("One or both boat DataFrames are empty")

    run_name = os.path.basename(run_path)
    if run_name not in summary_dict:
        raise ValueError(f"No intervals for run: {run_name}")
    intervals = summary_dict[run_name]
    if len(intervals) < 2:
        raise ValueError(f"Run {run_name} must have at least two intervals.")

    # Create filtered DataFrames
    boat1_int1 = filter_interval(boat1_df, intervals[0]["start_time"], intervals[0]["end_time"])
    boat2_int1 = filter_interval(boat2_df, intervals[0]["start_time"], intervals[0]["end_time"])
    boat1_int2 = filter_interval(boat1_df, intervals[1]["start_time"], intervals[1]["end_time"])
    boat2_int2 = filter_interval(boat2_df, intervals[1]["start_time"], intervals[1]["end_time"])

    # Check if filtered intervals are empty
    if boat1_int1.empty or boat2_int1.empty or boat1_int2.empty or boat2_int2.empty:
        raise ValueError("One or more interval DataFrames are empty")

    return {
        "full_df1": boat1_df,
        "full_df2": boat2_df,
        "reduced_boat1_int1_df": boat1_int1,
        "reduced_boat2_int1_df": boat2_int1,
        "reduced_boat1_int2_df": boat1_int2,
        "reduced_boat2_int2_df": boat2_int2,
        "boat1_name": boat1_name,
        "boat2_name": boat2_name
    }

def compare_runs(df1, df2, label1, label2):
    cols = ["TWS", "TWD", "SOG", "VMG", "COG", "TWA_Abs", "Heel_Lwd", "Trim"]
    stats1 = compute_stats(df1, cols)
    stats2 = compute_stats(df2, cols)
    dist1 = compute_distance(df1["Lat"], df1["Lon"])
    dist2 = compute_distance(df2["Lat"], df2["Lon"])
    straight1 = compute_straight_line(df1["Lat"], df1["Lon"])
    straight2 = compute_straight_line(df2["Lat"], df2["Lon"])
    summary_df = pd.DataFrame({label1: [dist1, straight1], label2: [dist2, straight2]}, index=["Distance [m]", "Straight Line [m]"])
    return stats1, stats2, summary_df

def process_run(df1, df2, name1, name2, title):
    if df1.empty or df2.empty:
        print(f"⚠️ Skipping {title} due to empty DataFrames: {name1} vs {name2}")
        return

    df1 = df1.copy()
    df2 = df2.copy()
    df1["ISODateTimeUTC"] = pd.to_datetime(df1["ISODateTimeUTC"], errors="coerce")
    df2["ISODateTimeUTC"] = pd.to_datetime(df2["ISODateTimeUTC"], errors="coerce")

    # Check for valid timestamps
    if df1["ISODateTimeUTC"].isna().all() or df2["ISODateTimeUTC"].isna().all():
        print(f"⚠️ Skipping {title} due to invalid timestamps: {name1} vs {name2}")
        return

    start_time = df1["ISODateTimeUTC"].iloc[0].ceil("min")
    df1 = df1[df1["ISODateTimeUTC"] >= start_time]
    df2 = df2[df2["ISODateTimeUTC"] >= start_time]

    if df1.empty or df2.empty:
        print(f"⚠️ Skipping {title} after time alignment: {name1} vs {name2}")
        return

    print("\n" + "=" * 80)
    print(f"📊 {title}: {name1} vs {name2}")
    print("=" * 80)

    stats1, stats2, summary_df = compare_runs(df1, df2, name1, name2)
    merged_stats = merge_stats(stats1, stats2, name1, name2)
    gain = compute_vmg_gain(df1, df2)
    display_all(merged_stats, summary_df, gain, name1, name2)

def process_all_run(run_path, summary_path):
    summary_dict = load_summary_intervals(summary_path)
    result = load_and_reduce_boat_data(run_path, summary_dict)
    name1, name2 = result["boat1_name"], result["boat2_name"]

    process_run(result["full_df1"], result["full_df2"], name1, name2, "Full Interval")
    process_run(result["reduced_boat1_int1_df"], result["reduced_boat2_int1_df"], name1, name2, "Interval 1")
    process_run(result["reduced_boat1_int2_df"], result["reduced_boat2_int2_df"], name1, name2, "Interval 2")


In [2]:
run_path = "../Data_Sailnjord/Straight_lines/06_06/06_06_Run1"
summary_path = "summary.json"

process_all_run(run_path, summary_path)



📊 Full Interval: Gian Stragiotti vs Karl Maeder


Unnamed: 0,Avg (Gian Stragiotti),Avg (Karl Maeder),Min (Gian Stragiotti),Min (Karl Maeder),Max (Gian Stragiotti),Max (Karl Maeder),Count (Gian Stragiotti),Count (Karl Maeder),StdDev (Gian Stragiotti),StdDev (Karl Maeder)
TWS,7.388,7.388,5.204,5.204,9.5,9.5,3903,3903,1.021,1.021
TWD,176.9,176.9,166.6,166.6,195.3,195.3,3903,3903,7.709,7.709
SOG,18.53,18.74,0.0,5.6,28.3,27.4,3903,3903,5.83,4.469
VMG,10.73,10.43,0.0,0.003,20.2,19.98,3903,3903,5.034,4.351
COG,110.2,107.8,1.0,0.2,360.0,360.0,3903,3903,96.29,96.47
TWA_Abs,83.41,84.39,0.656,0.697,179.5,179.9,3903,3903,39.17,38.22
Heel_Lwd,40.04,40.4,-112.2,-26.4,95.6,76.1,3852,3839,22.23,16.19
Trim,11.91,7.739,-7.9,-7.7,49.3,42.8,3903,3903,5.829,4.889


Unnamed: 0,Gian Stragiotti,Karl Maeder
Distance [m],3737.0,3771.0
Straight Line [m],189.9,228.2


Unnamed: 0,Begin,End,Gain (total),Gain (per Minute)
VMG,+0.2 m,-0.6 m,-0.8 m,-0.12 m/min



📊 Interval 1: Gian Stragiotti vs Karl Maeder


Unnamed: 0,Avg (Gian Stragiotti),Avg (Karl Maeder),Min (Gian Stragiotti),Min (Karl Maeder),Max (Gian Stragiotti),Max (Karl Maeder),Count (Gian Stragiotti),Count (Karl Maeder),StdDev (Gian Stragiotti),StdDev (Karl Maeder)
TWS,7.165,7.165,5.204,5.204,9.395,9.394,1031,1031,1.28,1.28
TWD,171.5,171.5,167.9,167.9,173.8,173.8,1031,1031,1.507,1.507
SOG,20.34,20.06,16.8,17.9,22.3,22.2,1031,1031,0.9389,0.6387
VMG,13.71,13.43,9.711,10.02,17.37,16.49,1031,1031,1.444,1.177
COG,124.0,123.6,109.9,112.5,137.2,136.9,1031,1031,4.733,4.017
TWA_Abs,47.44,47.9,35.49,36.52,60.84,58.91,1031,1031,4.8,3.902
Heel_Lwd,55.2,53.49,36.5,34.3,73.5,76.1,1031,1031,6.227,5.986
Trim,8.238,5.345,-7.9,-4.1,17.8,16.6,1031,1031,3.504,3.296


Unnamed: 0,Gian Stragiotti,Karl Maeder
Distance [m],1081,1066
Straight Line [m],1075,1061


Unnamed: 0,Begin,End,Gain (total),Gain (per Minute)
VMG,+0.4 m,-1.0 m,-1.5 m,-0.86 m/min



📊 Interval 2: Gian Stragiotti vs Karl Maeder


Unnamed: 0,Avg (Gian Stragiotti),Avg (Karl Maeder),Min (Gian Stragiotti),Min (Karl Maeder),Max (Gian Stragiotti),Max (Karl Maeder),Count (Gian Stragiotti),Count (Karl Maeder),StdDev (Gian Stragiotti),StdDev (Karl Maeder)
TWS,7.181,7.181,6.6,6.6,7.995,7.995,510,510,0.3109,0.3109
TWD,189.9,189.9,185.6,185.6,195.3,195.3,510,510,3.05,3.05
SOG,25.69,24.6,22.9,22.6,28.3,26.1,510,510,1.229,0.8187
VMG,15.6,12.95,11.48,8.383,19.61,17.04,510,510,1.747,1.805
COG,317.7,311.8,301.4,297.0,330.5,321.9,510,510,6.782,5.202
TWA_Abs,127.9,121.9,113.9,110.6,140.2,133.3,510,510,6.122,4.906
Heel_Lwd,47.33,48.16,17.1,28.3,65.8,72.0,510,510,7.557,7.963
Trim,10.03,6.203,0.3,-2.8,20.2,21.5,510,510,3.162,4.019


Unnamed: 0,Gian Stragiotti,Karl Maeder
Distance [m],672.3,644.1
Straight Line [m],666.7,640.5


Unnamed: 0,Begin,End,Gain (total),Gain (per Minute)
VMG,-0.2 m,-0.6 m,-0.3 m,-0.37 m/min
