In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats as stats

def t_test(df1, df2, target="SOG", alpha=0.05, equal_var=True):
    """Run an independent two-sample t-test on one column and print the verdict.

    Missing values in ``target`` are dropped from each frame before testing.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups to compare; both must contain the ``target`` column.
    target : str, default "SOG"
        Name of the column whose values are compared.
    alpha : float, default 0.05
        Significance level the p-value is compared against.
    equal_var : bool, default True
        Forwarded to ``scipy.stats.ttest_ind``. ``True`` keeps the
        pooled-variance test used so far; ``False`` selects Welch's test,
        which does not assume equal variances.

    Returns
    -------
    tuple of (float, float)
        ``(t_statistic, p_value)``. Both are NaN when one of the groups has
        no usable value.
    """
    sample1 = df1[target].dropna()
    sample2 = df2[target].dropna()

    # Without data in both groups there is nothing to test; say so instead of
    # letting a NaN p-value fall through to the "not significant" branch.
    if sample1.empty or sample2.empty:
        print(f"Not enough data in '{target}' to run a t-test, no conclusion drawn.")
        return float("nan"), float("nan")

    # NOTE(review): ttest_ind treats every row as an independent observation —
    # confirm that holds for the frames passed in.
    t_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=equal_var)
    t_stat, p_value = float(t_stat), float(p_value)
    print(f"T-statistic: {t_stat:.3f}, p-value: {p_value:.15f}")

    # Below alpha the difference is treated as statistically significant.
    if np.isnan(p_value):
        print("The test is inconclusive (p-value is NaN), no conclusion drawn.")
    elif p_value < alpha:
        print("The difference is statistically significant, keeping data split.")
    else:
        print("The difference is not statistically significant, keeping data combined.")

    return t_stat, p_value

def _describe_group(group_df, target, position):
    """Build the one-line summary of ``target`` and SOG for a single group."""
    values = group_df[target]
    if pd.api.types.is_numeric_dtype(values):
        label = "Mean"
        summary = f"{values.mean():.2f}"
    else:
        # Text-like columns (object, string, categorical, ...) have no mean:
        # list their distinct non-missing values instead.
        label = "Unique"
        summary = ", ".join(map(str, values.dropna().unique()))

    sog = group_df["SOG"]
    return (f"{label} {target} on the {position} group : {summary}, "
            f"average SOG: {sog.mean():.2f}, std SOG: {sog.std():.2f}")


def print_run_stats(first_sentence, first_runs_df, last_runs_df, target):
    """Print a title followed by one summary line per group.

    Each line shows the mean of ``target`` when that column is numeric, or its
    distinct non-missing values otherwise, followed by the mean and standard
    deviation of the ``SOG`` column.

    Parameters
    ----------
    first_sentence : str
        Title printed before the two summary lines.
    first_runs_df, last_runs_df : pandas.DataFrame
        The two groups; both must contain ``target`` and ``"SOG"``.
    target : str
        Column summarised at the start of each line.
    """
    print("\n", first_sentence)
    print(_describe_group(first_runs_df, target, "first"))
    print(_describe_group(last_runs_df, target, "second"))



In [2]:
# Load the dataset; the path is relative to the notebook's working directory.
# NOTE(review): the frames displayed from this data show an "Unnamed: 0" column,
# which usually means the CSV was written with its index — confirm whether
# reading it with index_col=0 is wanted.
csv_path = "all_data.csv"
df = pd.read_csv(csv_path)

In [3]:
# Keep only the rows whose timestamp string starts with the date 2025-06-10.
# na=False makes rows with a missing timestamp drop out of the selection; without
# it they put NaN in the mask and pandas refuses to index with it.
is_10juin = df["ISODateTimeUTC"].str.startswith("2025-06-10", na=False)
data_10juin = df[is_10juin]

## T-test on the TWS between runs 1 to 5 (Karl on the Levitaz, Gian on the Chubanga) and runs 6 to 10 (the other way around)

In [4]:
# Runs 1 to 5 of 10 June form the first group.
first_runs = [f"10_06_Run{run_number}" for run_number in range(1, 6)]
in_first_runs = data_10juin["run"].isin(first_runs)
data_10juin_first_runs = data_10juin.loc[in_first_runs]

In [5]:
# Runs 6 to 10 of 10 June form the second group.
last_runs = [f"10_06_Run{run_number}" for run_number in range(6, 11)]
in_last_runs = data_10juin["run"].isin(last_runs)
data_10juin_last_runs = data_10juin.loc[in_last_runs]

In [6]:
# Compare the TWS column between the two run groups, then print each group's
# mean TWS next to its SOG mean and standard deviation.
tws_groups = (data_10juin_first_runs, data_10juin_last_runs)
t_test(*tws_groups, target="TWS")
print_run_stats("Runs 1 to 5 VS Runs 6 to 10:", *tws_groups, target="TWS")

T-statistic: -208.083, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Runs 1 to 5 VS Runs 6 to 10:
Mean TWS on the first group : 6.19, average SOG: 24.06, std SOG: 2.19
Mean  TWS on the second group : 8.06, average SOG: 24.57, std SOG: 2.16


## T-test: Karl on Levi vs Karl on Chub

In [7]:
# Rows kept for Karl in runs 1-5: boat_name is "Karl Maeder", or boat_name is
# "SenseBoard" with "Gian Stragiotti" as opponent.
# NOTE(review): presumably the SenseBoard rows are Karl's board when he faces Gian — confirm.
is_karl_boat = data_10juin_first_runs["boat_name"] == "Karl Maeder"
is_senseboard_vs_gian = (
    (data_10juin_first_runs["boat_name"] == "SenseBoard")
    & (data_10juin_first_runs["opponent_name"] == "Gian Stragiotti")
)
only_karl_first_runs_levi = data_10juin_first_runs[is_karl_boat | is_senseboard_vs_gian]

# Seeded preview so the same rows are shown on every Restart & Run All; the size
# is capped so a selection with fewer than 5 rows does not raise.
only_karl_first_runs_levi.sample(min(5, len(only_karl_first_runs_levi)), random_state=0)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,Line_R2,Line_L2,Line_C2,side_line2,total_line2
64002,2025-06-10T12:44:12.259Z,1749559000.0,54.1,54.1,43.535579,43.535577,43.535584,43.53559,,107.944,...,69.201,Levi,-3.068145,1.37808,-0.762753,5.514,8.7,107.944,14.214,122.158
64339,2025-06-10T12:44:45.951Z,1749559000.0,62.7,62.7,43.532292,43.53229,43.532296,43.532302,,97.911,...,69.201,Levi,-1.571817,1.710493,0.379589,5.1,6.688,97.911,11.788,109.699
63995,2025-06-10T12:44:11.540Z,1749559000.0,57.8,57.8,43.535646,43.535645,43.535651,43.535657,,116.2,...,69.201,Levi,-2.035507,1.828349,0.219101,6.6,9.7,116.2,16.3,132.5
67296,2025-06-10T12:56:08.858Z,1749560000.0,49.7,49.7,43.530946,43.530948,43.530942,43.530936,,87.3,...,53.319,Levi,12.511613,0.499781,-8.110887,5.19,7.5,87.3,12.69,99.99
60877,2025-06-10T12:32:11.150Z,1749559000.0,64.4,64.4,43.535273,43.535275,43.535269,43.535263,,96.8,...,54.787,Levi,-17.800566,-11.262627,1.001341,6.2,6.3,96.8,12.5,109.3


In [8]:
# Rows kept for Karl in runs 6-10: boat_name is "Karl Maeder", or boat_name is
# "SenseBoard" with "Gian Stragiotti" as opponent.
# NOTE(review): presumably the SenseBoard rows are Karl's board when he faces Gian — confirm.
is_karl_boat = data_10juin_last_runs["boat_name"] == "Karl Maeder"
is_senseboard_vs_gian = (
    (data_10juin_last_runs["boat_name"] == "SenseBoard")
    & (data_10juin_last_runs["opponent_name"] == "Gian Stragiotti")
)
only_karl_last_runs_chub = data_10juin_last_runs[is_karl_boat | is_senseboard_vs_gian]

# Seeded preview so the same rows are shown on every Restart & Run All; the size
# is capped so a selection with fewer than 5 rows does not raise.
only_karl_last_runs_chub.sample(min(5, len(only_karl_last_runs_chub)), random_state=0)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,Line_R2,Line_L2,Line_C2,side_line2,total_line2
75692,2025-06-10T13:35:51.858Z,1749563000.0,50.1,50.1,43.531425,43.531427,43.53142,43.531414,,96.5,...,47.895,Chub,0.216459,0.418649,0.085618,6.3,6.721,96.5,13.021,109.521
75602,2025-06-10T13:35:42.858Z,1749563000.0,44.6,44.6,43.5303,43.530302,43.530296,43.53029,,68.978,...,47.895,Chub,-0.623393,0.056485,0.529423,3.818,4.1,68.978,7.918,76.896
70062,2025-06-10T13:13:26.053Z,1749561000.0,52.0,52.0,43.53429,43.534288,43.534294,43.5343,,97.8,...,59.901,Chub,4.693162,-5.473582,-1.218587,4.4,4.8,97.8,9.2,107.0
73634,2025-06-10T13:26:00.260Z,1749562000.0,47.1,47.1,43.533939,43.533941,43.533935,43.533929,,72.1,...,51.106,Chub,-7.830115,5.366402,9.449401,2.8,5.4,72.1,8.2,80.3
77154,2025-06-10T13:40:45.845Z,1749563000.0,60.5,60.5,43.529787,43.529785,43.529792,43.529798,,107.2,...,66.197,Chub,-9.605457,6.270832,-1.68091,4.6,5.1,107.2,9.7,116.9


In [9]:
# Whole-course comparison for Karl (no upwind/downwind split). The heading is
# printed first so the t-test lines appear under it, matching the upwind and
# downwind cells.
print("\nUpwind and downwind for Karl:")
t_test(only_karl_first_runs_levi, only_karl_last_runs_chub)
print_run_stats(
    "Karl on Levi VS Karl on Chub:",
    only_karl_first_runs_levi,
    only_karl_last_runs_chub,
    target="mast_brand",
)

T-statistic: -15.956, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Karl:

 Karl on Levi VS Karl on Chub:
Mean mast_brand on the first group : Levi, average SOG: 23.80, std SOG: 2.03
Mean  mast_brand on the second group : Chub, average SOG: 24.44, std SOG: 2.17


In [10]:
# Upwind comparison for Karl: this notebook treats rows with TWA > 0 as upwind.
only_karl_first_runs_levi_upwind = only_karl_first_runs_levi.loc[lambda rows: rows["TWA"] > 0]
only_karl_last_runs_chub_upwind = only_karl_last_runs_chub.loc[lambda rows: rows["TWA"] > 0]

print("\nUpwind for Karl:")
karl_upwind_groups = (only_karl_first_runs_levi_upwind, only_karl_last_runs_chub_upwind)
t_test(*karl_upwind_groups)
print_run_stats(
    "Karl on Levi upwind VS Karl on Chub upwind:",
    *karl_upwind_groups,
    target="mast_brand",
)


Upwind for Karl:
T-statistic: -44.815, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Karl on Levi upwind VS Karl on Chub upwind:
Mean mast_brand on the first group : Levi, average SOG: 22.12, std SOG: 0.73
Mean  mast_brand on the second group : Chub, average SOG: 22.84, std SOG: 0.54


In [11]:
# Downwind comparison for Karl: this notebook treats rows with TWA <= 0 as downwind.
only_karl_first_runs_levi_downwind = only_karl_first_runs_levi.loc[lambda rows: rows["TWA"] <= 0]
only_karl_last_runs_chub_downwind = only_karl_last_runs_chub.loc[lambda rows: rows["TWA"] <= 0]

print("\nDownwind for Karl:")
karl_downwind_groups = (only_karl_first_runs_levi_downwind, only_karl_last_runs_chub_downwind)
t_test(*karl_downwind_groups)
print_run_stats(
    "Karl on Levi downwind VS Karl on Chub downwind:",
    *karl_downwind_groups,
    target="mast_brand",
)


Downwind for Karl:
T-statistic: -56.835, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Karl on Levi downwind VS Karl on Chub downwind:
Mean mast_brand on the first group : Levi, average SOG: 25.86, std SOG: 0.91
Mean  mast_brand on the second group : Chub, average SOG: 27.20, std SOG: 0.56


## T-test: Gian on Chub vs Gian on Levi

In [12]:
# Rows kept for Gian in runs 1-5: boat_name is "Gian Stragiotti", or boat_name is
# "SenseBoard" with "Karl Maeder" as opponent.
# NOTE(review): presumably the SenseBoard rows are Gian's board when he faces Karl — confirm.
is_gian_boat = data_10juin_first_runs["boat_name"] == "Gian Stragiotti"
is_senseboard_vs_karl = (
    (data_10juin_first_runs["boat_name"] == "SenseBoard")
    & (data_10juin_first_runs["opponent_name"] == "Karl Maeder")
)
only_gian_first_runs_chub = data_10juin_first_runs[is_gian_boat | is_senseboard_vs_karl]

# Seeded preview so the same rows are shown on every Restart & Run All; the size
# is capped so a selection with fewer than 5 rows does not raise.
only_gian_first_runs_chub.sample(min(5, len(only_gian_first_runs_chub)), random_state=0)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,Line_R2,Line_L2,Line_C2,side_line2,total_line2
63914,2025-06-10T12:45:12.651Z,1749560000.0,61.1,61.1,43.529617,43.529615,43.529621,43.529627,,128.1,...,69.201,Chub,-4.472254,-12.487344,-12.51538,11.1,11.6,128.1,22.7,150.8
62357,2025-06-10T12:39:49.553Z,1749559000.0,57.1,57.1,43.531772,43.531774,43.531768,43.531762,,103.4,...,54.391,Chub,7.169173,0.969002,-3.229373,8.1,8.9,103.4,17.0,120.4
58756,2025-06-10T12:29:11.253Z,1749559000.0,65.8,65.8,43.532238,43.532236,43.532242,43.532249,1.0,145.2,...,73.306,Chub,-2.701089,-7.3267,-7.488506,9.1,11.6,145.2,20.7,165.9
65972,2025-06-10T12:53:27.156Z,1749560000.0,57.1,57.1,43.532128,43.532126,43.532132,43.532137,,123.5,...,51.991,Chub,17.136542,-11.05514,0.725792,10.0,10.4,123.5,20.4,143.9
60975,2025-06-10T12:36:40.651Z,1749559000.0,54.5,54.5,43.535105,43.535104,43.53511,43.535116,,131.7,...,62.101,Chub,4.364082,-2.080385,1.103939,8.1,8.4,131.7,16.5,148.2


In [13]:
# Rows kept for Gian in runs 6-10: boat_name is "Gian Stragiotti", or boat_name is
# "SenseBoard" with "Karl Maeder" as opponent.
# NOTE(review): presumably the SenseBoard rows are Gian's board when he faces Karl — confirm.
is_gian_boat = data_10juin_last_runs["boat_name"] == "Gian Stragiotti"
is_senseboard_vs_karl = (
    (data_10juin_last_runs["boat_name"] == "SenseBoard")
    & (data_10juin_last_runs["opponent_name"] == "Karl Maeder")
)
only_gian_last_runs_levi = data_10juin_last_runs[is_gian_boat | is_senseboard_vs_karl]

# Seeded preview so the same rows are shown on every Restart & Run All; the size
# is capped so a selection with fewer than 5 rows does not raise.
only_gian_last_runs_levi.sample(min(5, len(only_gian_last_runs_levi)), random_state=0)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,Line_R2,Line_L2,Line_C2,side_line2,total_line2
70504,2025-06-10T13:13:10.255Z,1749561000.0,71.9,71.9,43.535809,43.535807,43.535813,43.535819,,140.672,...,59.901,Levi,1.030012,-0.291745,0.413648,10.7,13.0,140.672,23.7,164.372
75325,2025-06-10T13:33:25.444Z,1749562000.0,61.3,61.3,43.532056,43.532054,43.53206,43.532066,,130.5,...,62.598,Levi,8.952035,9.640888,13.153035,10.8,11.0,130.5,21.8,152.3
71550,2025-06-10T13:16:08.746Z,1749561000.0,51.4,51.4,43.530247,43.530249,43.530242,43.530236,,102.5,...,42.214,Levi,-2.329484,1.852402,2.97295,7.5,11.9,102.5,19.4,121.9
71039,2025-06-10T13:14:03.757Z,1749561000.0,55.8,55.8,43.530337,43.530335,43.530341,43.530347,,112.6,...,59.901,Levi,13.869512,-20.013549,-7.097113,7.8,9.8,112.6,17.6,130.2
70988,2025-06-10T13:13:58.655Z,1749561000.0,56.5,56.5,43.530846,43.530844,43.53085,43.530855,,141.1,...,59.901,Levi,14.111581,-19.260714,-6.257446,7.0,8.3,141.1,15.3,156.4


In [14]:
# Whole-course comparison for Gian (no upwind/downwind split). The heading is
# printed first so the t-test lines appear under it, matching the upwind and
# downwind cells.
print("\nUpwind and downwind for Gian:")
t_test(only_gian_first_runs_chub, only_gian_last_runs_levi)
print_run_stats(
    "Gian on chub VS Gian on levi:",
    only_gian_first_runs_chub,
    only_gian_last_runs_levi,
    target="mast_brand",
)

T-statistic: -8.918, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Gian:

 Gian on chub VS Gian on levi:


Mean mast_brand on the first group : Chub, average SOG: 24.32, std SOG: 2.30
Mean  mast_brand on the second group : Levi, average SOG: 24.70, std SOG: 2.14


In [15]:
# Upwind comparison for Gian: this notebook treats rows with TWA > 0 as upwind.
only_gian_first_runs_chub_upwind = only_gian_first_runs_chub.loc[lambda rows: rows["TWA"] > 0]
only_gian_last_runs_levi_upwind = only_gian_last_runs_levi.loc[lambda rows: rows["TWA"] > 0]

print("\nUpwind for Gian:")
gian_upwind_groups = (only_gian_first_runs_chub_upwind, only_gian_last_runs_levi_upwind)
t_test(*gian_upwind_groups)
print_run_stats(
    "Gian on chub upwind VS Gian on levi upwind:",
    *gian_upwind_groups,
    target="mast_brand",
)


Upwind for Gian:
T-statistic: -35.891, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Gian on chub upwind VS Gian on levi upwind:
Mean mast_brand on the first group : Chub, average SOG: 22.42, std SOG: 0.98
Mean  mast_brand on the second group : Levi, average SOG: 23.13, std SOG: 0.55


In [16]:
# Downwind comparison for Gian: this notebook treats rows with TWA <= 0 as downwind.
only_gian_first_runs_chub_downwind = only_gian_first_runs_chub.loc[lambda rows: rows["TWA"] <= 0]
only_gian_last_runs_levi_downwind = only_gian_last_runs_levi.loc[lambda rows: rows["TWA"] <= 0]

print("\nDownwind for Gian:")
gian_downwind_groups = (only_gian_first_runs_chub_downwind, only_gian_last_runs_levi_downwind)
t_test(*gian_downwind_groups)  # downwind
print_run_stats(
    "Gian on chub downwind VS Gian on levi downwind:",
    *gian_downwind_groups,
    target="mast_brand",
)


Downwind for Gian:
T-statistic: -32.900, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Gian on chub downwind VS Gian on levi downwind:
Mean mast_brand on the first group : Chub, average SOG: 26.65, std SOG: 0.86
Mean  mast_brand on the second group : Levi, average SOG: 27.41, std SOG: 0.58
