In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats as stats

def t_test(df1, df2, target="SOG", alpha=0.05):
    """Run an independent two-sample t-test on one column and print the verdict.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups to compare; each must contain the ``target`` column.
    target : str, default "SOG"
        Name of the column whose values are compared. Missing values are
        dropped separately in each group before the test is run.
    alpha : float, default 0.05
        Significance level the p-value is compared against.

    Returns
    -------
    None
        The statistic, the p-value and the conclusion are printed.
    """
    sample_1 = df1[target].dropna()
    sample_2 = df2[target].dropna()

    # With an empty group there is nothing to compare; say so explicitly rather
    # than letting a NaN p-value be reported as "not significant".
    if sample_1.empty or sample_2.empty:
        print(f"Cannot run the t-test on {target}: at least one group has no data "
              f"(sizes: {len(sample_1)} and {len(sample_2)}).")
        return

    t_stat, p_value = stats.ttest_ind(sample_1, sample_2)
    print(f"T-statistic: {t_stat:.3f}, p-value: {p_value:.15f}")

    if np.isnan(p_value):
        # NaN compares False against any threshold, so it needs its own branch.
        print("The t-test result is undefined (NaN), no conclusion can be drawn.")
    elif p_value < alpha:
        print("The difference is statistically significant, keeping data split.")
    else:
        print("The difference is not statistically significant, keeping data combined.")

def print_run_stats(first_sentence, first_runs_df, last_runs_df, target):
    """Print a short summary of ``target`` and of SOG for two groups of rows.

    Parameters
    ----------
    first_sentence : str
        Heading printed before the two summary lines.
    first_runs_df, last_runs_df : pandas.DataFrame
        The two groups; each must contain ``target`` and a ``"SOG"`` column.
    target : str
        Column to summarise. A numeric column is reported as its mean with two
        decimals; any other column is reported as the comma-separated list of
        its distinct non-missing values.

    Returns
    -------
    None
        Everything is printed.
    """
    def _summarise(frame):
        column = frame[target]
        # Decide on the actual dtype rather than comparing against "O", so that
        # categorical / string-dtype columns are listed instead of averaged.
        if pd.api.types.is_numeric_dtype(column):
            return f"{column.mean():.2f}"
        # str() keeps the join working for non-string labels as well.
        return ", ".join(map(str, column.dropna().unique()))

    print("\n", first_sentence)

    first_target = _summarise(first_runs_df)
    last_target = _summarise(last_runs_df)

    print(f"Mean {target} on the first group : {first_target}, "
          f"average SOG: {first_runs_df['SOG'].mean():.2f}, std SOG: {first_runs_df['SOG'].std():.2f}")

    print(f"Mean  {target} on the second group : {last_target}, "
          f"average SOG: {last_runs_df['SOG'].mean():.2f}, std SOG: {last_runs_df['SOG'].std():.2f}")



In [2]:
# Load the full dataset from a CSV file; the relative path is resolved against
# the kernel's current working directory.
df = pd.read_csv("all_data.csv")

In [3]:
# Keep only the rows whose timestamp string starts with 2025-06-10.
# na=False makes missing timestamps count as non-matching, so the mask is
# strictly boolean and the selection cannot fail on NaN entries.
data_10juin = df[df["ISODateTimeUTC"].str.startswith("2025-06-10", na=False)]

## T-test on the TWS between runs 1 to 5, where Karl is on the Levitaz and Gian is on the Chubanga, and runs 6 to 10, where it is the other way around

In [4]:
# Labels of runs 1 to 5, then the rows of the day that belong to those runs.
first_runs = [f"10_06_Run{run_number}" for run_number in range(1, 6)]
data_10juin_first_runs = data_10juin.loc[data_10juin["run"].isin(first_runs)]

In [5]:
# Labels of runs 6 to 10, then the rows of the day that belong to those runs.
last_runs = [f"10_06_Run{run_number}" for run_number in range(6, 11)]
data_10juin_last_runs = data_10juin.loc[data_10juin["run"].isin(last_runs)]

In [6]:
# Compare the TWS column between the two run groups, then print each group's
# mean TWS next to its SOG mean and standard deviation.
t_test(df1=data_10juin_first_runs, df2=data_10juin_last_runs, target="TWS")
print_run_stats(
    first_sentence="Runs 1 to 5 VS Runs 6 to 10:",
    first_runs_df=data_10juin_first_runs,
    last_runs_df=data_10juin_last_runs,
    target="TWS",
)

T-statistic: -208.083, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Runs 1 to 5 VS Runs 6 to 10:
Mean TWS on the first group : 6.19, average SOG: 24.06, std SOG: 2.19
Mean  TWS on the second group : 8.06, average SOG: 24.57, std SOG: 2.16


## T-test: Karl on Levi vs Karl on Chub

In [7]:
# Rows of runs 1-5 attributed to Karl: either logged under his own name, or
# logged as "SenseBoard" with Gian as the opponent
# (presumably Karl's board recorded under the SenseBoard name - TODO confirm).
only_karl_first_runs_levi = data_10juin_first_runs[
    (data_10juin_first_runs["boat_name"] == "Karl Maeder")
    | (
        (data_10juin_first_runs["boat_name"] == "SenseBoard")
        & (data_10juin_first_runs["opponent_name"] == "Gian Stragiotti")
    )
]
# Fixed seed so the preview shows the same rows on every Restart & Run All.
only_karl_first_runs_levi.sample(5, random_state=0)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
64327,2025-06-10T12:44:44.740Z,1749559000.0,64.1,64.1,43.532412,43.532411,43.532417,43.532423,,129.8,...,Gian Stragiotti,master,102.89,69.201,Levi,-1.489683,1.877192,0.562547,16.7,146.5
65563,2025-06-10T12:47:54.657Z,1749560000.0,49.5,49.5,43.534966,43.534968,43.534962,43.534955,,88.1,...,Gian Stragiotti,master,102.89,48.401,Levi,-23.369596,7.805314,21.520873,11.5,99.6
59230,2025-06-10T12:28:45.352Z,1749559000.0,56.8,56.8,43.534714,43.534712,43.534718,43.534724,,135.2,...,Gian Stragiotti,master,102.89,73.306,Levi,-2.78195,-3.215745,-4.241951,18.4,153.6
69658,2025-06-10T13:05:50.658Z,1749561000.0,59.6,59.6,43.533101,43.533103,43.533097,43.53309,,91.986,...,Gian Stragiotti,master,102.89,47.799,Levi,-5.592161,-9.219043,-2.034237,10.412,102.398
62122,2025-06-10T12:37:33.150Z,1749559000.0,65.8,65.8,43.530203,43.530202,43.530207,43.530213,,98.056,...,Gian Stragiotti,slave,102.89,62.101,Levi,19.614776,-4.464558,8.527888,12.612,110.668


In [8]:
# Rows of runs 6-10 attributed to Karl: either logged under his own name, or
# logged as "SenseBoard" with Gian as the opponent
# (presumably Karl's board recorded under the SenseBoard name - TODO confirm).
only_karl_last_runs_chub = data_10juin_last_runs[
    (data_10juin_last_runs["boat_name"] == "Karl Maeder")
    | (
        (data_10juin_last_runs["boat_name"] == "SenseBoard")
        & (data_10juin_last_runs["opponent_name"] == "Gian Stragiotti")
    )
]
# Fixed seed so the preview shows the same rows on every Restart & Run All.
only_karl_last_runs_chub.sample(5, random_state=0)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
77016,2025-06-10T13:40:32.065Z,1749563000.0,63.2,63.2,43.531188,43.531187,43.531193,43.531199,,128.1,...,SenseBoard,master,99.995,66.197,Chub,-5.665253,3.836456,-0.949187,12.693,140.793
71275,2025-06-10T13:16:23.450Z,1749561000.0,48.3,48.3,43.532136,43.532137,43.532131,43.532125,,89.1,...,SenseBoard,slave,99.995,42.214,Chub,-3.620608,4.98308,6.000162,13.3,102.4
76524,2025-06-10T13:39:42.859Z,1749563000.0,59.5,59.5,43.536126,43.536124,43.536131,43.536137,,120.5,...,SenseBoard,master,99.995,66.197,Chub,0.803222,0.055063,0.607052,9.4,129.9
74301,2025-06-10T13:32:45.755Z,1749562000.0,58.2,58.2,43.536144,43.536142,43.536148,43.536154,,115.555,...,SenseBoard,slave,99.995,62.598,Chub,-0.994006,-0.13811,-0.770579,13.6,129.155
72561,2025-06-10T13:23:29.263Z,1749562000.0,49.3,49.3,43.530073,43.530071,43.530077,43.530083,,110.4,...,SenseBoard,master,99.995,67.8,Chub,-4.704631,-11.395003,-11.933099,8.961,119.361


In [9]:
# Print the heading first so the t-test output appears underneath it.
print("\nUpwind and downwind for Karl:")
# No TWA filter here: all of Karl's rows are compared on the default SOG target.
t_test(only_karl_first_runs_levi, only_karl_last_runs_chub)
print_run_stats(
    "Karl on Levi VS Karl on Chub:",
    only_karl_first_runs_levi,
    only_karl_last_runs_chub,
    target="mast_brand",
)

T-statistic: -15.956, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Karl:

 Karl on Levi VS Karl on Chub:
Mean mast_brand on the first group : Levi, average SOG: 23.80, std SOG: 2.03
Mean  mast_brand on the second group : Chub, average SOG: 24.44, std SOG: 2.17


In [10]:
# Rows with a strictly positive TWA are treated as the upwind subset.
only_karl_first_runs_levi_upwind = only_karl_first_runs_levi.loc[
    lambda runs: runs["TWA"].gt(0)
]
only_karl_last_runs_chub_upwind = only_karl_last_runs_chub.loc[
    lambda runs: runs["TWA"].gt(0)
]

print("\nUpwind for Karl:")
t_test(df1=only_karl_first_runs_levi_upwind, df2=only_karl_last_runs_chub_upwind)
print_run_stats(
    first_sentence="Karl on Levi upwind VS Karl on Chub upwind:",
    first_runs_df=only_karl_first_runs_levi_upwind,
    last_runs_df=only_karl_last_runs_chub_upwind,
    target="mast_brand",
)


Upwind for Karl:
T-statistic: -44.815, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Karl on Levi upwind VS Karl on Chub upwind:
Mean mast_brand on the first group : Levi, average SOG: 22.12, std SOG: 0.73
Mean  mast_brand on the second group : Chub, average SOG: 22.84, std SOG: 0.54


In [11]:
# Rows with a zero or negative TWA are treated as the downwind subset.
only_karl_first_runs_levi_downwind = only_karl_first_runs_levi.loc[
    lambda runs: runs["TWA"].le(0)
]
only_karl_last_runs_chub_downwind = only_karl_last_runs_chub.loc[
    lambda runs: runs["TWA"].le(0)
]

print("\nDownwind for Karl:")
t_test(df1=only_karl_first_runs_levi_downwind, df2=only_karl_last_runs_chub_downwind)
print_run_stats(
    first_sentence="Karl on Levi downwind VS Karl on Chub downwind:",
    first_runs_df=only_karl_first_runs_levi_downwind,
    last_runs_df=only_karl_last_runs_chub_downwind,
    target="mast_brand",
)


Downwind for Karl:
T-statistic: -56.835, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Karl on Levi downwind VS Karl on Chub downwind:
Mean mast_brand on the first group : Levi, average SOG: 25.86, std SOG: 0.91
Mean  mast_brand on the second group : Chub, average SOG: 27.20, std SOG: 0.56


## T-test for Gian (heavy, vs Karl not heavy): Gian on Chub vs Gian on Levi

In [12]:
# Rows of runs 1-5 attributed to Gian: either logged under his own name, or
# logged as "SenseBoard" with Karl as the opponent
# (presumably Gian's board recorded under the SenseBoard name - TODO confirm).
only_gian_first_runs_chub = data_10juin_first_runs[
    (data_10juin_first_runs["boat_name"] == "Gian Stragiotti")
    | (
        (data_10juin_first_runs["boat_name"] == "SenseBoard")
        & (data_10juin_first_runs["opponent_name"] == "Karl Maeder")
    )
]
# Fixed seed so the preview shows the same rows on every Restart & Run All.
only_gian_first_runs_chub.sample(5, random_state=0)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
67120,2025-06-10T12:56:44.559Z,1749560000.0,56.7,56.7,43.535428,43.53543,43.535424,43.535418,,118.911,...,SenseBoard,master,106.195,53.319,Chub,32.955512,-1.614282,-24.524371,18.956,137.867
63700,2025-06-10T12:44:51.253Z,1749559000.0,63.9,63.9,43.53176,43.531758,43.531764,43.531771,,128.4,...,SenseBoard,slave,106.195,69.201,Chub,-2.568955,-2.823459,-3.807107,14.9,143.3
60065,2025-06-10T12:31:44.752Z,1749559000.0,60.8,60.8,43.532472,43.532474,43.532468,43.532462,1.0,101.6,...,SenseBoard,slave,106.195,54.787,Chub,-12.347285,-6.708012,2.05466,10.5,112.1
67872,2025-06-10T13:02:34.456Z,1749561000.0,64.8,64.8,43.534456,43.534454,43.53446,43.534466,,133.2,...,SenseBoard,slave,106.195,61.199,Chub,-0.992293,-0.476019,-1.000079,14.1,147.3
58511,2025-06-10T12:28:46.753Z,1749559000.0,66.3,66.3,43.534585,43.534583,43.534589,43.534595,1.0,105.0,...,SenseBoard,slave,106.195,73.306,Chub,-3.631926,-4.128474,-5.483013,11.449,116.449


In [13]:
# Rows of runs 6-10 attributed to Gian: either logged under his own name, or
# logged as "SenseBoard" with Karl as the opponent
# (presumably Gian's board recorded under the SenseBoard name - TODO confirm).
only_gian_last_runs_levi = data_10juin_last_runs[
    (data_10juin_last_runs["boat_name"] == "Gian Stragiotti")
    | (
        (data_10juin_last_runs["boat_name"] == "SenseBoard")
        & (data_10juin_last_runs["opponent_name"] == "Karl Maeder")
    )
]
# Fixed seed so the preview shows the same rows on every Restart & Run All.
only_gian_last_runs_levi.sample(5, random_state=0)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
76410,2025-06-10T13:36:15.755Z,1749563000.0,47.6,47.6,43.534342,43.534344,43.534337,43.534331,,106.0,...,Karl Maeder,master,109.09,47.895,Levi,1.678837,-4.110697,-3.869077,18.6,124.6
80039,2025-06-10T13:50:01.755Z,1749563000.0,55.9,55.9,43.529958,43.529956,43.529962,43.529968,,119.5,...,Karl Maeder,master,109.09,67.905,Levi,0.195086,3.992024,3.217347,19.5,139.0
76071,2025-06-10T13:35:41.854Z,1749563000.0,53.5,53.5,43.530119,43.530121,43.530115,43.530108,,115.5,...,Karl Maeder,master,109.09,47.895,Levi,0.174876,-0.096957,-0.197857,21.6,137.1
80111,2025-06-10T13:50:08.956Z,1749563000.0,55.6,55.6,43.529216,43.529214,43.529221,43.529227,,128.19,...,Karl Maeder,master,109.09,67.905,Levi,2.186418,5.711618,5.814881,15.0,143.19
77219,2025-06-10T13:39:46.155Z,1749563000.0,64.4,64.4,43.535876,43.535874,43.53588,43.535886,,133.8,...,Karl Maeder,slave,109.09,66.197,Levi,-1.539395,1.979353,0.245364,21.5,155.3


In [14]:
# Print the heading first so the t-test output appears underneath it.
print("\nUpwind and downwind for Gian:")
# No TWA filter here: all of Gian's rows are compared on the default SOG target.
t_test(only_gian_first_runs_chub, only_gian_last_runs_levi)
print_run_stats(
    "Gian on chub VS Gian on levi:",
    only_gian_first_runs_chub,
    only_gian_last_runs_levi,
    target="mast_brand",
)

T-statistic: -8.918, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Gian:

 Gian on chub VS Gian on levi:
Mean mast_brand on the first group : Chub, average SOG: 24.32, std SOG: 2.30
Mean  mast_brand on the second group : Levi, average SOG: 24.70, std SOG: 2.14


In [15]:
# Rows with a strictly positive TWA are treated as the upwind subset.
only_gian_first_runs_chub_upwind = only_gian_first_runs_chub.loc[
    lambda runs: runs["TWA"].gt(0)
]
only_gian_last_runs_levi_upwind = only_gian_last_runs_levi.loc[
    lambda runs: runs["TWA"].gt(0)
]

print("\nUpwind for Gian:")
t_test(df1=only_gian_first_runs_chub_upwind, df2=only_gian_last_runs_levi_upwind)
print_run_stats(
    first_sentence="Gian on chub upwind VS Gian on levi upwind:",
    first_runs_df=only_gian_first_runs_chub_upwind,
    last_runs_df=only_gian_last_runs_levi_upwind,
    target="mast_brand",
)


Upwind for Gian:
T-statistic: -35.891, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Gian on chub upwind VS Gian on levi upwind:
Mean mast_brand on the first group : Chub, average SOG: 22.42, std SOG: 0.98
Mean  mast_brand on the second group : Levi, average SOG: 23.13, std SOG: 0.55


In [16]:
# Rows with a zero or negative TWA are treated as the downwind subset.
only_gian_first_runs_chub_downwind = only_gian_first_runs_chub.loc[
    lambda runs: runs["TWA"].le(0)
]
only_gian_last_runs_levi_downwind = only_gian_last_runs_levi.loc[
    lambda runs: runs["TWA"].le(0)
]

print("\nDownwind for Gian:")
# Downwind comparison on the default SOG target.
t_test(df1=only_gian_first_runs_chub_downwind, df2=only_gian_last_runs_levi_downwind)
print_run_stats(
    first_sentence="Gian on chub downwind VS Gian on levi downwind:",
    first_runs_df=only_gian_first_runs_chub_downwind,
    last_runs_df=only_gian_last_runs_levi_downwind,
    target="mast_brand",
)


Downwind for Gian:
T-statistic: -32.900, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

 Gian on chub downwind VS Gian on levi downwind:
Mean mast_brand on the first group : Chub, average SOG: 26.65, std SOG: 0.86
Mean  mast_brand on the second group : Levi, average SOG: 27.41, std SOG: 0.58
