In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats as stats

def t_test(df1, df2, target="SOG", alpha=0.05, equal_var=True):
    """Run an independent two-sample t-test on one column of two DataFrames.

    Missing values in ``target`` are dropped from each frame before testing.
    The statistic, the p-value and a one-line verdict are printed.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups to compare; both must contain the ``target`` column.
    target : str, default "SOG"
        Name of the column whose means are compared.
    alpha : float, default 0.05
        Significance threshold applied to the p-value.
    equal_var : bool, default True
        Forwarded to ``scipy.stats.ttest_ind``. ``True`` is the pooled-variance
        Student test (the historical behaviour of this helper); ``False``
        selects Welch's test, which does not assume equal variances.

    Returns
    -------
    tuple of (float, float)
        ``(t_stat, p_value)``. Both are NaN when the test is undefined.
    """
    sample1 = df1[target].dropna()
    sample2 = df2[target].dropna()
    t_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=equal_var)
    print(f"T-statistic: {t_stat:.3f}, p-value: {p_value:.15f}")

    # A NaN p-value compares False against any threshold, so without this guard
    # an undefined test would be reported as "not statistically significant".
    if np.isnan(p_value):
        print("The t-test result is undefined (NaN); check that both groups "
              "have enough non-missing, non-constant values.")
    elif p_value < alpha:
        print("The difference is statistically significant, keeping data split.")
    else:
        print("The difference is not statistically significant, keeping data combined.")

    return float(t_stat), float(p_value)


In [2]:
# Read all_data.csv into a DataFrame; the path is relative to the current
# working directory, so the notebook must be run next to the file.
df = pd.read_csv("all_data.csv")

In [3]:
# Keep only the rows whose ISO timestamp string starts with the date 2025-06-09.
# na=False makes rows with a missing timestamp evaluate to False; without it the
# mask would contain NaN and boolean indexing would raise.
data_9juin = df[df["ISODateTimeUTC"].str.startswith("2025-06-09", na=False)]

## t-test on TWS between runs 5, 6, 7 (where Karl carries the weights) and runs 8, 9, 10, 11 (where Gian carries the 6 kg)

In [4]:
# Run labels that make up the first group of the 9 June session.
first_runs = [
    "09_06_Run5",
    "09_06_Run6",
    "09_06_Run7",
]
# Select every row whose "run" label belongs to that group.
data_9juin_first_runs = data_9juin.loc[lambda frame: frame["run"].isin(first_runs)]

In [5]:
# Run labels that make up the second group of the 9 June session.
last_runs = [
    "09_06_Run8",
    "09_06_Run9",
    "09_06_Run10",
    "09_06_Run11",
]
# Select every row whose "run" label belongs to that group.
data_9juin_last_runs = data_9juin.loc[lambda frame: frame["run"].isin(last_runs)]

In [6]:
# Compare the TWS column between the two run groups, to see whether the
# wind differed between the first and the last runs.
t_test(data_9juin_first_runs,data_9juin_last_runs, target="TWS")
# Raw group means on one line, then the same two values with labels.
print(data_9juin_first_runs["TWS"].mean(),data_9juin_last_runs["TWS"].mean())
print(f"Average TWS in Group 1: {data_9juin_first_runs['TWS'].mean()}")
print(f"Average TWS in Group 2: {data_9juin_last_runs['TWS'].mean()}")

T-statistic: -20.004, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.
7.2590247128437175 7.541569498486814
Average TWS in Group 1: 7.2590247128437175
Average TWS in Group 2: 7.541569498486814


## t-test: Karl heavy vs. Karl not heavy

In [7]:
# First-run rows attributed to Karl: rows whose boat_name is "Karl Maeder",
# plus "SenseBoard" rows whose opponent is "Gian Stragiotti".
# The comparison is case-sensitive, so the label must be spelled "SenseBoard"
# (capital B), exactly as it appears in the boat_name / opponent_name columns.
only_karl_first_runs_heavy = data_9juin_first_runs[
    (data_9juin_first_runs["boat_name"] == "Karl Maeder") |
    ((data_9juin_first_runs["boat_name"] == "SenseBoard") &
     (data_9juin_first_runs["opponent_name"] == "Gian Stragiotti"))
]
# Show a few random rows as a sanity check of the selection.
only_karl_first_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
44954,2025-06-09T12:46:17.072Z,1749473000.0,54.8,54.8,43.506037,43.506035,43.506041,43.506047,1.0,121.7,...,09_06_Run6,1,Karl Maeder,SenseBoard,slave,106.975,68.0,2.312901,2.964729,3.739062
47215,2025-06-09T12:54:24.561Z,1749474000.0,58.4,58.4,43.506571,43.506569,43.506575,43.506581,,126.3,...,09_06_Run7,1,Karl Maeder,SenseBoard,master,106.975,68.88,0.402389,-0.736973,-0.295485
47016,2025-06-09T12:54:04.653Z,1749474000.0,59.1,59.1,43.508555,43.508553,43.508559,43.508566,,114.5,...,09_06_Run7,1,Karl Maeder,SenseBoard,master,106.975,68.88,,,
43653,2025-06-09T12:38:16.452Z,1749473000.0,63.1,63.1,43.505239,43.505237,43.505243,43.50525,,122.5,...,09_06_Run5,1,Karl Maeder,SenseBoard,master,106.975,68.713,-2.759138,-0.934074,-2.522482
47694,2025-06-09T12:55:12.452Z,1749474000.0,47.1,47.1,43.501741,43.501739,43.501745,43.501751,,121.3,...,09_06_Run7,1,Karl Maeder,SenseBoard,master,106.975,68.88,-0.828757,-17.17042,-13.522737


In [8]:
# Last-run rows attributed to Karl: rows whose boat_name is "Karl Maeder",
# plus "SenseBoard" rows whose opponent is "Gian Stragiotti".
only_karl_last_runs_light = data_9juin_last_runs.loc[
    lambda runs: runs["boat_name"].eq("Karl Maeder")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Gian Stragiotti")
    )
]
# Show a few random rows as a sanity check of the selection.
only_karl_last_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
53020,2025-06-09T13:19:18.961Z,1749475000.0,37.4,37.4,43.506734,43.506736,43.50673,43.506724,,84.7,...,09_06_Run9,2,Karl Maeder,SenseBoard,master,100.975,53.803,-12.489409,-1.183784,9.319358
50357,2025-06-09T13:08:17.953Z,1749474000.0,42.4,42.4,43.502838,43.50284,43.502834,43.502828,,87.66,...,09_06_Run8,2,Karl Maeder,SenseBoard,slave,100.975,47.191,-0.075415,1.487801,1.074715
50596,2025-06-09T13:08:41.855Z,1749475000.0,54.2,54.2,43.50567,43.505672,43.505666,43.50566,,87.3,...,09_06_Run8,2,Karl Maeder,SenseBoard,slave,100.975,47.191,18.873987,-2.547427,-16.122154
54153,2025-06-09T13:25:21.854Z,1749476000.0,57.5,57.5,43.503861,43.503859,43.503865,43.503871,1.0,121.1,...,09_06_Run10,1,Karl Maeder,SenseBoard,slave,100.975,61.205,8.054735,2.786584,7.022441
51896,2025-06-09T13:16:41.966Z,1749475000.0,58.5,58.5,43.502199,43.502197,43.502203,43.502209,,105.8,...,09_06_Run9,1,Karl Maeder,SenseBoard,master,100.975,68.692,-14.115525,-4.42824,-13.50496


In [9]:
# t-test on the default target column (SOG) over all of Karl's rows,
# without splitting by TWA sign.
t_test(only_karl_first_runs_heavy,only_karl_last_runs_light) # overall: upwind and downwind together

# Report the mean boat_weight and mean SOG of each group next to the test result.
print("\nUpwind and downwind for Karl:")
print(f"\nWeight of Karl on the first runs: {only_karl_first_runs_heavy['boat_weight'].mean()}, average SOG: {only_karl_first_runs_heavy['SOG'].mean()}")
print(f"Weight of Karl on the last runs: {only_karl_last_runs_light['boat_weight'].mean()}, average SOG: {only_karl_last_runs_light['SOG'].mean()}")

T-statistic: -7.879, p-value: 0.000000000000004
The difference is statistically significant, keeping data split.

Upwind and downwind for Karl:

Weight of Karl on the first runs: 106.97500000000001, average SOG: 23.94727146332986
Weight of Karl on the last runs: 100.975, average SOG: 24.320151187904965


In [10]:
# Rows with a strictly positive TWA are the ones this notebook labels "upwind".
only_karl_first_runs_heavy_upwind = only_karl_first_runs_heavy.loc[
    lambda rows: rows["TWA"].gt(0)
]
only_karl_last_runs_light_upwind = only_karl_last_runs_light.loc[
    lambda rows: rows["TWA"].gt(0)
]

# SOG comparison restricted to the upwind rows.
t_test(only_karl_first_runs_heavy_upwind, only_karl_last_runs_light_upwind)

# Mean boat_weight and mean SOG of each upwind group.
print("\nUpwind for Karl:")
print(f"Weight of Karl on the first runs: {only_karl_first_runs_heavy_upwind['boat_weight'].mean()}, average SOG: {only_karl_first_runs_heavy_upwind['SOG'].mean()}")
print(f"Weight of Karl on the last runs: {only_karl_last_runs_light_upwind['boat_weight'].mean()}, average SOG: {only_karl_last_runs_light_upwind['SOG'].mean()}")

T-statistic: -1.138, p-value: 0.255372507601836
The difference is not statistically significant, keeping data combined.

Upwind for Karl:
Weight of Karl on the first runs: 106.97499999999997, average SOG: 22.72190383681399
Weight of Karl on the last runs: 100.97500000000001, average SOG: 22.74386574074074


In [11]:
# Rows with a TWA of zero or below are the ones this notebook labels "downwind".
only_karl_first_runs_heavy_downwind = only_karl_first_runs_heavy.loc[
    lambda rows: rows["TWA"].le(0)
]
only_karl_last_runs_light_downwind = only_karl_last_runs_light.loc[
    lambda rows: rows["TWA"].le(0)
]

# SOG comparison restricted to the downwind rows.
t_test(only_karl_first_runs_heavy_downwind, only_karl_last_runs_light_downwind)

# Mean boat_weight and mean SOG of each downwind group.
print("\nDownwind for Karl:")
print(f"Weight of Karl on the first runs: {only_karl_first_runs_heavy_downwind['boat_weight'].mean()}, average SOG: {only_karl_first_runs_heavy_downwind['SOG'].mean()}")
print(f"Weight of Karl on the last runs: {only_karl_last_runs_light_downwind['boat_weight'].mean()}, average SOG: {only_karl_last_runs_light_downwind['SOG'].mean()}")


T-statistic: 18.894, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Karl:
Weight of Karl on the first runs: 106.97499999999998, average SOG: 27.031662591687045
Weight of Karl on the last runs: 100.97499999999998, average SOG: 26.32492639842983


## t-test: Gian not heavy vs. Gian heavy

In [12]:
# First-run rows attributed to Gian: rows whose boat_name is "Gian Stragiotti",
# plus "SenseBoard" rows whose opponent is "Karl Maeder".
only_gian_first_runs_light = data_9juin_first_runs.loc[
    lambda runs: runs["boat_name"].eq("Gian Stragiotti")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Karl Maeder")
    )
]
# Show a few random rows as a sanity check of the selection.
only_gian_first_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
46650,2025-06-09T12:48:45.762Z,1749473000.0,58.6,58.6,43.503634,43.503636,43.50363,43.503624,1.0,3.585,...,09_06_Run6,2,SenseBoard,Karl Maeder,master,109.09,47.292,-0.07155,0.897393,0.662285
48053,2025-06-09T12:54:39.454Z,1749474000.0,59.2,59.2,43.50514,43.505138,43.505144,43.50515,1.0,6.3,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,2.34796,-3.289738,-0.994772
48183,2025-06-09T12:54:52.453Z,1749474000.0,52.5,52.5,43.503842,43.50384,43.503846,43.503852,1.0,5.775,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,2.206555,-10.767289,-6.778884
46925,2025-06-09T12:49:13.256Z,1749473000.0,49.8,49.8,43.506886,43.506887,43.506881,43.506875,1.0,5.9,...,09_06_Run6,2,SenseBoard,Karl Maeder,master,109.09,47.292,9.263385,-3.137818,-8.957542
48209,2025-06-09T12:54:55.052Z,1749474000.0,56.5,56.5,43.503584,43.503582,43.503588,43.503593,1.0,4.2,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,1.505398,-13.502514,-9.307664


In [13]:
# Last-run rows attributed to Gian: rows whose boat_name is "Gian Stragiotti",
# plus "SenseBoard" rows whose opponent is "Karl Maeder".
only_gian_last_runs_heavy = data_9juin_last_runs.loc[
    lambda runs: runs["boat_name"].eq("Gian Stragiotti")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Karl Maeder")
    )
]
# Show a few random rows as a sanity check of the selection.
only_gian_last_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
54934,2025-06-09T13:25:38.654Z,1749476000.0,60.9,60.9,43.501833,43.501831,43.501838,43.501844,,7.6,...,09_06_Run10,1,SenseBoard,Karl Maeder,master,115.09,61.205,11.676334,3.659835,9.909272
51051,2025-06-09T13:08:40.158Z,1749475000.0,56.2,56.2,43.505549,43.505551,43.505545,43.505539,,3.1,...,09_06_Run8,2,SenseBoard,Karl Maeder,master,115.09,47.191,19.351912,-2.313902,-16.315828
56908,2025-06-09T13:34:42.756Z,1749476000.0,56.1,56.1,43.50561,43.505609,43.505615,43.505621,1.0,5.1,...,09_06_Run11,1,SenseBoard,Karl Maeder,slave,115.09,67.09,0.341986,-2.793751,-1.954839
56750,2025-06-09T13:34:26.956Z,1749476000.0,49.9,49.9,43.507254,43.507252,43.507258,43.507265,1.0,4.282,...,09_06_Run11,1,SenseBoard,Karl Maeder,slave,115.09,67.09,0.466104,-4.862079,-3.436021
54768,2025-06-09T13:25:22.056Z,1749476000.0,50.9,50.9,43.503654,43.503652,43.503658,43.503665,,9.3,...,09_06_Run10,1,SenseBoard,Karl Maeder,master,115.09,61.205,9.271357,2.942259,7.873076


In [14]:
# t-test on the default target column (SOG) over all of Gian's rows,
# without splitting by TWA sign.
t_test(only_gian_first_runs_light,only_gian_last_runs_heavy) # overall: upwind and downwind together

# Report the mean boat_weight and mean SOG of each group next to the test result.
print("\nUpwind and downwind for Gian:")
print(f"\nWeight of Gian on the first runs: {only_gian_first_runs_light['boat_weight'].mean()}, average SOG: {only_gian_first_runs_light['SOG'].mean()}")
print(f"Weight of Gian on the last runs: {only_gian_last_runs_heavy['boat_weight'].mean()}, average SOG: {only_gian_last_runs_heavy['SOG'].mean()}")

T-statistic: -11.650, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Gian:

Weight of Gian on the first runs: 109.08999999999999, average SOG: 24.236319275008714
Weight of Gian on the last runs: 115.08999999999999, average SOG: 24.825573344872346


In [15]:
# Rows with a strictly positive TWA are the ones this notebook labels "upwind".
only_gian_first_runs_light_upwind = only_gian_first_runs_light.loc[
    lambda rows: rows["TWA"].gt(0)
]
only_gian_last_runs_heavy_upwind = only_gian_last_runs_heavy.loc[
    lambda rows: rows["TWA"].gt(0)
]

# SOG comparison restricted to the upwind rows.
t_test(only_gian_first_runs_light_upwind, only_gian_last_runs_heavy_upwind)

# Mean boat_weight and mean SOG of each upwind group.
print("\nUpwind for Gian:")
print(f"Weight of Gian on the first runs: {only_gian_first_runs_light_upwind['boat_weight'].mean()}, average SOG: {only_gian_first_runs_light_upwind['SOG'].mean()}")
print(f"Weight of Gian on the last runs: {only_gian_last_runs_heavy_upwind['boat_weight'].mean()}, average SOG: {only_gian_last_runs_heavy_upwind['SOG'].mean()}")

T-statistic: -8.572, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind for Gian:
Weight of Gian on the first runs: 109.08999999999997, average SOG: 22.98097323600973
Weight of Gian on the last runs: 115.09000000000002, average SOG: 23.180486862442038


In [16]:
# Rows with a TWA of zero or below are the ones this notebook labels "downwind".
only_gian_first_runs_light_downwind = only_gian_first_runs_light.loc[
    lambda rows: rows["TWA"].le(0)
]
only_gian_last_runs_heavy_downwind = only_gian_last_runs_heavy.loc[
    lambda rows: rows["TWA"].le(0)
]

# SOG comparison restricted to the downwind rows.
t_test(only_gian_first_runs_light_downwind, only_gian_last_runs_heavy_downwind)

# Mean boat_weight and mean SOG of each downwind group.
print("\nDownwind for Gian:")
print(f"Weight of Gian on the first runs: {only_gian_first_runs_light_downwind['boat_weight'].mean()}, average SOG: {only_gian_first_runs_light_downwind['SOG'].mean()}")
print(f"Weight of Gian on the last runs: {only_gian_last_runs_heavy_downwind['boat_weight'].mean()}, average SOG: {only_gian_last_runs_heavy_downwind['SOG'].mean()}")


T-statistic: 10.288, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Gian:
Weight of Gian on the first runs: 109.08999999999999, average SOG: 27.405528255528253
Weight of Gian on the last runs: 115.08999999999997, average SOG: 26.918731563421826
