In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats as stats

def t_test(df1, df2, target="SOG", alpha=0.05, equal_var=True):
    """Run an independent two-sample t-test on one column and print the verdict.

    Missing values in ``target`` are dropped from each group before testing.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups to compare; both must contain the ``target`` column.
    target : str, default "SOG"
        Name of the column whose group means are compared.
    alpha : float, default 0.05
        Significance level the p-value is compared against.
    equal_var : bool, default True
        Forwarded to ``scipy.stats.ttest_ind``. ``True`` keeps the original
        pooled-variance test; ``False`` runs Welch's test, which does not
        assume that both groups share the same variance.

    Returns
    -------
    None
        The statistic, the p-value and the conclusion are only printed.
    """
    sample1 = df1[target].dropna()
    sample2 = df2[target].dropna()

    # With an empty group the test is undefined; say so instead of letting a
    # NaN p-value fall through to the "not significant" branch below.
    if sample1.empty or sample2.empty:
        print(
            f"Not enough data to run a t-test on '{target}' "
            f"(group sizes: {len(sample1)} and {len(sample2)})."
        )
        return

    t_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=equal_var)
    print(f"T-statistic: {t_stat:.3f}, p-value: {p_value:.15f}")

    # A NaN p-value (e.g. both groups have zero variance) compares False
    # against alpha, so it must be reported separately rather than being
    # presented as a "not significant" result.
    if np.isnan(p_value):
        print("The t-test is undefined for these samples (p-value is NaN), no conclusion can be drawn.")
    elif p_value < alpha:
        # p-value below the significance level: the group means differ
        print("The difference is statistically significant, keeping data split.")
    else:
        print("The difference is not statistically significant, keeping data combined.")


In [2]:
# Load the full dataset; the path is resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer="all_data.csv")

In [3]:
# Keep only the rows whose ISODateTimeUTC string starts with "2025-06-09".
# na=False makes rows with a missing timestamp count as "no match"; without it
# the mask would contain NaN and boolean indexing would raise.
data_9juin = df[df["ISODateTimeUTC"].str.startswith("2025-06-09", na=False)]

## t-test on the TWS between runs 5, 6 and 7, where Karl holds the weights, and runs 8, 9, 10 and 11, where Gian holds the 6 kg

In [4]:
# First group of the comparison: runs 5 to 7.
first_runs = [
    "09_06_Run5",
    "09_06_Run6",
    "09_06_Run7",
]
data_9juin_first_runs = data_9juin.loc[lambda day: day["run"].isin(first_runs)]

In [5]:
# Second group of the comparison: runs 8 to 11.
last_runs = [
    "09_06_Run8",
    "09_06_Run9",
    "09_06_Run10",
    "09_06_Run11",
]
data_9juin_last_runs = data_9juin.loc[lambda day: day["run"].isin(last_runs)]

In [19]:
# Compare the TWS column between the two run groups, then print the
# per-group mean and standard deviation.
t_test(data_9juin_first_runs, data_9juin_last_runs, target="TWS")
print(
    data_9juin_first_runs["TWS"].mean(),
    data_9juin_last_runs["TWS"].mean(),
)
print(
    f"Average TWS in Group 1: {data_9juin_first_runs['TWS'].mean()},"
    f"STD TWS in Group 1: {data_9juin_first_runs['TWS'].std()}"
)
print(
    f"Average TWS in Group 2: {data_9juin_last_runs['TWS'].mean()},"
    f"STD TWS in Group 2: {data_9juin_last_runs['TWS'].std()}"
)

T-statistic: -20.004, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.
7.2590247128437175 7.541569498486814
Average TWS in Group 1: 7.2590247128437175,STD TWS in Group 1: 0.909090314209303
Average TWS in Group 2: 7.541569498486814,STD TWS in Group 2: 0.7956380127932735


## t-test: Karl heavy vs. Karl not heavy

In [7]:
# First-run rows attributed to Karl: boat_name is "Karl Maeder", or it is
# "SenseBoard" with "Gian Stragiotti" as opponent.
# NOTE(review): presumably the SenseBoard rows facing Gian are Karl's board - confirm.
only_karl_first_runs_heavy = data_9juin_first_runs.loc[
    lambda runs: runs["boat_name"].eq("Karl Maeder")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Gian Stragiotti")
    )
]
# Show five random rows as a quick sanity check of the selection.
only_karl_first_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
46134,2025-06-09T12:48:41.452Z,1749473000.0,66.3,66.3,43.503165,43.503167,43.503161,43.503154,1.0,112.9,...,SenseBoard,slave,106.975,47.292,Levi,-1.91644,0.210516,1.524744,19.2,132.1
47107,2025-06-09T12:54:13.760Z,1749474000.0,53.5,53.5,43.507661,43.50766,43.507666,43.507672,,134.713,...,SenseBoard,master,106.975,68.88,Levi,-0.027364,0.606628,0.437041,20.1,154.813
48609,2025-06-09T12:56:54.960Z,1749474000.0,51.4,51.4,43.50483,43.504832,43.504827,43.504821,,98.3,...,SenseBoard,master,106.975,34.293,Levi,-1.641943,-6.88098,-2.933474,14.1,112.4
43397,2025-06-09T12:37:50.861Z,1749473000.0,58.7,58.7,43.507866,43.507864,43.50787,43.507876,,125.7,...,SenseBoard,master,106.975,68.713,Levi,-0.169418,-0.548643,-0.530918,15.2,140.9
46321,2025-06-09T12:49:00.160Z,1749473000.0,44.5,44.5,43.505364,43.505366,43.50536,43.505354,1.0,101.99,...,SenseBoard,slave,106.975,47.292,Levi,7.447623,0.440703,-5.155481,16.201,118.191


In [8]:
# Last-run rows attributed to Karl: boat_name is "Karl Maeder", or it is
# "SenseBoard" with "Gian Stragiotti" as opponent.
# NOTE(review): presumably the SenseBoard rows facing Gian are Karl's board - confirm.
only_karl_last_runs_light = data_9juin_last_runs.loc[
    lambda runs: runs["boat_name"].eq("Karl Maeder")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Gian Stragiotti")
    )
]
# Show five random rows as a quick sanity check of the selection.
only_karl_last_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
53804,2025-06-09T13:24:46.951Z,1749475000.0,55.1,55.1,43.507598,43.507596,43.507602,43.507608,1.0,118.0,...,SenseBoard,slave,100.975,61.205,Levi,-0.05224,1.206408,0.891237,17.0,135.0
50468,2025-06-09T13:08:29.048Z,1749475000.0,47.5,47.5,43.504172,43.504174,43.504168,43.504161,,104.5,...,SenseBoard,slave,100.975,47.191,Levi,7.19519,-3.129919,-7.484112,15.2,119.7
52886,2025-06-09T13:19:05.562Z,1749475000.0,48.1,48.1,43.505169,43.505171,43.505165,43.505159,,78.3,...,SenseBoard,master,100.975,53.803,Levi,-9.220969,-2.160697,6.210564,11.6,89.9
52797,2025-06-09T13:18:56.657Z,1749475000.0,56.6,56.6,43.504121,43.504123,43.504117,43.504111,,100.9,...,SenseBoard,master,100.975,53.803,Levi,-6.591985,-4.637526,2.781988,13.3,114.2
49657,2025-06-09T13:06:24.654Z,1749474000.0,53.4,53.4,43.502392,43.50239,43.502396,43.502402,,96.1,...,SenseBoard,slave,100.975,61.789,Levi,26.743593,8.238545,21.96751,10.6,106.7


In [9]:
# Overall comparison for Karl on the default target column (SOG), without
# splitting on TWA.
t_test(only_karl_first_runs_heavy, only_karl_last_runs_light)  # general

print("\nUpwind and downwind for Karl:")
print(
    f"\nWeight of Karl on the first runs: {only_karl_first_runs_heavy['boat_weight'].mean()}, "
    f"average SOG: {only_karl_first_runs_heavy['SOG'].mean()}, "
    f"std SOG: {only_karl_first_runs_heavy['SOG'].std()}"
)
print(
    f"Weight of Karl on the last runs: {only_karl_last_runs_light['boat_weight'].mean()}, "
    f"average SOG: {only_karl_last_runs_light['SOG'].mean()}, "
    f"std SOG: {only_karl_last_runs_light['SOG'].std()}"
)

T-statistic: -7.879, p-value: 0.000000000000004
The difference is statistically significant, keeping data split.

Upwind and downwind for Karl:

Weight of Karl on the first runs: 106.97500000000001, average SOG: 23.94727146332986, std SOG: 2.042511601997199
Weight of Karl on the last runs: 100.975, average SOG: 24.320151187904965, std SOG: 1.962379480136006


In [10]:
# Keep only the rows with a strictly positive TWA, which this notebook labels
# "upwind"; rows with a missing TWA are excluded by the comparison.
only_karl_first_runs_heavy_upwind = only_karl_first_runs_heavy.loc[
    lambda runs: runs["TWA"].gt(0)
]
only_karl_last_runs_light_upwind = only_karl_last_runs_light.loc[
    lambda runs: runs["TWA"].gt(0)
]

t_test(only_karl_first_runs_heavy_upwind, only_karl_last_runs_light_upwind)  # upwind

print("\nUpwind for Karl:")
print(
    f"Weight of Karl on the first runs: {only_karl_first_runs_heavy_upwind['boat_weight'].mean()}, "
    f"average SOG: {only_karl_first_runs_heavy_upwind['SOG'].mean()}, "
    f"std SOG: {only_karl_first_runs_heavy_upwind['SOG'].std()}"
)
print(
    f"Weight of Karl on the last runs: {only_karl_last_runs_light_upwind['boat_weight'].mean()}, "
    f"average SOG: {only_karl_last_runs_light_upwind['SOG'].mean()}, "
    f"std SOG: {only_karl_last_runs_light_upwind['SOG'].std()}"
)

T-statistic: -1.138, p-value: 0.255372507601836
The difference is not statistically significant, keeping data combined.

Upwind for Karl:
Weight of Karl on the first runs: 106.97499999999997, average SOG: 22.72190383681399, std SOG: 0.5513928546466119
Weight of Karl on the last runs: 100.97500000000001, average SOG: 22.74386574074074, std SOG: 0.725225703905136


In [11]:
# Keep only the rows with TWA <= 0, which this notebook labels "downwind";
# rows with a missing TWA are excluded by the comparison.
only_karl_first_runs_heavy_downwind = only_karl_first_runs_heavy.loc[
    lambda runs: runs["TWA"].le(0)
]
only_karl_last_runs_light_downwind = only_karl_last_runs_light.loc[
    lambda runs: runs["TWA"].le(0)
]
t_test(only_karl_first_runs_heavy_downwind, only_karl_last_runs_light_downwind)  # downwind

print("\nDownwind for Karl:")
print(
    f"Weight of Karl on the first runs: {only_karl_first_runs_heavy_downwind['boat_weight'].mean()}, "
    f"average SOG: {only_karl_first_runs_heavy_downwind['SOG'].mean()}, "
    f"std SOG: {only_karl_first_runs_heavy_downwind['SOG'].std()}"
)
print(
    f"Weight of Karl on the last runs: {only_karl_last_runs_light_downwind['boat_weight'].mean()}, "
    f"average SOG: {only_karl_last_runs_light_downwind['SOG'].mean()}, "
    f"std SOG: {only_karl_last_runs_light_downwind['SOG'].std()}"
)


T-statistic: 18.894, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Karl:
Weight of Karl on the first runs: 106.97499999999998, average SOG: 27.031662591687045, std SOG: 0.7814322747955662
Weight of Karl on the last runs: 100.97499999999998, average SOG: 26.32492639842983, std SOG: 0.9483400838098708


## t-test: Gian not heavy vs. Gian heavy

In [12]:
# First-run rows attributed to Gian: boat_name is "Gian Stragiotti", or it is
# "SenseBoard" with "Karl Maeder" as opponent.
# NOTE(review): presumably the SenseBoard rows facing Karl are Gian's board - confirm.
only_gian_first_runs_light = data_9juin_first_runs.loc[
    lambda runs: runs["boat_name"].eq("Gian Stragiotti")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Karl Maeder")
    )
]
# Show five random rows as a quick sanity check of the selection.
only_gian_first_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
44395,2025-06-09T12:38:21.857Z,1749473000.0,58.9,58.9,43.504727,43.504726,43.504732,43.504738,1.0,5.8,...,Karl Maeder,slave,109.09,68.713,Levi,-5.38964,3.242277,-1.136775,134.9,140.7
44623,2025-06-09T12:38:44.656Z,1749473000.0,59.4,59.4,43.502441,43.502439,43.502446,43.502452,1.0,5.2,...,Karl Maeder,slave,109.09,68.713,Levi,-4.717408,-0.280726,-3.346728,135.5,140.7
46923,2025-06-09T12:49:13.053Z,1749473000.0,53.9,53.9,43.506862,43.506863,43.506857,43.506852,1.0,8.3,...,Karl Maeder,master,109.09,47.292,Levi,9.078351,-3.283375,-8.919461,,
48201,2025-06-09T12:54:54.256Z,1749474000.0,51.9,51.9,43.503664,43.503663,43.503668,43.503674,1.0,5.0,...,Karl Maeder,slave,109.09,68.88,Levi,2.014651,-12.262104,-8.033027,117.204,122.204
46756,2025-06-09T12:48:56.360Z,1749473000.0,51.4,51.4,43.50491,43.504912,43.504906,43.5049,1.0,6.112,...,Karl Maeder,master,109.09,47.292,Levi,6.053374,0.263067,-4.243416,112.0,118.112


In [13]:
# Last-run rows attributed to Gian: boat_name is "Gian Stragiotti", or it is
# "SenseBoard" with "Karl Maeder" as opponent.
# NOTE(review): presumably the SenseBoard rows facing Karl are Gian's board - confirm.
only_gian_last_runs_heavy = data_9juin_last_runs.loc[
    lambda runs: runs["boat_name"].eq("Gian Stragiotti")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Karl Maeder")
    )
]
# Show five random rows as a quick sanity check of the selection.
only_gian_last_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
55753,2025-06-09T13:28:06.160Z,1749476000.0,47.6,47.6,43.506066,43.506068,43.506061,43.506055,,4.5,...,Karl Maeder,master,115.09,49.802,Levi,4.578158,5.463658,-0.040548,95.211,99.711
49714,2025-06-09T13:05:28.453Z,1749474000.0,50.7,50.7,43.50818,43.508178,43.508184,43.50819,,8.6,...,Karl Maeder,master,115.09,61.789,Levi,0.319926,-0.645217,-0.344281,134.4,143.0
57911,2025-06-09T13:37:26.844Z,1749476000.0,42.7,42.7,43.503655,43.503656,43.50365,43.503644,,8.1,...,Karl Maeder,slave,115.09,52.596,Levi,-4.12026,-4.002748,0.90319,107.5,115.6
52244,2025-06-09T13:16:08.056Z,1749475000.0,50.0,50.0,43.505751,43.505749,43.505755,43.505761,1.0,7.8,...,Karl Maeder,slave,115.09,68.692,Levi,-2.09117,-1.5256,-2.580432,143.0,150.8
52081,2025-06-09T13:15:51.756Z,1749475000.0,54.6,54.6,43.507407,43.507405,43.507412,43.507418,1.0,4.7,...,Karl Maeder,slave,115.09,68.692,Levi,-1.350743,3.182696,0.802839,134.7,139.4


In [14]:
# Overall comparison for Gian on the default target column (SOG), without
# splitting on TWA.
t_test(only_gian_first_runs_light, only_gian_last_runs_heavy)  # general

print("\nUpwind and downwind for Gian:")
print(
    f"\nWeight of Gian on the first runs: {only_gian_first_runs_light['boat_weight'].mean()}, "
    f"average SOG: {only_gian_first_runs_light['SOG'].mean()}, "
    f"std SOG: {only_gian_first_runs_light['SOG'].std()}"
)
print(
    f"Weight of Gian on the last runs: {only_gian_last_runs_heavy['boat_weight'].mean()}, "
    f"average SOG: {only_gian_last_runs_heavy['SOG'].mean()}, "
    f"std SOG: {only_gian_last_runs_heavy['SOG'].std()}"
)

T-statistic: -11.650, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Gian:

Weight of Gian on the first runs: 109.08999999999999, average SOG: 24.236319275008714, std SOG: 2.135963842904484
Weight of Gian on the last runs: 115.08999999999999, average SOG: 24.825573344872346, std SOG: 2.1231026500669974


In [15]:
# Keep only the rows with a strictly positive TWA, which this notebook labels
# "upwind"; rows with a missing TWA are excluded by the comparison.
only_gian_first_runs_light_upwind = only_gian_first_runs_light.loc[
    lambda runs: runs["TWA"].gt(0)
]
only_gian_last_runs_heavy_upwind = only_gian_last_runs_heavy.loc[
    lambda runs: runs["TWA"].gt(0)
]

t_test(only_gian_first_runs_light_upwind, only_gian_last_runs_heavy_upwind)  # upwind

print("\nUpwind for Gian:")
print(
    f"Weight of Gian on the first runs: {only_gian_first_runs_light_upwind['boat_weight'].mean()}, "
    f"average SOG: {only_gian_first_runs_light_upwind['SOG'].mean()}, "
    f"std SOG: {only_gian_first_runs_light_upwind['SOG'].std()}"
)
print(
    f"Weight of Gian on the last runs: {only_gian_last_runs_heavy_upwind['boat_weight'].mean()}, "
    f"average SOG: {only_gian_last_runs_heavy_upwind['SOG'].mean()}, "
    f"std SOG: {only_gian_last_runs_heavy_upwind['SOG'].std()}"
)

T-statistic: -8.572, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind for Gian:
Weight of Gian on the first runs: 109.08999999999997, average SOG: 22.98097323600973, std SOG: 0.7538625153686097
Weight of Gian on the last runs: 115.09000000000002, average SOG: 23.180486862442038, std SOG: 0.8136933474979365


In [16]:
# Keep only the rows with TWA <= 0, which this notebook labels "downwind";
# rows with a missing TWA are excluded by the comparison.
only_gian_first_runs_light_downwind = only_gian_first_runs_light.loc[
    lambda runs: runs["TWA"].le(0)
]
only_gian_last_runs_heavy_downwind = only_gian_last_runs_heavy.loc[
    lambda runs: runs["TWA"].le(0)
]
t_test(only_gian_first_runs_light_downwind, only_gian_last_runs_heavy_downwind)  # downwind

print("\nDownwind for Gian:")
print(
    f"Weight of Gian on the first runs: {only_gian_first_runs_light_downwind['boat_weight'].mean()}, "
    f"average SOG: {only_gian_first_runs_light_downwind['SOG'].mean()}, "
    f"std SOG: {only_gian_first_runs_light_downwind['SOG'].std()}"
)
print(
    f"Weight of Gian on the last runs: {only_gian_last_runs_heavy_downwind['boat_weight'].mean()}, "
    f"average SOG: {only_gian_last_runs_heavy_downwind['SOG'].mean()}, "
    f"std SOG: {only_gian_last_runs_heavy_downwind['SOG'].std()}"
)


T-statistic: 10.288, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Gian:
Weight of Gian on the first runs: 109.08999999999999, average SOG: 27.405528255528253, std SOG: 0.7868109632056288
Weight of Gian on the last runs: 115.08999999999997, average SOG: 26.918731563421826, std SOG: 1.2548178334314914
