In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats as stats

def t_test(df1, df2, target="SOG", alpha=0.05, equal_var=True):
    """Run an independent two-sample t-test on one column and print the verdict.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups to compare. Only the ``target`` column is read, and
        missing values are dropped from each group independently.
    target : str, default "SOG"
        Name of the column whose group means are compared.
    alpha : float, default 0.05
        Significance level the p-value is compared against.
    equal_var : bool, default True
        Forwarded to ``scipy.stats.ttest_ind``. ``True`` is the pooled-variance
        (Student) test this function has always used; ``False`` selects
        Welch's test, which does not assume equal group variances.

    Returns
    -------
    None
        The statistic, p-value and decision are printed, not returned.

    Notes
    -----
    ``ttest_ind`` treats every row as an independent observation.
    NOTE(review): if the rows are consecutive samples from the same run they
    are probably autocorrelated, which would overstate significance - confirm.
    """
    sample_1 = df1[target].dropna()
    sample_2 = df2[target].dropna()

    t_stat, p_value = stats.ttest_ind(sample_1, sample_2, equal_var=equal_var)
    print(f"T-statistic: {t_stat:.3f}, p-value: {p_value:.15f}")

    # A NaN p-value (empty group, too few rows, zero variance) compares False
    # against alpha and used to be reported as "not significant". Say instead
    # that no decision can be made.
    if np.isnan(p_value):
        print("The t-test could not be computed (too few observations or zero variance), no decision made.")
        return

    # If the p-value is below alpha, the difference is statistically significant
    if p_value < alpha:
        print("The difference is statistically significant, keeping data split.")
    else:
        print("The difference is not statistically significant, keeping data combined.")


In [2]:
# Load the full dataset; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer="all_data.csv")

In [3]:
# Keep only the rows whose UTC timestamp string starts with 2025-06-09.
# na=False maps a missing timestamp to False, so the boolean mask never
# contains NaN (pandas raises when indexing with a mask that has NaN in it).
data_9juin = df[df["ISODateTimeUTC"].str.startswith("2025-06-09", na=False)]

## T-test on TWS: runs 5, 6, 7 (Karl holds the weights) vs. runs 8, 9, 10, 11 (Gian holds 6 kg)

In [4]:
# Runs 5-7 of 9 June form the first group
first_runs = [
    "09_06_Run5",
    "09_06_Run6",
    "09_06_Run7",
]
data_9juin_first_runs = data_9juin.loc[data_9juin["run"].isin(first_runs)]

In [5]:
# Runs 8-11 of 9 June form the second group
last_runs = [
    "09_06_Run8",
    "09_06_Run9",
    "09_06_Run10",
    "09_06_Run11",
]
data_9juin_last_runs = data_9juin.loc[data_9juin["run"].isin(last_runs)]

In [6]:
# Compare the TWS column between the two run groups, then print both group means
t_test(df1=data_9juin_first_runs, df2=data_9juin_last_runs, target="TWS")
print(*(runs["TWS"].mean() for runs in (data_9juin_first_runs, data_9juin_last_runs)))

T-statistic: -20.004, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.
7.2590247128437175 7.541569498486814


## T-test: Karl heavy (runs 5-7) vs. Karl not heavy (runs 8-11)

In [7]:
# Karl's rows in the first runs: rows logged under his own name, plus
# "SenseBoard" rows whose opponent is Gian.
# NOTE(review): presumably a SenseBoard-vs-Gian row means Karl was riding the
# SenseBoard - confirm against the data dictionary.
# The match is exact and case-sensitive: the value in the data is "SenseBoard"
# (capital B); the previous spelling "Senseboard" could never match a row.
only_karl_first_runs_heavy = data_9juin_first_runs[
    (data_9juin_first_runs["boat_name"] == "Karl Maeder") |
    ((data_9juin_first_runs["boat_name"] == "SenseBoard") &
     (data_9juin_first_runs["opponent_name"] == "Gian Stragiotti"))
]
only_karl_first_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
47074,2025-06-09T12:54:10.452Z,1749474000.0,52.5,52.5,43.507987,43.507985,43.507991,43.507997,,127.1,...,09_06_Run7,1,Karl Maeder,SenseBoard,master,106.975,68.88,1.943861,0.053298,1.321254
43822,2025-06-09T12:38:33.359Z,1749473000.0,55.6,55.6,43.503502,43.5035,43.503506,43.503513,,126.5,...,09_06_Run5,1,Karl Maeder,SenseBoard,master,106.975,68.713,0.049684,2.280999,1.730463
46355,2025-06-09T12:49:03.561Z,1749473000.0,56.0,56.0,43.50576,43.505762,43.505756,43.50575,1.0,105.057,...,09_06_Run6,2,Karl Maeder,SenseBoard,slave,106.975,47.292,7.535773,-1.407823,-6.510781
46249,2025-06-09T12:48:52.960Z,1749473000.0,47.4,47.4,43.504547,43.504549,43.504543,43.504538,1.0,89.0,...,09_06_Run6,2,Karl Maeder,SenseBoard,slave,106.975,47.292,0.944424,-1.857902,-1.953402
46272,2025-06-09T12:48:55.252Z,1749473000.0,43.8,43.8,43.504807,43.504809,43.504803,43.504797,1.0,100.458,...,09_06_Run6,2,Karl Maeder,SenseBoard,slave,106.975,47.292,2.341997,0.002062,-1.712383


In [8]:
# Karl's rows in the last runs: rows logged under his own name, plus
# "SenseBoard" rows whose opponent is Gian.
only_karl_last_runs_light = data_9juin_last_runs.loc[
    lambda runs: runs["boat_name"].eq("Karl Maeder")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Gian Stragiotti")
    )
]
only_karl_last_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
53781,2025-06-09T13:24:44.654Z,1749475000.0,55.4,55.4,43.507835,43.507833,43.507839,43.507846,1.0,84.5,...,09_06_Run10,1,Karl Maeder,SenseBoard,slave,100.975,61.205,0.676656,0.654939,0.937048
49316,2025-06-09T13:05:50.561Z,1749474000.0,52.9,52.9,43.505969,43.505967,43.505973,43.505979,,126.5,...,09_06_Run8,1,Karl Maeder,SenseBoard,slave,100.975,61.789,9.604028,1.36696,6.6695
56260,2025-06-09T13:34:45.061Z,1749476000.0,51.6,51.6,43.505294,43.505292,43.505299,43.505305,,115.1,...,09_06_Run11,1,Karl Maeder,SenseBoard,master,100.975,67.09,1.550345,-3.722573,-1.929907
54094,2025-06-09T13:25:15.960Z,1749476000.0,47.8,47.8,43.504496,43.504494,43.5045,43.504507,1.0,113.6,...,09_06_Run10,1,Karl Maeder,SenseBoard,slave,100.975,61.205,8.005888,3.491369,7.525193
54041,2025-06-09T13:25:10.655Z,1749476000.0,61.8,61.8,43.505074,43.505072,43.505078,43.505084,1.0,129.0,...,09_06_Run10,1,Karl Maeder,SenseBoard,slave,100.975,61.205,5.649076,1.270432,4.322307


In [9]:
# General comparison (no TWA filter) on the default SOG column
t_test(
    df1=only_karl_first_runs_heavy,
    df2=only_karl_last_runs_light,
    target="SOG",
)

T-statistic: -7.879, p-value: 0.000000000000004
The difference is statistically significant, keeping data split.


In [10]:
# Upwind subset: rows with TWA > 0.
# NOTE(review): assumes a positive TWA means upwind in this dataset - confirm.
t_test(
    only_karl_first_runs_heavy.loc[lambda rows: rows["TWA"].gt(0)],
    only_karl_last_runs_light.loc[lambda rows: rows["TWA"].gt(0)],
)

T-statistic: -1.138, p-value: 0.255372507601836
The difference is not statistically significant, keeping data combined.


In [11]:
# Downwind subset: rows with TWA <= 0.
# NOTE(review): assumes a non-positive TWA means downwind in this dataset - confirm.
t_test(
    only_karl_first_runs_heavy.loc[lambda rows: rows["TWA"].le(0)],
    only_karl_last_runs_light.loc[lambda rows: rows["TWA"].le(0)],
)

T-statistic: 18.894, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.


## T-test: Gian light (runs 5-7) vs. Gian heavy (runs 8-11)

In [13]:
# Gian's rows in the first runs: rows logged under his own name, plus
# "SenseBoard" rows whose opponent is Karl.
only_gian_first_runs_light = data_9juin_first_runs.loc[
    lambda runs: runs["boat_name"].eq("Gian Stragiotti")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Karl Maeder")
    )
]
only_gian_first_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
44662,2025-06-09T12:38:48.560Z,1749473000.0,41.3,41.3,43.502049,43.502047,43.502053,43.502059,1.0,4.3,...,09_06_Run5,1,SenseBoard,Karl Maeder,slave,109.09,68.713,-5.925147,-3.369733,-6.451412
47709,2025-06-09T12:54:05.072Z,1749474000.0,58.2,58.2,43.508599,43.508597,43.508603,43.508609,1.0,3.5,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,1.199545,-0.751558,0.138641
47783,2025-06-09T12:54:12.454Z,1749474000.0,51.3,51.3,43.507862,43.50786,43.507866,43.507872,1.0,4.1,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,0.299937,0.651817,0.687388
48063,2025-06-09T12:54:40.454Z,1749474000.0,53.1,53.1,43.505044,43.505042,43.505048,43.505054,1.0,6.3,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,3.000509,-3.914507,-1.053662
48978,2025-06-09T12:56:57.558Z,1749474000.0,49.8,49.8,43.50518,43.505182,43.505176,43.505171,,3.7,...,09_06_Run7,2,SenseBoard,Karl Maeder,slave,109.09,34.293,-1.46937,-7.079916,-3.171103


In [14]:
# Gian's rows in the last runs: rows logged under his own name, plus
# "SenseBoard" rows whose opponent is Karl.
only_gian_last_runs_heavy = data_9juin_last_runs.loc[
    lambda runs: runs["boat_name"].eq("Gian Stragiotti")
    | (
        runs["boat_name"].eq("SenseBoard")
        & runs["opponent_name"].eq("Karl Maeder")
    )
]
only_gian_last_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
55882,2025-06-09T13:28:19.048Z,1749476000.0,56.2,56.2,43.507671,43.507673,43.507667,43.507661,,7.07,...,09_06_Run10,2,SenseBoard,Karl Maeder,master,115.09,49.802,7.389096,3.503061,-3.545179
54790,2025-06-09T13:25:24.257Z,1749476000.0,57.7,57.7,43.503416,43.503414,43.50342,43.503426,,7.5,...,09_06_Run10,1,SenseBoard,Karl Maeder,master,115.09,61.205,9.883618,3.785563,8.942836
57004,2025-06-09T13:34:52.358Z,1749476000.0,45.0,45.0,43.504591,43.504589,43.504596,43.504602,1.0,4.8,...,09_06_Run11,1,SenseBoard,Karl Maeder,slave,115.09,67.09,0.415306,-3.354227,-2.367493
53580,2025-06-09T13:19:21.053Z,1749475000.0,54.3,54.3,43.507223,43.507225,43.507219,43.507213,,6.8,...,09_06_Run9,2,SenseBoard,Karl Maeder,slave,115.09,53.803,-15.134212,-0.388986,11.861712
58266,2025-06-09T13:38:02.359Z,1749476000.0,25.9,25.9,43.507805,43.507806,43.507801,43.507795,,2.052,...,09_06_Run11,2,SenseBoard,Karl Maeder,slave,115.09,52.596,-11.372068,3.26142,11.028102


In [15]:
# General comparison (no TWA filter) on the default SOG column
t_test(
    df1=only_gian_first_runs_light,
    df2=only_gian_last_runs_heavy,
    target="SOG",
)

T-statistic: -11.650, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.


In [16]:
# Upwind subset: rows with TWA > 0.
# NOTE(review): assumes a positive TWA means upwind in this dataset - confirm.
t_test(
    only_gian_first_runs_light.loc[lambda rows: rows["TWA"].gt(0)],
    only_gian_last_runs_heavy.loc[lambda rows: rows["TWA"].gt(0)],
)

T-statistic: -8.572, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.


In [17]:
# Downwind subset: rows with TWA <= 0.
# NOTE(review): assumes a non-positive TWA means downwind in this dataset - confirm.
t_test(
    only_gian_first_runs_light.loc[lambda rows: rows["TWA"].le(0)],
    only_gian_last_runs_heavy.loc[lambda rows: rows["TWA"].le(0)],
)

T-statistic: 10.288, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.
