In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats as stats

def t_test(df1, df2, target="SOG", alpha=0.05, equal_var=True):
    """Run an independent two-sample t-test on one column and print the verdict.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups to compare. Both must contain the ``target`` column.
    target : str, default "SOG"
        Name of the column whose values are compared. Missing values are
        dropped from each group before testing.
    alpha : float, default 0.05
        Significance level the p-value is compared against.
    equal_var : bool, default True
        Forwarded to ``scipy.stats.ttest_ind``. The default keeps the
        pooled-variance test this function has always run; pass ``False``
        for Welch's test when the groups may have different variances.

    Returns
    -------
    tuple
        ``(t_stat, p_value)`` as computed by SciPy, so callers can reuse the
        numbers instead of parsing the printed text.
    """
    sample1 = df1[target].dropna()
    sample2 = df2[target].dropna()
    t_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=equal_var)
    print(f"T-statistic: {t_stat:.3f}, p-value: {p_value:.15f}")

    if np.isnan(p_value):
        # NaN compares False against any threshold, so without this branch an
        # undefined test would be reported as "not statistically significant".
        print("The t-test result is undefined (NaN); check that both groups have enough valid observations.")
    elif p_value < alpha:
        print("The difference is statistically significant, keeping data split.")
    else:
        print("The difference is not statistically significant, keeping data combined.")

    return t_stat, p_value


In [2]:
# Load the full dataset; every later cell filters this frame.
ALL_DATA_CSV = "all_data.csv"
df = pd.read_csv(ALL_DATA_CSV)

In [3]:
# Keep only the rows whose ISO timestamp starts with 2025-06-09.
# na=False makes rows with a missing timestamp count as non-matching; without it
# a single NaN turns the mask non-boolean and the indexing raises.
data_9juin = df[df["ISODateTimeUTC"].str.startswith("2025-06-09", na=False)]

## T-test on the TWS between runs 5, 6 and 7, where Karl carries the weights, and runs 8, 9, 10 and 11, where Gian carries the 6 kg

In [4]:
# Run labels that make up the first group of the comparison.
first_runs = [
    "09_06_Run5",
    "09_06_Run6",
    "09_06_Run7",
]
# Rows of 9 June whose "run" label is one of the labels above.
data_9juin_first_runs = data_9juin.loc[lambda frame: frame["run"].isin(first_runs)]

In [5]:
# Run labels that make up the second group of the comparison.
last_runs = [
    "09_06_Run8",
    "09_06_Run9",
    "09_06_Run10",
    "09_06_Run11",
]
# Rows of 9 June whose "run" label is one of the labels above.
data_9juin_last_runs = data_9juin.loc[lambda frame: frame["run"].isin(last_runs)]

In [6]:
# Test whether TWS differs between the two run groups, then show both means.
t_test(data_9juin_first_runs, data_9juin_last_runs, target="TWS")

# Compute each mean once and reuse it for the raw and the labelled printout.
tws_mean_first_runs = data_9juin_first_runs["TWS"].mean()
tws_mean_last_runs = data_9juin_last_runs["TWS"].mean()
print(tws_mean_first_runs, tws_mean_last_runs)
print(f"Average TWS in Group 1: {tws_mean_first_runs}")
print(f"Average TWS in Group 2: {tws_mean_last_runs}")

T-statistic: -20.004, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.
7.2590247128437175 7.541569498486814
Average TWS in Group 1: 7.2590247128437175
Average TWS in Group 2: 7.541569498486814


## T-test: Karl heavy vs. Karl not heavy

In [7]:
# First-run rows attributed to Karl: boat_name is "Karl Maeder", or boat_name is
# "SenseBoard" with "Gian Stragiotti" recorded as the opponent.
only_karl_first_runs_heavy = data_9juin_first_runs.loc[
    lambda rows: rows["boat_name"].eq("Karl Maeder")
    | (
        rows["boat_name"].eq("SenseBoard")
        & rows["opponent_name"].eq("Gian Stragiotti")
    )
]
# Preview five random rows of the selection.
only_karl_first_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
48630,2025-06-09T12:56:57.051Z,1749474000.0,57.1,57.1,43.505066,43.505068,43.505062,43.505057,,81.678,...,SenseBoard,master,106.975,34.293,Levi,-0.124842,-7.03603,-4.210763,13.0,94.678
46336,2025-06-09T12:49:01.652Z,1749473000.0,50.2,50.2,43.505542,43.505543,43.505537,43.505531,1.0,103.774,...,SenseBoard,slave,106.975,47.292,Levi,5.99984,-0.848606,-4.988032,9.056,112.83
43520,2025-06-09T12:38:03.151Z,1749473000.0,64.1,64.1,43.506614,43.506612,43.506618,43.506624,,126.6,...,SenseBoard,master,106.975,68.713,Levi,0.906824,-2.630242,-1.391341,21.377,147.977
46263,2025-06-09T12:48:54.354Z,1749473000.0,51.3,51.3,43.504706,43.504707,43.504701,43.504696,1.0,83.7,...,SenseBoard,slave,106.975,47.292,Levi,1.747034,-0.830824,-1.845718,12.7,96.4
43817,2025-06-09T12:38:32.852Z,1749473000.0,55.1,55.1,43.503553,43.503551,43.503558,43.503564,,117.778,...,SenseBoard,master,106.975,68.713,Levi,-0.156257,2.472131,1.735256,19.9,137.678


In [8]:
# Last-run rows attributed to Karl: boat_name is "Karl Maeder", or boat_name is
# "SenseBoard" with "Gian Stragiotti" recorded as the opponent.
only_karl_last_runs_light = data_9juin_last_runs.loc[
    lambda rows: rows["boat_name"].eq("Karl Maeder")
    | (
        rows["boat_name"].eq("SenseBoard")
        & rows["opponent_name"].eq("Gian Stragiotti")
    )
]
# Preview five random rows of the selection.
only_karl_last_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
52721,2025-06-09T13:18:49.056Z,1749475000.0,59.8,59.8,43.503206,43.503208,43.503202,43.503196,,120.5,...,SenseBoard,master,100.975,53.803,Levi,-4.779742,-7.061243,0.317362,19.6,140.1
55269,2025-06-09T13:28:07.563Z,1749476000.0,57.5,57.5,43.506337,43.506339,43.506333,43.506327,,73.2,...,SenseBoard,slave,100.975,49.802,Levi,3.615211,5.829083,0.930321,13.5,86.7
49131,2025-06-09T13:05:32.064Z,1749474000.0,55.5,55.5,43.507921,43.507919,43.507926,43.507932,,120.1,...,SenseBoard,slave,100.975,61.789,Levi,1.946237,-0.77289,0.438948,15.0,135.1
55180,2025-06-09T13:27:58.654Z,1749476000.0,58.1,58.1,43.505265,43.505267,43.50526,43.505254,,105.7,...,SenseBoard,slave,100.975,49.802,Levi,0.705821,0.770546,-0.061454,15.6,121.3
54307,2025-06-09T13:25:37.256Z,1749476000.0,48.4,48.4,43.502189,43.502187,43.502193,43.502199,1.0,123.2,...,SenseBoard,slave,100.975,61.205,Levi,11.347376,3.054158,9.230215,12.1,135.3


In [17]:
# SOG comparison for Karl over all rows (no TWA split).
t_test(only_karl_first_runs_heavy, only_karl_last_runs_light)  # general

# Pull out the columns once so the summary lines below stay readable.
karl_heavy_sog_all = only_karl_first_runs_heavy["SOG"]
karl_light_sog_all = only_karl_last_runs_light["SOG"]
karl_heavy_weight_all = only_karl_first_runs_heavy["boat_weight"].mean()
karl_light_weight_all = only_karl_last_runs_light["boat_weight"].mean()

print("\nUpwind and downwind for Karl:")
print(f"\nWeight of Karl on the first runs: {karl_heavy_weight_all}, average SOG: {karl_heavy_sog_all.mean()}, std SOG: {karl_heavy_sog_all.std()}")
print(f"Weight of Karl on the last runs: {karl_light_weight_all}, average SOG: {karl_light_sog_all.mean()}, std SOG: {karl_light_sog_all.std()}")

T-statistic: -7.879, p-value: 0.000000000000004
The difference is statistically significant, keeping data split.

Upwind and downwind for Karl:

Weight of Karl on the first runs: 106.97500000000001, average SOG: 23.94727146332986, std SOG: 2.042511601997199
Weight of Karl on the last runs: 100.975, average SOG: 24.320151187904965, std SOG: 1.962379480136006


In [10]:
# Rows with TWA > 0 are the "upwind" subset in this notebook; rows with a missing
# TWA satisfy neither this condition nor the downwind one.
only_karl_first_runs_heavy_upwind = only_karl_first_runs_heavy.loc[
    lambda rows: rows["TWA"].gt(0)
]
only_karl_last_runs_light_upwind = only_karl_last_runs_light.loc[
    lambda rows: rows["TWA"].gt(0)
]

t_test(only_karl_first_runs_heavy_upwind, only_karl_last_runs_light_upwind)  # upwind

# Mean weight and mean SOG of each subset, computed before printing.
karl_heavy_upwind_weight = only_karl_first_runs_heavy_upwind["boat_weight"].mean()
karl_heavy_upwind_sog = only_karl_first_runs_heavy_upwind["SOG"].mean()
karl_light_upwind_weight = only_karl_last_runs_light_upwind["boat_weight"].mean()
karl_light_upwind_sog = only_karl_last_runs_light_upwind["SOG"].mean()

print("\nUpwind for Karl:")
print(f"Weight of Karl on the first runs: {karl_heavy_upwind_weight}, average SOG: {karl_heavy_upwind_sog}")
print(f"Weight of Karl on the last runs: {karl_light_upwind_weight}, average SOG: {karl_light_upwind_sog}")

T-statistic: -1.138, p-value: 0.255372507601836
The difference is not statistically significant, keeping data combined.

Upwind for Karl:
Weight of Karl on the first runs: 106.97499999999997, average SOG: 22.72190383681399
Weight of Karl on the last runs: 100.97500000000001, average SOG: 22.74386574074074


In [11]:
# Rows with TWA <= 0 are the "downwind" subset in this notebook; rows with a
# missing TWA are excluded because the comparison is False for NaN.
only_karl_first_runs_heavy_downwind = only_karl_first_runs_heavy.loc[
    lambda rows: rows["TWA"].le(0)
]
only_karl_last_runs_light_downwind = only_karl_last_runs_light.loc[
    lambda rows: rows["TWA"].le(0)
]
t_test(only_karl_first_runs_heavy_downwind, only_karl_last_runs_light_downwind)  # downwind

# Mean weight and mean SOG of each subset, computed before printing.
karl_heavy_downwind_weight = only_karl_first_runs_heavy_downwind["boat_weight"].mean()
karl_heavy_downwind_sog = only_karl_first_runs_heavy_downwind["SOG"].mean()
karl_light_downwind_weight = only_karl_last_runs_light_downwind["boat_weight"].mean()
karl_light_downwind_sog = only_karl_last_runs_light_downwind["SOG"].mean()

print("\nDownwind for Karl:")
print(f"Weight of Karl on the first runs: {karl_heavy_downwind_weight}, average SOG: {karl_heavy_downwind_sog}")
print(f"Weight of Karl on the last runs: {karl_light_downwind_weight}, average SOG: {karl_light_downwind_sog}")


T-statistic: 18.894, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Karl:
Weight of Karl on the first runs: 106.97499999999998, average SOG: 27.031662591687045
Weight of Karl on the last runs: 100.97499999999998, average SOG: 26.32492639842983


## T-test: Gian light vs. Gian heavy

In [12]:
# First-run rows attributed to Gian: boat_name is "Gian Stragiotti", or boat_name
# is "SenseBoard" with "Karl Maeder" recorded as the opponent.
only_gian_first_runs_light = data_9juin_first_runs.loc[
    lambda rows: rows["boat_name"].eq("Gian Stragiotti")
    | (
        rows["boat_name"].eq("SenseBoard")
        & rows["opponent_name"].eq("Karl Maeder")
    )
]
# Preview five random rows of the selection.
only_gian_first_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
45690,2025-06-09T12:46:22.560Z,1749473000.0,48.3,48.3,43.505341,43.505339,43.505345,43.505351,1.0,4.8,...,Karl Maeder,master,109.09,68.0,Levi,4.413179,4.386831,6.221925,125.916,130.716
47992,2025-06-09T12:54:33.360Z,1749474000.0,54.4,54.4,43.505738,43.505736,43.505742,43.505749,1.0,5.6,...,Karl Maeder,slave,109.09,68.88,Levi,-0.320356,0.305353,0.025599,121.4,127.0
45739,2025-06-09T12:46:27.444Z,1749473000.0,58.5,58.5,43.504817,43.504815,43.504821,43.504827,1.0,2.9,...,Karl Maeder,master,109.09,68.0,Levi,7.429106,3.594037,7.757554,112.4,115.3
48039,2025-06-09T12:54:38.056Z,1749474000.0,47.9,47.9,43.505273,43.505271,43.505277,43.505283,1.0,3.5,...,Karl Maeder,slave,109.09,68.88,Levi,1.245663,-1.611166,-0.424706,98.5,102.0
46962,2025-06-09T12:49:16.960Z,1749473000.0,51.5,51.5,43.507318,43.50732,43.507314,43.507308,1.0,7.8,...,Karl Maeder,master,109.09,47.292,Levi,10.073003,-2.828661,-9.343056,100.8,108.6


In [13]:
# Last-run rows attributed to Gian: boat_name is "Gian Stragiotti", or boat_name
# is "SenseBoard" with "Karl Maeder" recorded as the opponent.
only_gian_last_runs_heavy = data_9juin_last_runs.loc[
    lambda rows: rows["boat_name"].eq("Gian Stragiotti")
    | (
        rows["boat_name"].eq("SenseBoard")
        & rows["opponent_name"].eq("Karl Maeder")
    )
]
# Preview five random rows of the selection.
only_gian_last_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
51249,2025-06-09T13:08:59.960Z,1749475000.0,42.6,42.6,43.508027,43.508029,43.508023,43.508017,,6.947,...,Karl Maeder,master,115.09,47.191,Levi,27.45208,0.294801,-21.51387,97.947,104.894
53184,2025-06-09T13:18:41.441Z,1749475000.0,44.7,44.7,43.502478,43.50248,43.502474,43.502468,,5.3,...,Karl Maeder,slave,115.09,53.803,Levi,-1.754801,-0.437622,1.258364,110.2,115.5
50005,2025-06-09T13:05:57.556Z,1749474000.0,56.6,56.6,43.505007,43.505005,43.505011,43.505017,,10.8,...,Karl Maeder,master,115.09,61.789,Levi,12.165673,2.358078,9.098748,132.0,142.8
55623,2025-06-09T13:27:53.160Z,1749476000.0,48.7,48.7,43.504464,43.504466,43.50446,43.504453,,4.4,...,Karl Maeder,master,115.09,49.802,Levi,0.936883,-0.067841,-0.775582,101.4,105.8
51122,2025-06-09T13:08:47.255Z,1749475000.0,40.1,40.1,43.506428,43.50643,43.506423,43.506417,,8.6,...,Karl Maeder,master,115.09,47.191,Levi,22.525345,-1.15968,-18.093013,102.937,111.537


In [14]:
# SOG comparison for Gian over all rows (no TWA split).
t_test(only_gian_first_runs_light, only_gian_last_runs_heavy)  # general

# Mean weight and mean SOG of each group, computed before printing.
gian_light_weight_all = only_gian_first_runs_light["boat_weight"].mean()
gian_light_sog_all = only_gian_first_runs_light["SOG"].mean()
gian_heavy_weight_all = only_gian_last_runs_heavy["boat_weight"].mean()
gian_heavy_sog_all = only_gian_last_runs_heavy["SOG"].mean()

print("\nUpwind and downwind for Gian:")
print(f"\nWeight of Gian on the first runs: {gian_light_weight_all}, average SOG: {gian_light_sog_all}")
print(f"Weight of Gian on the last runs: {gian_heavy_weight_all}, average SOG: {gian_heavy_sog_all}")

T-statistic: -11.650, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Gian:

Weight of Gian on the first runs: 109.08999999999999, average SOG: 24.236319275008714
Weight of Gian on the last runs: 115.08999999999999, average SOG: 24.825573344872346


In [15]:
# Rows with TWA > 0 are the "upwind" subset in this notebook; rows with a missing
# TWA satisfy neither this condition nor the downwind one.
only_gian_first_runs_light_upwind = only_gian_first_runs_light.loc[
    lambda rows: rows["TWA"].gt(0)
]
only_gian_last_runs_heavy_upwind = only_gian_last_runs_heavy.loc[
    lambda rows: rows["TWA"].gt(0)
]

t_test(only_gian_first_runs_light_upwind, only_gian_last_runs_heavy_upwind)  # upwind

# Mean weight and mean SOG of each subset, computed before printing.
gian_light_upwind_weight = only_gian_first_runs_light_upwind["boat_weight"].mean()
gian_light_upwind_sog = only_gian_first_runs_light_upwind["SOG"].mean()
gian_heavy_upwind_weight = only_gian_last_runs_heavy_upwind["boat_weight"].mean()
gian_heavy_upwind_sog = only_gian_last_runs_heavy_upwind["SOG"].mean()

print("\nUpwind for Gian:")
print(f"Weight of Gian on the first runs: {gian_light_upwind_weight}, average SOG: {gian_light_upwind_sog}")
print(f"Weight of Gian on the last runs: {gian_heavy_upwind_weight}, average SOG: {gian_heavy_upwind_sog}")

T-statistic: -8.572, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind for Gian:
Weight of Gian on the first runs: 109.08999999999997, average SOG: 22.98097323600973
Weight of Gian on the last runs: 115.09000000000002, average SOG: 23.180486862442038


In [16]:
# Rows with TWA <= 0 are the "downwind" subset in this notebook; rows with a
# missing TWA are excluded because the comparison is False for NaN.
only_gian_first_runs_light_downwind = only_gian_first_runs_light.loc[
    lambda rows: rows["TWA"].le(0)
]
only_gian_last_runs_heavy_downwind = only_gian_last_runs_heavy.loc[
    lambda rows: rows["TWA"].le(0)
]
t_test(only_gian_first_runs_light_downwind, only_gian_last_runs_heavy_downwind)  # downwind

# Mean weight and mean SOG of each subset, computed before printing.
gian_light_downwind_weight = only_gian_first_runs_light_downwind["boat_weight"].mean()
gian_light_downwind_sog = only_gian_first_runs_light_downwind["SOG"].mean()
gian_heavy_downwind_weight = only_gian_last_runs_heavy_downwind["boat_weight"].mean()
gian_heavy_downwind_sog = only_gian_last_runs_heavy_downwind["SOG"].mean()

print("\nDownwind for Gian:")
print(f"Weight of Gian on the first runs: {gian_light_downwind_weight}, average SOG: {gian_light_downwind_sog}")
print(f"Weight of Gian on the last runs: {gian_heavy_downwind_weight}, average SOG: {gian_heavy_downwind_sog}")


T-statistic: 10.288, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Gian:
Weight of Gian on the first runs: 109.08999999999999, average SOG: 27.405528255528253
Weight of Gian on the last runs: 115.08999999999997, average SOG: 26.918731563421826
