In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats as stats

def t_test(df1, df2, target="SOG"):
    t_stat, p_value = stats.ttest_ind(df1[target].dropna(), df2[target].dropna())
    print(f"T-statistic: {t_stat:.3f}, p-value: {p_value:.15f}")
    
    # If p-value is less than 0.05, the difference is statistically significant
    if p_value < 0.05:
        print("The difference is statistically significant, keeping data split.")
    else:
        print("The difference is not statistically significant, keeping data combined.")


In [2]:
df = pd.read_csv("all_data.csv")

In [3]:
data_10juin = df[df["ISODateTimeUTC"].str.startswith("2025-06-10")]

## T test on the TWS between runs 1 to 5 where Karl is on the Levitaz and Gian is on the Chubanga and runs 6 to 10 is the other way around

In [4]:
first_runs = ["10_06_Run1","10_06_Run2","10_06_Run3","10_06_Run4","10_06_Run5"]
data_10juin_first_runs = data_10juin[data_10juin["run"].isin(first_runs) ]

In [5]:
last_runs = ["10_06_Run6","10_06_Run7","10_06_Run8","10_06_Run9","10_06_Run10"]
data_10juin_last_runs = data_10juin[data_10juin["run"].isin(last_runs) ]

In [6]:
t_test(data_10juin_first_runs,data_10juin_last_runs, target="TWS")
print(f"Average TWS in Group 1: {data_10juin_first_runs['TWS'].mean()}")
print(f"Average TWS in Group 2: {data_10juin_last_runs['TWS'].mean()}")

T-statistic: -208.083, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.
Average TWS in Group 1: 6.194841001646305
Average TWS in Group 2: 8.062942392046008


## t test karl levi vs karl chub

In [7]:
only_karl_first_runs_levi = data_10juin_first_runs[
    (data_10juin_first_runs["boat_name"] == "Karl Maeder") |
    ((data_10juin_first_runs["boat_name"] == "SenseBoard") & 
     (data_10juin_first_runs["opponent_name"] == "Gian Stragiotti"))
]
only_karl_first_runs_levi.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
62812,2025-06-10T12:39:40.657Z,1749559000.0,46.7,46.7,43.53075,43.530752,43.530746,43.53074,,96.2,...,Gian Stragiotti,slave,102.89,54.391,Levi,4.944839,0.070506,-2.81074,8.444,104.644
64271,2025-06-10T12:44:39.151Z,1749559000.0,58.4,58.4,43.532967,43.532965,43.532971,43.532977,,141.7,...,Gian Stragiotti,master,102.89,69.201,Levi,-0.171058,-1.691505,-1.443706,21.2,162.9
67255,2025-06-10T12:52:56.657Z,1749560000.0,53.5,53.5,43.535334,43.535333,43.535339,43.535344,,123.857,...,Gian Stragiotti,slave,102.89,51.991,Levi,4.15946,-3.263066,-0.118868,16.567,140.424
59747,2025-06-10T12:29:37.059Z,1749559000.0,55.1,55.1,43.529764,43.529762,43.529768,43.529774,,110.6,...,Gian Stragiotti,master,102.89,73.306,Levi,-9.545139,-8.2375,-12.329677,10.8,121.4
65228,2025-06-10T12:47:21.151Z,1749560000.0,43.2,43.2,43.530957,43.530959,43.530953,43.530946,,85.9,...,Gian Stragiotti,master,102.89,48.401,Levi,-6.449377,-1.908719,2.831928,8.0,93.9


In [8]:
only_karl_last_runs_chub = data_10juin_last_runs[
    (data_10juin_last_runs["boat_name"] == "Karl Maeder") |
    ((data_10juin_last_runs["boat_name"] == "SenseBoard") & 
     (data_10juin_last_runs["opponent_name"] == "Gian Stragiotti"))
]
only_karl_last_runs_chub.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,opponent_name,boat_role,boat_weight,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,side_line2,total_line2
77015,2025-06-10T13:40:31.953Z,1749563000.0,65.0,65.0,43.531199,43.531197,43.531203,43.531209,,134.5,...,SenseBoard,master,99.995,66.197,Chub,-6.859894,3.854382,-1.736114,16.071,150.571
74653,2025-06-10T13:33:20.959Z,1749562000.0,68.1,68.1,43.532653,43.532651,43.532657,43.532663,,112.7,...,SenseBoard,slave,99.995,62.598,Chub,6.029791,8.540281,10.327343,8.147,120.847
79069,2025-06-10T13:49:32.753Z,1749563000.0,60.2,60.2,43.533025,43.533024,43.53303,43.533036,,128.4,...,SenseBoard,slave,99.995,67.905,Chub,3.826458,-2.727454,0.305286,14.4,142.8
70126,2025-06-10T13:13:32.460Z,1749561000.0,58.5,58.5,43.533635,43.533634,43.53364,43.533646,,113.5,...,SenseBoard,slave,99.995,59.901,Chub,6.12799,-9.685361,-3.512055,15.4,128.9
72295,2025-06-10T13:23:02.658Z,1749562000.0,57.7,57.7,43.532545,43.532544,43.532549,43.532555,,116.2,...,SenseBoard,master,99.995,67.8,Chub,-4.64101,-1.796695,-4.336587,12.443,128.643


In [11]:
t_test(only_karl_first_runs_levi,only_karl_last_runs_chub) #general

print("\nUpwind and downwind for Karl:")
print(f"\Mast of Karl on the first runs: {only_karl_first_runs_levi['mast_brand'].unique()}, average SOG: {only_karl_first_runs_levi['SOG'].mean()}, std SOG: {only_karl_first_runs_levi['SOG'].std()}")
print(f"Weight of Karl on the last runs: {only_karl_last_runs_chub['mast_brand'].unique()}, average SOG: {only_karl_last_runs_chub['SOG'].mean()}, std SOG: {only_karl_last_runs_chub['SOG'].std()}")

T-statistic: -15.956, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Karl:
\Mast of Karl on the first runs: ['Levi'], average SOG: 23.797710718002083, std SOG: 2.0310166378155836
Weight of Karl on the last runs: ['Chub'], average SOG: 24.440533696922476, std SOG: 2.1737562502878256


In [10]:
only_karl_first_runs_heavy_upwind = only_karl_first_runs_heavy[only_karl_first_runs_heavy["TWA"]>0]
only_karl_last_runs_light_upwind = only_karl_last_runs_light[only_karl_last_runs_light["TWA"]>0]

t_test(only_karl_first_runs_heavy_upwind,only_karl_last_runs_light_upwind) #upwind

print("\nUpwind for Karl:")
print(f"Weight of Karl on the first runs: {only_karl_first_runs_heavy_upwind['boat_weight'].mean()}, average SOG: {only_karl_first_runs_heavy_upwind['SOG'].mean()}")
print(f"Weight of Karl on the last runs: {only_karl_last_runs_light_upwind['boat_weight'].mean()}, average SOG: {only_karl_last_runs_light_upwind['SOG'].mean()}")

NameError: name 'only_karl_first_runs_heavy' is not defined

In [None]:
only_karl_first_runs_heavy_downwind = only_karl_first_runs_heavy[only_karl_first_runs_heavy["TWA"] <= 0]
only_karl_last_runs_light_downwind = only_karl_last_runs_light[only_karl_last_runs_light["TWA"] <= 0]
t_test(only_karl_first_runs_heavy_downwind, only_karl_last_runs_light_downwind)  # downwind

print("\nDownwind for Karl:")
print(f"Weight of Karl on the first runs: {only_karl_first_runs_heavy_downwind['boat_weight'].mean()}, average SOG: {only_karl_first_runs_heavy_downwind['SOG'].mean()}")
print(f"Weight of Karl on the last runs: {only_karl_last_runs_light_downwind['boat_weight'].mean()}, average SOG: {only_karl_last_runs_light_downwind['SOG'].mean()}")


T-statistic: 18.894, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Karl:
Weight of Karl on the first runs: 106.97499999999998, average SOG: 27.031662591687045
Weight of Karl on the last runs: 100.97499999999998, average SOG: 26.32492639842983


## t test Gian heavy vs karl not heavy

In [None]:
only_gian_first_runs_light = data_9juin_first_runs[
    (data_9juin_first_runs["boat_name"] == "Gian Stragiotti") |
    ((data_9juin_first_runs["boat_name"] == "SenseBoard") & 
     (data_9juin_first_runs["opponent_name"] == "Karl Maeder"))
]
only_gian_first_runs_light.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
46650,2025-06-09T12:48:45.762Z,1749473000.0,58.6,58.6,43.503634,43.503636,43.50363,43.503624,1.0,3.585,...,09_06_Run6,2,SenseBoard,Karl Maeder,master,109.09,47.292,-0.07155,0.897393,0.662285
48053,2025-06-09T12:54:39.454Z,1749474000.0,59.2,59.2,43.50514,43.505138,43.505144,43.50515,1.0,6.3,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,2.34796,-3.289738,-0.994772
48183,2025-06-09T12:54:52.453Z,1749474000.0,52.5,52.5,43.503842,43.50384,43.503846,43.503852,1.0,5.775,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,2.206555,-10.767289,-6.778884
46925,2025-06-09T12:49:13.256Z,1749473000.0,49.8,49.8,43.506886,43.506887,43.506881,43.506875,1.0,5.9,...,09_06_Run6,2,SenseBoard,Karl Maeder,master,109.09,47.292,9.263385,-3.137818,-8.957542
48209,2025-06-09T12:54:55.052Z,1749474000.0,56.5,56.5,43.503584,43.503582,43.503588,43.503593,1.0,4.2,...,09_06_Run7,1,SenseBoard,Karl Maeder,slave,109.09,68.88,1.505398,-13.502514,-9.307664


In [None]:
only_gian_last_runs_heavy = data_9juin_last_runs[
    (data_9juin_last_runs["boat_name"] == "Gian Stragiotti") |
    ((data_9juin_last_runs["boat_name"] == "SenseBoard") & 
     (data_9juin_last_runs["opponent_name"] == "Karl Maeder"))
]
only_gian_last_runs_heavy.sample(5)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,run,interval_id,boat_name,opponent_name,boat_role,boat_weight,interval_duration,gain_forward,gain_lateral,gain_vmg
54934,2025-06-09T13:25:38.654Z,1749476000.0,60.9,60.9,43.501833,43.501831,43.501838,43.501844,,7.6,...,09_06_Run10,1,SenseBoard,Karl Maeder,master,115.09,61.205,11.676334,3.659835,9.909272
51051,2025-06-09T13:08:40.158Z,1749475000.0,56.2,56.2,43.505549,43.505551,43.505545,43.505539,,3.1,...,09_06_Run8,2,SenseBoard,Karl Maeder,master,115.09,47.191,19.351912,-2.313902,-16.315828
56908,2025-06-09T13:34:42.756Z,1749476000.0,56.1,56.1,43.50561,43.505609,43.505615,43.505621,1.0,5.1,...,09_06_Run11,1,SenseBoard,Karl Maeder,slave,115.09,67.09,0.341986,-2.793751,-1.954839
56750,2025-06-09T13:34:26.956Z,1749476000.0,49.9,49.9,43.507254,43.507252,43.507258,43.507265,1.0,4.282,...,09_06_Run11,1,SenseBoard,Karl Maeder,slave,115.09,67.09,0.466104,-4.862079,-3.436021
54768,2025-06-09T13:25:22.056Z,1749476000.0,50.9,50.9,43.503654,43.503652,43.503658,43.503665,,9.3,...,09_06_Run10,1,SenseBoard,Karl Maeder,master,115.09,61.205,9.271357,2.942259,7.873076


In [None]:
t_test(only_gian_first_runs_light,only_gian_last_runs_heavy) #general

print("\nUpwind and downwind for Gian:")
print(f"\nWeight of Gian on the first runs: {only_gian_first_runs_light['boat_weight'].mean()}, average SOG: {only_gian_first_runs_light['SOG'].mean()}")
print(f"Weight of Gian on the last runs: {only_gian_last_runs_heavy['boat_weight'].mean()}, average SOG: {only_gian_last_runs_heavy['SOG'].mean()}")

T-statistic: -11.650, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Gian:

Weight of Gian on the first runs: 109.08999999999999, average SOG: 24.236319275008714
Weight of Gian on the last runs: 115.08999999999999, average SOG: 24.825573344872346


In [None]:
only_gian_first_runs_light_upwind = only_gian_first_runs_light[only_gian_first_runs_light["TWA"]>0]
only_gian_last_runs_heavy_upwind = only_gian_last_runs_heavy[only_gian_last_runs_heavy["TWA"]>0]

t_test(only_gian_first_runs_light_upwind,only_gian_last_runs_heavy_upwind) #upwind

print("\nUpwind for Gian:")
print(f"Weight of Gian on the first runs: {only_gian_first_runs_light_upwind['boat_weight'].mean()}, average SOG: {only_gian_first_runs_light_upwind['SOG'].mean()}")
print(f"Weight of Gian on the last runs: {only_gian_last_runs_heavy_upwind['boat_weight'].mean()}, average SOG: {only_gian_last_runs_heavy_upwind['SOG'].mean()}")

T-statistic: -8.572, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind for Gian:
Weight of Gian on the first runs: 109.08999999999997, average SOG: 22.98097323600973
Weight of Gian on the last runs: 115.09000000000002, average SOG: 23.180486862442038


In [None]:
only_gian_first_runs_light_downwind = only_gian_first_runs_light[only_gian_first_runs_light["TWA"] <= 0]
only_gian_last_runs_heavy_downwind = only_gian_last_runs_heavy[only_gian_last_runs_heavy["TWA"] <= 0]
t_test(only_gian_first_runs_light_downwind, only_gian_last_runs_heavy_downwind)  # downwind

print("\nDownwind for Gian:")
print(f"Weight of Gian on the first runs: {only_gian_first_runs_light_downwind['boat_weight'].mean()}, average SOG: {only_gian_first_runs_light_downwind['SOG'].mean()}")
print(f"Weight of Gian on the last runs: {only_gian_last_runs_heavy_downwind['boat_weight'].mean()}, average SOG: {only_gian_last_runs_heavy_downwind['SOG'].mean()}")


T-statistic: 10.288, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Gian:
Weight of Gian on the first runs: 109.08999999999999, average SOG: 27.405528255528253
Weight of Gian on the last runs: 115.08999999999997, average SOG: 26.918731563421826
