In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats as stats

def t_test(df1, df2, target="SOG", alpha=0.05, equal_var=True):
    """Run a two-sided independent two-sample t-test on one column and print the verdict.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups to compare. Both must contain the column ``target``.
    target : str, default "SOG"
        Column whose values are compared. Missing values are dropped from
        each group before testing.
    alpha : float, default 0.05
        Significance level the p-value is compared against.
    equal_var : bool, default True
        Forwarded to ``scipy.stats.ttest_ind``. ``True`` keeps the
        pooled-variance test this notebook has always used; ``False``
        selects Welch's test, which does not assume equal variances.

    Returns
    -------
    None
        The statistic, the p-value and the verdict are printed.
    """
    sample1 = df1[target].dropna()
    sample2 = df2[target].dropna()

    # With fewer than two observations in a group the test yields NaN, and
    # "NaN < alpha" is False -- which used to be reported as "not
    # statistically significant". Report the real situation instead.
    if len(sample1) < 2 or len(sample2) < 2:
        print(
            f"Not enough data to run a t-test on '{target}' "
            f"(group sizes: {len(sample1)} and {len(sample2)})."
        )
        return

    t_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=equal_var)

    # The result can still be NaN (e.g. both groups constant and equal);
    # that is not evidence for or against a difference either.
    if np.isnan(p_value):
        print(f"The t-test on '{target}' is undefined for these groups (result is NaN).")
        return

    print(f"T-statistic: {t_stat:.3f}, p-value: {p_value:.15f}")

    # A p-value below alpha means the difference is statistically significant
    if p_value < alpha:
        print("The difference is statistically significant, keeping data split.")
    else:
        print("The difference is not statistically significant, keeping data combined.")


In [2]:
# Load the whole dataset from all_data.csv (path relative to the working directory).
df = pd.read_csv(filepath_or_buffer="all_data.csv")

In [3]:
# Keep only the rows whose UTC timestamp string starts with 2025-06-09.
# na=False treats a missing timestamp as "no match"; without it a single NaN
# makes the mask non-boolean and the indexing raises ValueError.
data_9juin = df[df["ISODateTimeUTC"].str.startswith("2025-06-09", na=False)]

## T-test on the TWS between runs 5, 6, 7, where Karl holds the weights, and runs 8, 9, 10, 11, where Gian holds 6 kg

In [4]:
# Run identifiers that make up the first group (runs 5 to 7).
first_runs = [
    "09_06_Run5",
    "09_06_Run6",
    "09_06_Run7",
]
# Rows of the 9 June data whose "run" value is one of those identifiers.
data_9juin_first_runs = data_9juin.loc[lambda rows: rows["run"].isin(first_runs)]

In [5]:
# Run identifiers that make up the second group (runs 8 to 11).
last_runs = [
    "09_06_Run8",
    "09_06_Run9",
    "09_06_Run10",
    "09_06_Run11",
]
# Rows of the 9 June data whose "run" value is one of those identifiers.
data_9juin_last_runs = data_9juin.loc[lambda rows: rows["run"].isin(last_runs)]

In [6]:
# Test whether TWS differs between the two groups of runs, then print each
# group's mean and standard deviation.
t_test(df1=data_9juin_first_runs, df2=data_9juin_last_runs, target="TWS")
print(
    data_9juin_first_runs["TWS"].mean(),
    data_9juin_last_runs["TWS"].mean(),
)
print(
    f"Average TWS in Group 1: {data_9juin_first_runs['TWS'].mean()},"
    f"STD TWS in Group 1: {data_9juin_first_runs['TWS'].std()}"
)
print(
    f"Average TWS in Group 2: {data_9juin_last_runs['TWS'].mean()},"
    f"STD TWS in Group 2: {data_9juin_last_runs['TWS'].std()}"
)

T-statistic: -20.004, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.
7.2590247128437175 7.541569498486814
Average TWS in Group 1: 7.2590247128437175,STD TWS in Group 1: 0.909090314209303
Average TWS in Group 2: 7.541569498486814,STD TWS in Group 2: 0.7956380127932735


## T-test: Karl heavy vs Karl not heavy

In [7]:
# First-run rows attributed to Karl: boat_name is "Karl Maeder", or boat_name
# is "SenseBoard" with opponent_name "Gian Stragiotti".
# NOTE(review): presumably the SenseBoard rows sailed against Gian are Karl's -- confirm.
only_karl_first_runs_heavy = data_9juin_first_runs[
    (data_9juin_first_runs["boat_name"] == "Karl Maeder") |
    ((data_9juin_first_runs["boat_name"] == "SenseBoard") & 
     (data_9juin_first_runs["opponent_name"] == "Gian Stragiotti"))
]
# Preview a few rows. The fixed seed shows the same rows on every
# Restart & Run All, and the size is capped so a subset with fewer than
# 5 rows does not raise.
only_karl_first_runs_heavy.sample(
    n=min(5, len(only_karl_first_runs_heavy)), random_state=42
)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,Line_R2,Line_L2,Line_C2,side_line2,total_line2
44932,2025-06-09T12:46:14.852Z,1749473000.0,61.3,61.3,43.506266,43.506264,43.50627,43.506276,1.0,135.6,...,68.0,Levi,0.64323,2.377093,2.159821,7.0,8.2,135.6,15.2,150.8
47403,2025-06-09T12:54:43.361Z,1749474000.0,52.8,52.8,43.504635,43.504633,43.504639,43.504646,,119.1,...,68.88,Levi,4.501874,-5.772479,-1.507673,4.6,4.8,119.1,9.4,128.5
46414,2025-06-09T12:49:09.452Z,1749473000.0,46.3,46.3,43.506442,43.506443,43.506437,43.506431,1.0,77.8,...,47.292,Levi,6.626736,-3.017004,-6.933485,5.4,7.5,77.8,12.9,90.7
44859,2025-06-09T12:46:07.561Z,1749473000.0,53.0,53.0,43.507032,43.50703,43.507036,43.507042,1.0,130.7,...,68.0,Levi,2.864175,0.429934,2.273476,5.546,8.568,130.7,14.114,144.814
43373,2025-06-09T12:37:48.452Z,1749473000.0,60.1,60.1,43.508107,43.508106,43.508112,43.508118,,120.514,...,68.713,Levi,0.89665,1.193485,1.49052,6.5,6.8,120.514,13.3,133.814


In [8]:
# Last-run rows attributed to Karl: boat_name is "Karl Maeder", or boat_name
# is "SenseBoard" with opponent_name "Gian Stragiotti".
# NOTE(review): presumably the SenseBoard rows sailed against Gian are Karl's -- confirm.
only_karl_last_runs_light = data_9juin_last_runs[
    (data_9juin_last_runs["boat_name"] == "Karl Maeder") |
    ((data_9juin_last_runs["boat_name"] == "SenseBoard") & 
     (data_9juin_last_runs["opponent_name"] == "Gian Stragiotti"))
]
# Preview a few rows. The fixed seed shows the same rows on every
# Restart & Run All, and the size is capped so a subset with fewer than
# 5 rows does not raise.
only_karl_last_runs_light.sample(
    n=min(5, len(only_karl_last_runs_light)), random_state=42
)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,Line_R2,Line_L2,Line_C2,side_line2,total_line2
51496,2025-06-09T13:16:01.963Z,1749475000.0,59.4,59.4,43.506308,43.506306,43.506312,43.506319,,104.4,...,68.692,Levi,-1.572151,3.585873,1.006222,7.4,9.7,104.4,17.1,121.5
55158,2025-06-09T13:27:56.454Z,1749476000.0,44.5,44.5,43.505004,43.505006,43.505,43.504994,,105.5,...,49.802,Levi,-0.132746,0.391247,0.348414,3.5,5.7,105.5,9.2,114.7
57647,2025-06-09T13:37:53.168Z,1749476000.0,54.7,54.7,43.506513,43.506515,43.506509,43.506502,,83.9,...,52.596,Levi,-12.065017,-2.157995,8.174451,5.9,7.0,83.9,12.9,96.8
53807,2025-06-09T13:24:47.253Z,1749475000.0,58.0,58.0,43.507567,43.507565,43.507571,43.507577,1.0,108.4,...,61.205,Levi,0.061357,0.912712,0.740584,5.418,5.5,108.4,10.918,119.318
56354,2025-06-09T13:34:54.456Z,1749476000.0,53.0,53.0,43.504291,43.504289,43.504295,43.504301,,125.2,...,67.09,Levi,2.594618,-5.471212,-2.693445,8.2,9.9,125.2,18.1,143.3


In [9]:
# Overall SOG comparison for Karl (no TWA filter applied).
t_test(df1=only_karl_first_runs_heavy, df2=only_karl_last_runs_light)

print("\nUpwind and downwind for Karl:")
print(
    f"\nWeight of Karl on the first runs: {only_karl_first_runs_heavy['boat_weight'].mean()}, "
    f"average SOG: {only_karl_first_runs_heavy['SOG'].mean()}, "
    f"std SOG: {only_karl_first_runs_heavy['SOG'].std()}"
)
print(
    f"Weight of Karl on the last runs: {only_karl_last_runs_light['boat_weight'].mean()}, "
    f"average SOG: {only_karl_last_runs_light['SOG'].mean()}, "
    f"std SOG: {only_karl_last_runs_light['SOG'].std()}"
)

T-statistic: -7.879, p-value: 0.000000000000004
The difference is statistically significant, keeping data split.

Upwind and downwind for Karl:

Weight of Karl on the first runs: 106.97500000000001, average SOG: 23.94727146332986, std SOG: 2.042511601997199
Weight of Karl on the last runs: 100.975, average SOG: 24.320151187904965, std SOG: 1.962379480136006


In [10]:
# Rows with TWA > 0 are the ones this notebook labels "upwind".
only_karl_first_runs_heavy_upwind = only_karl_first_runs_heavy.loc[
    only_karl_first_runs_heavy["TWA"].gt(0)
]
only_karl_last_runs_light_upwind = only_karl_last_runs_light.loc[
    only_karl_last_runs_light["TWA"].gt(0)
]

# SOG comparison restricted to those rows.
t_test(df1=only_karl_first_runs_heavy_upwind, df2=only_karl_last_runs_light_upwind)

print("\nUpwind for Karl:")
print(
    f"Weight of Karl on the first runs: {only_karl_first_runs_heavy_upwind['boat_weight'].mean()}, "
    f"average SOG: {only_karl_first_runs_heavy_upwind['SOG'].mean()}, "
    f"std SOG: {only_karl_first_runs_heavy_upwind['SOG'].std()}"
)
print(
    f"Weight of Karl on the last runs: {only_karl_last_runs_light_upwind['boat_weight'].mean()}, "
    f"average SOG: {only_karl_last_runs_light_upwind['SOG'].mean()}, "
    f"std SOG: {only_karl_last_runs_light_upwind['SOG'].std()}"
)

T-statistic: -1.138, p-value: 0.255372507601836
The difference is not statistically significant, keeping data combined.

Upwind for Karl:
Weight of Karl on the first runs: 106.97499999999997, average SOG: 22.72190383681399, std SOG: 0.5513928546466119
Weight of Karl on the last runs: 100.97500000000001, average SOG: 22.74386574074074, std SOG: 0.725225703905136


In [11]:
# Rows with TWA <= 0 are the ones this notebook labels "downwind".
only_karl_first_runs_heavy_downwind = only_karl_first_runs_heavy.loc[
    only_karl_first_runs_heavy["TWA"].le(0)
]
only_karl_last_runs_light_downwind = only_karl_last_runs_light.loc[
    only_karl_last_runs_light["TWA"].le(0)
]
# SOG comparison restricted to those rows.
t_test(df1=only_karl_first_runs_heavy_downwind, df2=only_karl_last_runs_light_downwind)

print("\nDownwind for Karl:")
print(
    f"Weight of Karl on the first runs: {only_karl_first_runs_heavy_downwind['boat_weight'].mean()}, "
    f"average SOG: {only_karl_first_runs_heavy_downwind['SOG'].mean()}, "
    f"std SOG: {only_karl_first_runs_heavy_downwind['SOG'].std()}"
)
print(
    f"Weight of Karl on the last runs: {only_karl_last_runs_light_downwind['boat_weight'].mean()}, "
    f"average SOG: {only_karl_last_runs_light_downwind['SOG'].mean()}, "
    f"std SOG: {only_karl_last_runs_light_downwind['SOG'].std()}"
)


T-statistic: 18.894, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Karl:
Weight of Karl on the first runs: 106.97499999999998, average SOG: 27.031662591687045, std SOG: 0.7814322747955662
Weight of Karl on the last runs: 100.97499999999998, average SOG: 26.32492639842983, std SOG: 0.9483400838098708


## T-test: Gian heavy vs Gian not heavy

In [12]:
# First-run rows attributed to Gian: boat_name is "Gian Stragiotti", or
# boat_name is "SenseBoard" with opponent_name "Karl Maeder".
# NOTE(review): presumably the SenseBoard rows sailed against Karl are Gian's -- confirm.
only_gian_first_runs_light = data_9juin_first_runs[
    (data_9juin_first_runs["boat_name"] == "Gian Stragiotti") |
    ((data_9juin_first_runs["boat_name"] == "SenseBoard") & 
     (data_9juin_first_runs["opponent_name"] == "Karl Maeder"))
]
# Preview a few rows. The fixed seed shows the same rows on every
# Restart & Run All, and the size is capped so a subset with fewer than
# 5 rows does not raise.
only_gian_first_runs_light.sample(
    n=min(5, len(only_gian_first_runs_light)), random_state=42
)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,Line_R2,Line_L2,Line_C2,side_line2,total_line2
46874,2025-06-09T12:49:08.165Z,1749473000.0,51.3,51.3,43.506286,43.506287,43.506282,43.506276,1.0,5.0,...,47.292,Levi,8.856348,-2.162933,-8.0087,5.0,5.7,93.7,10.7,104.4
46052,2025-06-09T12:46:58.760Z,1749473000.0,57.1,57.1,43.501413,43.501411,43.501417,43.501424,1.0,6.6,...,68.0,Levi,19.051329,4.154643,15.926333,6.6,7.5,118.925,14.1,133.025
46838,2025-06-09T12:49:04.546Z,1749473000.0,54.7,54.7,43.505869,43.505871,43.505865,43.505859,1.0,8.3,...,47.292,Levi,8.712201,-1.573755,-7.49899,8.3,8.6,100.4,16.9,117.3
44669,2025-06-09T12:38:49.259Z,1749473000.0,50.0,50.0,43.501981,43.501979,43.501985,43.501991,1.0,4.4,...,68.713,Levi,-4.388819,-3.988538,-5.895478,4.4,10.5,100.465,14.9,115.365
45976,2025-06-09T12:46:51.160Z,1749473000.0,48.8,48.8,43.502255,43.502253,43.502259,43.502265,1.0,4.1,...,68.0,Levi,17.83779,3.084701,14.362618,4.1,6.7,105.0,10.8,115.8


In [13]:
# Last-run rows attributed to Gian: boat_name is "Gian Stragiotti", or
# boat_name is "SenseBoard" with opponent_name "Karl Maeder".
# NOTE(review): presumably the SenseBoard rows sailed against Karl are Gian's -- confirm.
only_gian_last_runs_heavy = data_9juin_last_runs[
    (data_9juin_last_runs["boat_name"] == "Gian Stragiotti") |
    ((data_9juin_last_runs["boat_name"] == "SenseBoard") & 
     (data_9juin_last_runs["opponent_name"] == "Karl Maeder"))
]
# Preview a few rows. The fixed seed shows the same rows on every
# Restart & Run All, and the size is capped so a subset with fewer than
# 5 rows does not raise.
only_gian_last_runs_heavy.sample(
    n=min(5, len(only_gian_last_runs_heavy)), random_state=42
)

Unnamed: 0,ISODateTimeUTC,SecondsSince1970,Heel_Abs,Heel_Lwd,Lat,LatBow,LatCenter,LatStern,Leg,Line_C,...,interval_duration,mast_brand,gain_forward,gain_lateral,gain_vmg,Line_R2,Line_L2,Line_C2,side_line2,total_line2
52434,2025-06-09T13:16:27.051Z,1749475000.0,62.4,62.4,43.503741,43.503739,43.503746,43.503752,1.0,5.5,...,68.692,Levi,-7.425264,-3.634198,-7.961206,5.5,9.8,116.2,15.3,131.5
58210,2025-06-09T13:37:56.757Z,1749476000.0,33.4,33.4,43.507198,43.5072,43.507194,43.507188,,4.9,...,52.596,Levi,-14.251034,0.097287,11.32204,4.9,13.0,114.3,17.9,132.2
56949,2025-06-09T13:34:46.858Z,1749476000.0,57.5,57.5,43.505171,43.50517,43.505176,43.505182,1.0,7.7,...,67.09,Levi,0.094406,-3.380109,-2.57434,7.7,10.6,129.5,18.3,147.8
55442,2025-06-09T13:27:35.061Z,1749476000.0,55.5,55.5,43.5023,43.502302,43.502296,43.50229,,7.7,...,49.802,Levi,-0.986751,0.274186,0.947757,7.0,7.7,102.283,14.7,116.983
49901,2025-06-09T13:05:47.160Z,1749474000.0,57.3,57.3,43.506151,43.506149,43.506155,43.506161,,5.0,...,61.789,Levi,10.00121,1.016809,6.56251,5.0,6.4,103.6,11.4,115.0


In [14]:
# Overall SOG comparison for Gian (no TWA filter applied).
t_test(df1=only_gian_first_runs_light, df2=only_gian_last_runs_heavy)

print("\nUpwind and downwind for Gian:")
print(
    f"\nWeight of Gian on the first runs: {only_gian_first_runs_light['boat_weight'].mean()}, "
    f"average SOG: {only_gian_first_runs_light['SOG'].mean()}, "
    f"std SOG: {only_gian_first_runs_light['SOG'].std()}"
)
print(
    f"Weight of Gian on the last runs: {only_gian_last_runs_heavy['boat_weight'].mean()}, "
    f"average SOG: {only_gian_last_runs_heavy['SOG'].mean()}, "
    f"std SOG: {only_gian_last_runs_heavy['SOG'].std()}"
)

T-statistic: -11.650, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind and downwind for Gian:

Weight of Gian on the first runs: 109.08999999999999, average SOG: 24.236319275008714, std SOG: 2.135963842904484
Weight of Gian on the last runs: 115.08999999999999, average SOG: 24.825573344872346, std SOG: 2.1231026500669974


In [15]:
# Rows with TWA > 0 are the ones this notebook labels "upwind".
only_gian_first_runs_light_upwind = only_gian_first_runs_light.loc[
    only_gian_first_runs_light["TWA"].gt(0)
]
only_gian_last_runs_heavy_upwind = only_gian_last_runs_heavy.loc[
    only_gian_last_runs_heavy["TWA"].gt(0)
]

# SOG comparison restricted to those rows.
t_test(df1=only_gian_first_runs_light_upwind, df2=only_gian_last_runs_heavy_upwind)

print("\nUpwind for Gian:")
print(
    f"Weight of Gian on the first runs: {only_gian_first_runs_light_upwind['boat_weight'].mean()}, "
    f"average SOG: {only_gian_first_runs_light_upwind['SOG'].mean()}, "
    f"std SOG: {only_gian_first_runs_light_upwind['SOG'].std()}"
)
print(
    f"Weight of Gian on the last runs: {only_gian_last_runs_heavy_upwind['boat_weight'].mean()}, "
    f"average SOG: {only_gian_last_runs_heavy_upwind['SOG'].mean()}, "
    f"std SOG: {only_gian_last_runs_heavy_upwind['SOG'].std()}"
)

T-statistic: -8.572, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Upwind for Gian:
Weight of Gian on the first runs: 109.08999999999997, average SOG: 22.98097323600973, std SOG: 0.7538625153686097
Weight of Gian on the last runs: 115.09000000000002, average SOG: 23.180486862442038, std SOG: 0.8136933474979365


In [16]:
# Rows with TWA <= 0 are the ones this notebook labels "downwind".
only_gian_first_runs_light_downwind = only_gian_first_runs_light.loc[
    only_gian_first_runs_light["TWA"].le(0)
]
only_gian_last_runs_heavy_downwind = only_gian_last_runs_heavy.loc[
    only_gian_last_runs_heavy["TWA"].le(0)
]
# SOG comparison restricted to those rows.
t_test(df1=only_gian_first_runs_light_downwind, df2=only_gian_last_runs_heavy_downwind)

print("\nDownwind for Gian:")
print(
    f"Weight of Gian on the first runs: {only_gian_first_runs_light_downwind['boat_weight'].mean()}, "
    f"average SOG: {only_gian_first_runs_light_downwind['SOG'].mean()}, "
    f"std SOG: {only_gian_first_runs_light_downwind['SOG'].std()}"
)
print(
    f"Weight of Gian on the last runs: {only_gian_last_runs_heavy_downwind['boat_weight'].mean()}, "
    f"average SOG: {only_gian_last_runs_heavy_downwind['SOG'].mean()}, "
    f"std SOG: {only_gian_last_runs_heavy_downwind['SOG'].std()}"
)


T-statistic: 10.288, p-value: 0.000000000000000
The difference is statistically significant, keeping data split.

Downwind for Gian:
Weight of Gian on the first runs: 109.08999999999999, average SOG: 27.405528255528253, std SOG: 0.7868109632056288
Weight of Gian on the last runs: 115.08999999999997, average SOG: 26.918731563421826, std SOG: 1.2548178334314914
