In [16]:
import os
import pandas as pd

In [25]:
def check_samples(df: pd.DataFrame, label: str):
    """Print how many rows ``df`` holds, prefixed with ``label``.

    Args:
        df: Frame whose length (row count) is reported.
        label: Text placed at the start of the printed line to identify
            the data set.

    Returns:
        None. The count is written to stdout only.
    """
    print(f'{label} full samples count: {len(df)}')
    
def check_negative_loss(df: pd.DataFrame, operator: str):
    """Report the rows of ``df`` whose ``loss`` value is negative.

    The ``loss`` column is converted to float for the comparison, but the
    conversion is done on a local copy: the caller's DataFrame is never
    modified, so running this check does not change notebook state.

    Args:
        df: Frame containing a ``loss`` column whose values can be cast to
            float.
        operator: Label used in the printed messages to identify the data set.

    Returns:
        None. A summary line is printed; when negative rows exist they are
        printed as well, with ``loss`` shown as float.

    Raises:
        KeyError: If ``df`` has no ``loss`` column.
        ValueError: If a ``loss`` value cannot be converted to float.
    """
    full_len = len(df)

    # Cast into a separate Series instead of assigning back into `df`, which
    # would silently rewrite the caller's column as a side effect.
    loss_as_float = df['loss'].astype(float)
    is_negative = loss_as_float < 0

    # `.loc[...]` yields a new frame and `.assign` returns another one, so the
    # float-cast `loss` only exists in the rows we display.
    negative_loss_row_df = df.loc[is_negative].assign(loss=loss_as_float[is_negative])

    if not negative_loss_row_df.empty:
        print(f'Found negative loss in {operator}, count: {len(negative_loss_row_df)} / {full_len}')
        print(negative_loss_row_df)
    else:
        print(f'Not found negative loss in {operator}, full length: {full_len}')
        
def check_zero_tput(df: pd.DataFrame, operator: str):
    """Print how many rows of ``df`` have ``throughput_mbps`` equal to zero.

    Args:
        df: Frame containing a ``throughput_mbps`` column.
        operator: Label used in the printed message to identify the data set.

    Returns:
        None. Exactly one summary line is written to stdout.
    """
    total_rows = len(df)
    zero_mask = df['throughput_mbps'] == 0
    zero_rows = int(zero_mask.sum())

    if zero_rows == 0:
        print(f'Not found zero throughput in {operator}, full length: {total_rows}')
        return

    # Only the count is reported; index `df` with `zero_mask` to inspect the rows.
    print(f'Found zero throughput in {operator}, count: {zero_rows} / {total_rows}')

In [20]:
# Directory holding the throughput CSVs, resolved relative to the current
# working directory.
dataset_dir = os.path.join(os.getcwd(), '../datasets/maine_starlink_trip/throughput')

# Each group below loads one protocol/direction combination, one CSV per
# operator (att, verizon, starlink), in that order.

# TCP downlink
att_tcp_dl_df, verizon_tcp_dl_df, starlink_tcp_dl_df = (
    pd.read_csv(os.path.join(dataset_dir, csv_name))
    for csv_name in ('att_tcp_downlink.csv', 'verizon_tcp_downlink.csv', 'starlink_tcp_downlink.csv')
)

# TCP uplink
att_tcp_ul_df, verizon_tcp_ul_df, starlink_tcp_ul_df = (
    pd.read_csv(os.path.join(dataset_dir, csv_name))
    for csv_name in ('att_tcp_uplink.csv', 'verizon_tcp_uplink.csv', 'starlink_tcp_uplink.csv')
)

# UDP downlink
att_udp_dl_df, verizon_udp_dl_df, starlink_udp_dl_df = (
    pd.read_csv(os.path.join(dataset_dir, csv_name))
    for csv_name in ('att_udp_downlink.csv', 'verizon_udp_downlink.csv', 'starlink_udp_downlink.csv')
)

# UDP uplink
att_udp_ul_df, verizon_udp_ul_df, starlink_udp_ul_df = (
    pd.read_csv(os.path.join(dataset_dir, csv_name))
    for csv_name in ('att_udp_uplink.csv', 'verizon_udp_uplink.csv', 'starlink_udp_uplink.csv')
)

In [23]:
# Row counts for every throughput frame. Frames are grouped by
# protocol/direction; a dashed rule is printed between groups (not after the
# last one).
_sample_groups = (
    ((att_tcp_dl_df, 'att_tcp_dl'), (verizon_tcp_dl_df, 'verizon_tcp_dl'), (starlink_tcp_dl_df, 'starlink_tcp_dl')),
    ((att_tcp_ul_df, 'att_tcp_ul'), (verizon_tcp_ul_df, 'verizon_tcp_ul'), (starlink_tcp_ul_df, 'starlink_tcp_ul')),
    ((att_udp_ul_df, 'att_udp_ul'), (verizon_udp_ul_df, 'verizon_udp_ul'), (starlink_udp_ul_df, 'starlink_udp_ul')),
    ((att_udp_dl_df, 'att_udp_dl'), (verizon_udp_dl_df, 'verizon_udp_dl'), (starlink_udp_dl_df, 'starlink_udp_dl')),
)
for _group_idx, _group in enumerate(_sample_groups):
    if _group_idx:
        print('--------')
    for _frame, _label in _group:
        check_samples(_frame, _label)

att_tcp_dl full samples count: 17935
verizon_tcp_dl full samples count: 17132
starlink_tcp_dl full samples count: 17367
--------
att_tcp_ul full samples count: 15240
verizon_tcp_ul full samples count: 15330
starlink_tcp_ul full samples count: 14986
--------
att_udp_ul full samples count: 9189
verizon_udp_ul full samples count: 8377
starlink_udp_ul full samples count: 8461
--------
att_udp_dl full samples count: 12970
verizon_udp_dl full samples count: 8255
starlink_udp_dl full samples count: 14769


In [26]:
# Zero-throughput counts for every throughput frame. Frames are grouped by
# protocol/direction; a dashed rule is printed between groups (not after the
# last one).
_zero_tput_groups = (
    ((att_tcp_dl_df, 'att_tcp_dl'), (verizon_tcp_dl_df, 'verizon_tcp_dl'), (starlink_tcp_dl_df, 'starlink_tcp_dl')),
    ((att_tcp_ul_df, 'att_tcp_ul'), (verizon_tcp_ul_df, 'verizon_tcp_ul'), (starlink_tcp_ul_df, 'starlink_tcp_ul')),
    ((att_udp_ul_df, 'att_udp_ul'), (verizon_udp_ul_df, 'verizon_udp_ul'), (starlink_udp_ul_df, 'starlink_udp_ul')),
    ((att_udp_dl_df, 'att_udp_dl'), (verizon_udp_dl_df, 'verizon_udp_dl'), (starlink_udp_dl_df, 'starlink_udp_dl')),
)
for _group_idx, _group in enumerate(_zero_tput_groups):
    if _group_idx:
        print('--------')
    for _frame, _label in _group:
        check_zero_tput(_frame, _label)



Found zero throughput in att_tcp_dl, count: 1297 / 17935
Found zero throughput in verizon_tcp_dl, count: 1364 / 17132
Found zero throughput in starlink_tcp_dl, count: 398 / 17367
--------
Found zero throughput in att_tcp_ul, count: 2830 / 15240
Found zero throughput in verizon_tcp_ul, count: 3287 / 15330
Found zero throughput in starlink_tcp_ul, count: 278 / 14986
--------
Found zero throughput in att_udp_ul, count: 254 / 9189
Found zero throughput in verizon_udp_ul, count: 210 / 8377
Found zero throughput in starlink_udp_ul, count: 31 / 8461
--------
Found zero throughput in att_udp_dl, count: 269 / 12970
Found zero throughput in verizon_udp_dl, count: 319 / 8255
Found zero throughput in starlink_udp_dl, count: 154 / 14769


In [15]:
# Negative-loss check on the three UDP downlink frames, one per operator.
for _frame, _operator in (
    (att_udp_dl_df, 'att'),
    (verizon_udp_dl_df, 'verizon'),
    (starlink_udp_dl_df, 'starlink'),
):
    check_negative_loss(_frame, _operator)

Found negative loss in att, count: 26 / 12970
                             time  throughput_mbps  pkt_drop  pkt_total  \
986    2024-05-29T11:49:36.744642             5.15        -4        226   
1774   2024-05-29T16:44:29.323295            41.80      -365       1503   
2960   2024-05-29T12:46:27.203373             7.35       -43        285   
2964   2024-05-29T12:46:27.203755            20.40      -160        751   
4568   2024-05-27T11:27:30.526301            18.00       -13        789   
5290   2024-05-27T14:15:21.699871             2.26       -20         81   
5398   2024-05-27T14:16:10.705795             9.68        -5        427   
5399   2024-05-27T14:16:10.705895             0.54       -23          1   
5424   2024-05-27T14:16:10.708504             3.20       -43        100   
5426   2024-05-27T14:16:35.698812             5.80       -45        214   
5799   2024-05-27T18:28:25.597873             3.50       -35        121   
5845   2024-05-27T18:28:50.097822             7.68    

In [18]:
# Ping data inspection
import os
import pandas as pd

# Directory holding the ping CSVs, resolved relative to the current working
# directory. Note that this rebinds `dataset_dir`.
dataset_dir = os.path.join(os.getcwd(), '../datasets/maine_starlink_trip/ping')

# One ping CSV per operator, loaded in the order att, verizon, starlink.
att_ping_df, verizon_ping_df, starlink_ping_df = (
    pd.read_csv(os.path.join(dataset_dir, csv_name))
    for csv_name in ('att_ping.csv', 'verizon_ping.csv', 'starlink_ping.csv')
)

# Report the row count of each ping frame.
for _frame, _label in (
    (att_ping_df, 'att_ping'),
    (verizon_ping_df, 'verizon_ping'),
    (starlink_ping_df, 'starlink_ping'),
):
    check_samples(_frame, _label)


att_ping full samples count: 18079
verizon_ping full samples count: 18582
starlink_ping full samples count: 17938


In [27]:
import os
import pandas as pd

# Directory holding the Starlink metric CSV, resolved relative to the current
# working directory. Note that this rebinds `dataset_dir`.
dataset_dir = os.path.join(os.getcwd(), '../datasets/maine_starlink_trip/starlink')
_starlink_metric_csv = os.path.join(dataset_dir, 'starlink_metric.csv')

# Load the metric file and report its row count.
starlink_metric_df = pd.read_csv(_starlink_metric_csv)
check_samples(df=starlink_metric_df, label='starlink_metric')

starlink_metric full samples count: 102930
