In [4]:
# Banner describing what this notebook cell does; printed before any work starts.
print("Parse all nuttcp tcp_downlink files in the directory structure and save the extracted data to a CSV file")

import pytz
from datetime import datetime
# Find downlink files in a directory structure

import os
import re 

def find_files(base_dir, prefix, suffix):
    """
    Recursively collect the paths of files whose names match a prefix and a suffix.
    :param base_dir: directory at which the os.walk traversal starts
    :param prefix: string every matching file name must start with
    :param suffix: string every matching file name must end with
    :return: list of matching paths (containing directory joined with the file name),
             in os.walk traversal order
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(base_dir)
        for name in names
        if name.startswith(prefix) and name.endswith(suffix)
    ]


def find_tcp_downlink_file(base_dir):
    """
    Locate every file under base_dir named "tcp_downlink*.out".
    :param base_dir: directory to search recursively
    :return: list of matching file paths, as produced by find_files
    """
    return find_files(base_dir, "tcp_downlink", ".out")

def find_tcp_uplink_file(base_dir):
    """
    Locate every file under base_dir named "tcp_uplink*.out".
    :param base_dir: directory to search recursively
    :return: list of matching file paths, as produced by find_files
    """
    return find_files(base_dir, "tcp_uplink", ".out")

def format_datetime_as_iso_8601(dt: datetime):
    """
    Render a datetime as an ISO 8601 string.
    No timezone conversion is performed here: the result carries a UTC offset
    only when dt is already timezone-aware.
    :param dt: datetime to serialise
    :return: the string produced by dt.isoformat()
    """
    iso_text = dt.isoformat()
    return iso_text

def append_timezone(dt: datetime, timezone_str: str, is_dst: bool = True):
    """
    Attach a named pytz timezone to a datetime by calling the zone's localize().
    :param dt: datetime to localize
               (NOTE(review): pytz localize expects a naive datetime - confirm callers never pass an aware one)
    :param timezone_str: timezone name handed to pytz.timezone, e.g. "US/Eastern"
    :param is_dst: forwarded unchanged to localize(); presumably only consulted for
                   ambiguous or non-existent wall-clock times - verify against the pytz docs
    :return: the datetime returned by localize()
    """
    return pytz.timezone(timezone_str).localize(dt, is_dst=is_dst)

def append_edt_timezone(dt: datetime, is_dst: bool = True):
    """
    Localize a datetime to the "US/Eastern" timezone.
    :param dt: datetime to localize
    :param is_dst: passed through unchanged to append_timezone
    :return: the result of append_timezone for "US/Eastern"
    """
    eastern_zone_name = "US/Eastern"
    return append_timezone(dt, eastern_zone_name, is_dst)

def parse_nuttcp_timestamp(timestamp):
    """
    Convert a nuttcp timestamp string into a datetime.
    :param timestamp: text shaped like "2024-05-27 15:00:00.000000"
    :return: naive datetime parsed from the text
    :raises ValueError: raised by datetime.strptime when the text does not match the format
    """
    nuttcp_time_format = "%Y-%m-%d %H:%M:%S.%f"
    return datetime.strptime(timestamp, nuttcp_time_format)

def format_nuttcp_timestamp(dt_str: str):
    """
    Turn a nuttcp timestamp string into an ISO 8601 string localized to US/Eastern.
    The text is parsed with parse_nuttcp_timestamp, localized with
    append_edt_timezone (default is_dst), then serialised with
    format_datetime_as_iso_8601.
    :param dt_str: timestamp text in the nuttcp log format
    :return: ISO 8601 string of the localized datetime
    """
    return format_datetime_as_iso_8601(
        append_edt_timezone(parse_nuttcp_timestamp(dt_str))
    )


def parse_tcp_downlink_content(content):
    """
    Extract measurement rows from the text of a nuttcp tcp_downlink output file.
    Each line is searched independently; lines that do not match are skipped.
    :param content: full text of the output file
    :return: list of comma-joined strings "time,throughput,retrans,cwnd", one per
             matching line, where time is the bracketed timestamp run through
             format_nuttcp_timestamp and the other fields are the captured text
    """
    # Capture groups, in order: bracketed timestamp, Mbps value,
    # retrans count, KB-cwnd value.
    line_pattern = re.compile(
        r"\[(.*?)\]\s+.*?=\s+([\d.]+)\s+Mbps\s+(\d+)\s+retrans\s+(\d+)\s+KB-cwnd"
    )

    hits = (line_pattern.search(text_line) for text_line in content.splitlines())
    return [
        ','.join([
            format_nuttcp_timestamp(hit.group(1)),
            hit.group(2),
            hit.group(3),
            hit.group(4),
        ])
        for hit in hits
        if hit
    ]



def save_to_csv(data, output_file):
    """
    Write a header line followed by pre-formatted rows to a text file.
    :param data: iterable of strings, each already a comma-joined row without a
                 trailing newline
    :param output_file: path of the file to create or overwrite
    :return: None
    """
    column_names = ('time', 'throughput_mbps', 'retrans', 'cwnd_kb')
    with open(output_file, 'w') as csv_out:
        csv_out.write(','.join(column_names) + '\n')
        csv_out.writelines(row + '\n' for row in data)


# Specify the base directory (update this path as per your directory structure)
base_directory = "../outputs/maine_starlink_trip/"

# Find the target files
tcp_downlink_files = find_tcp_downlink_file(base_directory)

# Display the found files
if tcp_downlink_files:
    print("Found tcp_downlink files")
else:
    print("No tcp_downlink files found.")

# Convert every tcp_downlink .out file into a .csv file written next to it
for file in tcp_downlink_files:
    try:
        with open(file, 'r') as f:
            content = f.read()
        # The input file is closed at this point; parsing and writing do not need it open.
        extracted_data = parse_tcp_downlink_content(content)
        # Swap only the trailing extension. str.replace('.out', '.csv') would also
        # rewrite any ".out" occurring earlier in the path (e.g. in a directory name),
        # pointing the CSV at a directory that does not exist.
        csv_file_path = os.path.splitext(file)[0] + '.csv'
        save_to_csv(extracted_data, csv_file_path)
        print(f"Extracted data is saved to {csv_file_path}")
    except Exception as e:
        # Best-effort batch run: report the failing file and continue with the rest.
        print(f"Error reading {file}: {e}")

Found tcp_downlink files
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/20240529/115627940/tcp_downlink_115630977.csv
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/20240529/163732935/tcp_downlink_163735969.csv
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/20240529/114332240/tcp_downlink_114335268.csv
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/20240529/124017808/tcp_downlink_124020848.csv
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/20240529/113156102/tcp_downlink_113159143.csv
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/20240529/160604145/tcp_downlink_160607172.csv
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/20240529/162104355/tcp_downlink_162107388.csv
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/20240529/125701833/tcp_downlink_125704870.csv
Extracted data is saved to ../outputs/maine_starlink_trip/starlink/2024