In [5]:
import pandas as pd
import os

def filter_stream_data(directory, threshold=500):
    """Drop rows whose stream rates are invalid and save the filtered files.

    Reads three headerless, row-aligned files from ``directory``:

    * ``stream_rates_rin.csv``  (comma-separated)
    * ``stream_rates_rout.csv`` (comma-separated)
    * ``test_info.csv``         (whitespace-separated)

    A row is kept only when every cell of that row in both rate files parses
    as a number and is strictly below ``threshold``. The same row selection is
    applied to all three files, and the results are written next to the inputs
    as ``filtered_stream_rates_rin.csv``, ``filtered_stream_rates_rout.csv``
    and ``filtered_test_info.csv`` (no header, no index).

    Parameters
    ----------
    directory : str
        Folder containing the three input files; outputs are written there too.
    threshold : float, default 500
        Exclusive upper bound applied to every rate value.

    Raises
    ------
    ValueError
        If the three input files do not have the same number of rows, since
        the row mask is applied positionally to all of them.
    """
    # File paths
    rin_path = os.path.join(directory, 'stream_rates_rin.csv')
    rout_path = os.path.join(directory, 'stream_rates_rout.csv')
    test_info_path = os.path.join(directory, 'test_info.csv')

    # Read CSV files
    rin_df = pd.read_csv(rin_path, header=None, sep=',')
    rout_df = pd.read_csv(rout_path, header=None, sep=',')
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True (deprecated since pandas 2.2).
    test_info_df = pd.read_csv(test_info_path, header=None, sep=r'\s+')

    # One mask filters all three frames by position, so they must line up.
    if not (len(rin_df) == len(rout_df) == len(test_info_df)):
        raise ValueError(
            "Input files must have the same number of rows: "
            f"rin={len(rin_df)}, rout={len(rout_df)}, test_info={len(test_info_df)}"
        )

    # Convert all values to numeric; anything unparseable becomes NaN
    rin_df = rin_df.apply(pd.to_numeric, errors='coerce')
    rout_df = rout_df.apply(pd.to_numeric, errors='coerce')

    # NaN compares False against the threshold, so this single comparison
    # rejects both out-of-range and non-numeric cells.
    valid_rin = (rin_df < threshold).all(axis=1)
    valid_rout = (rout_df < threshold).all(axis=1)
    valid_rows = valid_rin & valid_rout

    # Apply the shared mask to every frame
    filtered_rin = rin_df.loc[valid_rows].reset_index(drop=True)
    filtered_rout = rout_df.loc[valid_rows].reset_index(drop=True)
    filtered_test_info = test_info_df.loc[valid_rows].reset_index(drop=True)

    # Save outputs
    filtered_rin.to_csv(os.path.join(directory, 'filtered_stream_rates_rin.csv'), index=False, header=False)
    filtered_rout.to_csv(os.path.join(directory, 'filtered_stream_rates_rout.csv'), index=False, header=False)
    filtered_test_info.to_csv(os.path.join(directory, 'filtered_test_info.csv'), index=False, header=False)

    # Report
    print(f"Original rows: {len(rin_df)}")
    print(f"Filtered rows retained: {len(filtered_rin)}")
    print(f"Rows removed: {len(rin_df) - len(filtered_rin)}")

# Filter the three input files in Data_D1/ and write the filtered_* outputs there
filter_stream_data(directory='Data_D1/')


Original rows: 2400
Filtered rows retained: 1316
Rows removed: 1084


In [6]:
import pandas as pd

def split_column_in_csv(file_path, output_path=None):
    """Split the comma-joined first column of a headerless CSV into two columns.

    The file is read without a header. Each value in column 0 is expected to
    be a string of the form ``"a,b"``; it is split on the comma and the two
    parts are stored in columns 0 and 1. The result is written without header
    or index.

    Parameters
    ----------
    file_path : str
        CSV file to read.
    output_path : str, optional
        Where to write the result. Defaults to ``file_path``, i.e. the input
        file is overwritten in place.

    Raises
    ------
    ValueError
        If splitting column 0 does not yield exactly two columns.

    Notes
    -----
    NOTE(review): when overwriting in place, a second run on the same file is
    expected to fail, because column 0 no longer holds comma-joined strings.
    """
    if output_path is None:
        output_path = file_path

    # Read the CSV file
    df = pd.read_csv(file_path, header=None)

    # Split column 0 on commas into separate columns
    parts = df[0].str.split(',', expand=True)
    if parts.shape[1] != 2:
        raise ValueError(
            f"Expected column 0 to split into exactly 2 parts, got {parts.shape[1]}"
        )
    df[[0, 1]] = parts

    # Save the result
    df.to_csv(output_path, index=False, header=False)

    # Report the path that was actually written
    print(f"Data has been split and saved to '{output_path}'.")

# Split the filtered test-info file written by the previous cell
split_column_in_csv(file_path='Data_D1/filtered_test_info.csv')


Data has been split and saved to 'split_output.csv'.
