In [19]:
import pandas as pd
import os

def filter_stream_data(directory, max_rate=500):
    """Drop rows whose input or output stream rate is missing or above ``max_rate``.

    Reads three header-less files from ``directory``:

    * ``stream_rates_rin.csv``  (comma-separated, rate taken from column 0)
    * ``stream_rates_rout.csv`` (comma-separated, rate taken from column 0)
    * ``test_info.csv``         (whitespace-separated)

    The same row mask is applied to all three files, so a row is either kept
    in every file or removed from every file. The results are written next to
    the inputs as ``filtered_stream_rates_rin.csv``,
    ``filtered_stream_rates_rout.csv`` and ``filtered_test_info.csv``
    (comma-separated, no header, no index).

    Parameters
    ----------
    directory : str
        Folder containing the three input files; also receives the outputs.
    max_rate : float, default 500
        Largest rate (inclusive) that is still considered valid.

    Raises
    ------
    ValueError
        If the three input files do not have the same number of rows, since
        the row mask could not be applied consistently.
    """
    # File paths
    rin_path = os.path.join(directory, 'stream_rates_rin.csv')
    rout_path = os.path.join(directory, 'stream_rates_rout.csv')
    test_info_path = os.path.join(directory, 'test_info.csv')

    rin_df = pd.read_csv(rin_path, header=None, sep=',')
    rout_df = pd.read_csv(rout_path, header=None, sep=',')
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True and parses the file the same way.
    test_info_df = pd.read_csv(test_info_path, header=None, sep=r'\s+')

    # One mask is shared by all three frames; fail loudly instead of letting
    # pandas reindex (or reject) a mask that does not line up row-for-row.
    row_counts = {
        'stream_rates_rin.csv': len(rin_df),
        'stream_rates_rout.csv': len(rout_df),
        'test_info.csv': len(test_info_df),
    }
    if len(set(row_counts.values())) != 1:
        raise ValueError(f"Input files have different row counts: {row_counts}")

    # Print head for debugging
    print("First few rin values:", rin_df[0].head())
    print("First few rout values:", rout_df[0].head())

    # Non-numeric entries become NaN so they can be filtered out below.
    rin_df[0] = pd.to_numeric(rin_df[0], errors='coerce')
    rout_df[0] = pd.to_numeric(rout_df[0], errors='coerce')

    # NaN compares False against any number, so this single comparison also
    # drops rows whose rate is missing or could not be parsed.
    valid_rows = (rin_df[0] <= max_rate) & (rout_df[0] <= max_rate)

    # Apply filter
    filtered_rin = rin_df[valid_rows].reset_index(drop=True)
    filtered_rout = rout_df[valid_rows].reset_index(drop=True)
    filtered_test_info = test_info_df[valid_rows].reset_index(drop=True)

    # Save outputs
    filtered_rin.to_csv(os.path.join(directory, 'filtered_stream_rates_rin.csv'), index=False, header=False)
    filtered_rout.to_csv(os.path.join(directory, 'filtered_stream_rates_rout.csv'), index=False, header=False)
    filtered_test_info.to_csv(os.path.join(directory, 'filtered_test_info.csv'), index=False, header=False)

    # Report
    print(f"Original rows: {len(rin_df)}")
    print(f"Filtered rows retained: {len(filtered_rin)}")
    print(f"Rows removed: {len(rin_df) - len(filtered_rin)}")

# Run the filter on the Data_D3 directory; the filtered_*.csv files are
# written into that same directory.
filter_stream_data('Data_D3/')


First few rin values: 0    233.45
1    232.47
2    232.90
3    233.03
4    229.63
Name: 0, dtype: float64
First few rout values: 0    151.99
1    150.95
2    148.26
3    107.89
4    110.59
Name: 0, dtype: float64
Original rows: 600
Filtered rows retained: 594
Rows removed: 6


In [25]:
import pandas as pd

def split_column_in_csv(file_path, output_path=None):
    """Split comma-joined values in the first column of a header-less CSV.

    The file is read without a header and with every cell kept as text. If
    column 0 holds values of the form ``"a,b"`` (a single quoted field that
    itself contains one comma), it is split so that ``a`` stays in column 0
    and ``b`` goes to column 1. The result is written without header or index.

    If column 0 contains no commas (for example because the file was already
    split by an earlier run), the data is written back unchanged, so the cell
    can be re-run safely.

    Parameters
    ----------
    file_path : str
        CSV file to read.
    output_path : str, optional
        Where to write the result. Defaults to ``file_path``, i.e. the input
        file is overwritten in place.

    Raises
    ------
    ValueError
        If a value in column 0 splits into more than two parts.
    """
    if output_path is None:
        output_path = file_path

    # dtype=str keeps every cell as text so the .str accessor is always valid
    # and untouched columns are written back exactly as they were read.
    df = pd.read_csv(file_path, header=None, dtype=str)

    parts = df[0].str.split(',', expand=True)
    n_parts = parts.shape[1]

    if n_parts > 2:
        raise ValueError(
            f"Expected at most two comma-separated values in column 0 of "
            f"'{file_path}', found up to {n_parts}."
        )

    if n_parts == 2:
        df[0] = parts[0]
        df[1] = parts[1]
        message = f"Data has been split and saved to '{output_path}'."
    else:
        # Nothing to split: column 0 has no commas.
        message = f"No comma-joined values found in column 0; data saved unchanged to '{output_path}'."

    df.to_csv(output_path, index=False, header=False)

    print(message)

# Split the Data_D1 test-info file; the result is written back to this same
# path, so the file is overwritten in place.
split_column_in_csv('Data_D1/filtered_test_info.csv')


Data has been split and saved to 'split_output.csv'.
