In [None]:
import os
import pandas as pd
import numpy as np

def process_trip_data(input_file, output_file, state: str = "CA") -> None:
    """Write a directional trip-balance table for one state to a CSV file.

    Reads ``input_file`` (a CSV with ``origin_state``, ``destination_state``
    and ``annual_total_trips`` columns), keeps the rows whose origin or
    destination equals ``state``, and sums ``annual_total_trips`` per
    (origin, destination) pair. Each pair of states is then reported once:

    * when both directions are present, only the row whose origin sorts
      before its destination is kept, with the opposite direction's total
      in ``reverse_trips``;
    * when only one direction is present, that row is kept with
      ``reverse_trips`` set to 0;
    * rows whose origin equals their destination are not reported.

    Two derived columns are added before writing ``output_file``:
    ``abs_diff`` (absolute difference between the two directions) and
    ``pct_error_per_min`` (``abs_diff`` divided by the smaller of the two
    totals). When the smaller total is 0 the division yields ``inf`` (or
    ``NaN`` for 0/0) rather than raising.

    Args:
        input_file: Path (or anything ``pandas.read_csv`` accepts) of the
            origin/destination trip CSV.
        output_file: Path the balance table is written to, without the
            DataFrame index.
        state: State code to filter on. Defaults to ``"CA"``.
    """
    trip_column = 'annual_total_trips'
    pair_columns = ['origin_state', 'destination_state']

    df = pd.read_csv(input_file)

    # Keep only trips that start or end in the requested state.
    touches_state = (df['origin_state'] == state) | (df['destination_state'] == state)
    df_grouped = (
        df[touches_state]
        .groupby(pair_columns)[trip_column]
        .sum()
        .reset_index()
    )

    # Look-up of total trips per directed pair. Working in memory avoids
    # the scratch CSV files and hand-rolled parsing, so fractional totals
    # and quoted fields no longer break the function.
    pairs = list(zip(df_grouped['origin_state'], df_grouped['destination_state']))
    trips_by_pair = dict(zip(pairs, df_grouped[trip_column]))

    # Report each unordered pair once. Membership (not "reverse total is
    # 0") decides whether the opposite direction exists, so a reverse row
    # whose total happens to be 0 does not cause the pair to be listed
    # twice. A same-state pair is its own reverse and is therefore dropped.
    keep = [
        origin < destination or (destination, origin) not in trips_by_pair
        for origin, destination in pairs
    ]
    keep_mask = pd.Series(keep, index=df_grouped.index, dtype=bool)
    df_final = df_grouped[keep_mask].reset_index(drop=True)

    # Reuse the trip column's dtype so an empty result still has numeric
    # columns and integer totals stay integers.
    df_final['reverse_trips'] = pd.Series(
        [
            trips_by_pair.get((destination, origin), 0)
            for origin, destination in zip(
                df_final['origin_state'], df_final['destination_state']
            )
        ],
        index=df_final.index,
        dtype=df_final[trip_column].dtype,
    )

    df_final['abs_diff'] = (df_final[trip_column] - df_final['reverse_trips']).abs()
    smaller_direction = df_final[[trip_column, 'reverse_trips']].min(axis=1)
    df_final['pct_error_per_min'] = df_final['abs_diff'] / smaller_direction

    df_final.to_csv(output_file, index=False)

# Example usage:
# process_trip_data("OD_Product_2021.csv", "balance_2021.csv")