In [None]:
import pandas as pd
import glob
import os

### !--- PARAMETERS
### Parameters of the width validity check
# Accepted width interval, in meters. Both ends are inclusive: a width equal
# to MIN_WIDTH or MAX_WIDTH still counts as valid.
MIN_WIDTH, MAX_WIDTH = 1.0, 5.0

# !--- INPUT DIRECTORY
# Folder holding the CSVs written by the automated sidewalk width estimation tool
CSV_DIR = "../YOUR/PATH/outputs_automation"

# !--- OUTPUT DIRECTORY
# Destination folder for the valid / invalid case files; created if missing
OUTPUT_DIR = "../YOUR/PATH/manual_collection"
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

### ---------- LOAD ALL CSVs ----------
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the row order of the combined table reproducible across runs and machines.
csv_files = sorted(glob.glob(os.path.join(CSV_DIR, "*.csv")))

# Fail early with an actionable message: pd.concat on an empty list would
# otherwise raise an opaque "No objects to concatenate" error.
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in: {CSV_DIR}")

# One DataFrame per input file, in the same order as csv_files.
df_list = [pd.read_csv(file) for file in csv_files]

# Stack all files into a single table with a fresh 0..n-1 index.
combined_df = pd.concat(df_list, ignore_index=True)
print(f"Total CSV files loaded: {len(csv_files)}")

# ---------- CATEGORIZE Cases AS VALID / INVALID ----------
def categorize_validity(row):
    """Label one row as "valid" or "invalid" based on its 'width' value.

    Args:
        row: Mapping-like object exposing ``.get`` (for example the Series
            that ``DataFrame.apply(..., axis=1)`` passes for each row).

    Returns:
        "valid" when 'width' can be read as a number, is not NaN, and lies
        within [MIN_WIDTH, MAX_WIDTH] (both bounds inclusive); "invalid" in
        every other case, including a missing 'width' key.
    """
    width = row.get('width', None)

    # Coerce to float so that numeric text (e.g. "3.2" from an object-dtype
    # column) is still judged by value, and so that non-numeric entries are
    # labelled invalid instead of raising TypeError in the comparison below.
    try:
        width = float(width)
    except (TypeError, ValueError):
        return "invalid"

    # Valid: width is not NaN and within [MIN_WIDTH, MAX_WIDTH]
    if pd.notna(width) and (MIN_WIDTH <= width <= MAX_WIDTH):
        return "valid"
    return "invalid"

# Label every row by running the classifier row-wise over the combined table.
validity_labels = combined_df.apply(categorize_validity, axis=1)
combined_df['validity'] = validity_labels

# ---------- SUMMARY TABLE ----------
# One line per label (sorted by label) with the number of rows carrying it.
summary = combined_df.groupby('validity').size().reset_index(name='count')

# Share of all rows per label, rendered as text with two decimals.
total_rows = len(combined_df)
summary['percentage'] = summary['count'].map(
    lambda n_rows: f"{100 * n_rows / total_rows:.2f}%"
)

# Print the table framed by horizontal rules.
banner = "=" * 60
print(banner)
print("SUMMARY TABLE: Valid vs Invalid Cases")
print(banner)
print(summary.to_string(index=False))
print(banner)

# ---------- SAVE VALID / INVALID Cases ----------
# Split the labelled table into its two groups.
validity_column = combined_df['validity']
valid_df = combined_df.loc[validity_column == 'valid']
invalid_df = combined_df.loc[validity_column == 'invalid']

# Each group gets its own CSV inside OUTPUT_DIR.
valid_csv_path = os.path.join(OUTPUT_DIR, "valid_cases.csv")
invalid_csv_path = os.path.join(OUTPUT_DIR, "invalid_cases.csv")

# Write both files first (without the index column), then report the paths.
valid_df.to_csv(valid_csv_path, index=False)
invalid_df.to_csv(invalid_csv_path, index=False)

print(f"Saved valid cases to: {valid_csv_path}")
print(f"Saved invalid cases to: {invalid_csv_path}")