In [2]:
import pandas as pd
import os
import glob

def combine_csv_files(folder_path, output_file):
    """
    Combine all CSV files in a folder into one CSV file.

    Input files are read in sorted path order so the row order of the result
    is reproducible. If ``output_file`` lives inside ``folder_path`` it is
    excluded from the inputs, so re-running the function does not fold a
    previously written combined file back into the new one.

    Files that cannot be read are reported and skipped; the remaining files
    are still combined.

    Args:
        folder_path (str): Path to folder containing CSV files
        output_file (str): Path for the combined output CSV file

    Returns:
        None. Progress and a summary are printed. Nothing is written when no
        input file could be loaded.
    """
    # Normalise the output path once so it can be compared with each input.
    output_key = os.path.normcase(os.path.abspath(output_file))

    # glob returns paths in arbitrary order, so sort them. Also drop the
    # output file itself: it matches "*.csv" when it is written into the
    # same folder, which would duplicate every row on a second run.
    csv_files = sorted(
        path
        for path in glob.glob(os.path.join(folder_path, "*.csv"))
        if os.path.normcase(os.path.abspath(path)) != output_key
    )

    if not csv_files:
        print(f"No CSV files found in {folder_path}")
        return

    print(f"Found {len(csv_files)} CSV files to combine:")
    for file in csv_files:
        print(f"  - {os.path.basename(file)}")

    # Read every file, keeping only the ones that load successfully.
    combined_data = []
    for file in csv_files:
        try:
            df = pd.read_csv(file)
        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the run.
            print(f"  - Error reading {file}: {e}")
            continue
        combined_data.append(df)
        print(f"  - Loaded {len(df)} rows from {os.path.basename(file)}")

    if not combined_data:
        print("No data to combine")
        return

    # Stack all frames and renumber the index from 0.
    combined_df = pd.concat(combined_data, ignore_index=True)
    combined_df.to_csv(output_file, index=False)

    # Report the number of files actually loaded, not merely found.
    print(f"\nCombined {len(combined_data)} files into {output_file}")
    skipped = len(csv_files) - len(combined_data)
    if skipped:
        print(f"Skipped {skipped} unreadable files")
    print(f"Total rows: {len(combined_df)}")
    print(f"Combined DataFrame shape: {combined_df.shape}")

# Script entry point: merge the boxscore CSV files into a single file
if __name__ == "__main__":
    # Source folder and destination path for the merged boxscore data
    folder_path = "data/csv/boxscores"
    output_file = "data/csv/boxscores/combined_boxscores.csv"

    combine_csv_files(folder_path=folder_path, output_file=output_file)

    # The game files can be merged the same way by pointing the call at
    # "data/csv/games" and "data/csv/games/combined_games.csv".

Found 113 CSV files to combine:
  - mlb_boxscores_2025-03-27.csv
  - mlb_boxscores_2025-03-28.csv
  - mlb_boxscores_2025-03-29.csv
  - mlb_boxscores_2025-03-30.csv
  - mlb_boxscores_2025-03-31.csv
  - mlb_boxscores_2025-04-01.csv
  - mlb_boxscores_2025-04-02.csv
  - mlb_boxscores_2025-04-03.csv
  - mlb_boxscores_2025-04-04.csv
  - mlb_boxscores_2025-04-05.csv
  - mlb_boxscores_2025-04-06.csv
  - mlb_boxscores_2025-04-07.csv
  - mlb_boxscores_2025-04-08.csv
  - mlb_boxscores_2025-04-09.csv
  - mlb_boxscores_2025-04-10.csv
  - mlb_boxscores_2025-04-11.csv
  - mlb_boxscores_2025-04-12.csv
  - mlb_boxscores_2025-04-13.csv
  - mlb_boxscores_2025-04-14.csv
  - mlb_boxscores_2025-04-15.csv
  - mlb_boxscores_2025-04-16.csv
  - mlb_boxscores_2025-04-17.csv
  - mlb_boxscores_2025-04-18.csv
  - mlb_boxscores_2025-04-19.csv
  - mlb_boxscores_2025-04-20.csv
  - mlb_boxscores_2025-04-21.csv
  - mlb_boxscores_2025-04-22.csv
  - mlb_boxscores_2025-04-23.csv
  - mlb_boxscores_2025-04-24.csv
  - mlb_box