In [2]:
import os
from dotenv import load_dotenv
from pathlib import Path 
import pandas as pd


Merge multiple CSV files with identical headers into one file without duplicating the header row.

Place the CSV files in the same directory (or give a folder path) and run the Python cell below. It reads all matching CSV files, concatenates them, removes any repeated header rows that may appear inside files, and writes a single output CSV.

In [11]:
def merge_csv(folder='',  pattern='*.csv', output=" "):
    p=Path(folder)
    files=[f for f in p.glob(pattern) if f.is_file()]
    print(*files, sep=" |\n")


In [12]:
merge_csv(folder="/Volumes/Crucial X9/MA/data/nonstat", output="test.csv")

/Volumes/Crucial X9/MA/data/nonstat/._lens-collections.csv |
/Volumes/Crucial X9/MA/data/nonstat/._lens-export.csv |
/Volumes/Crucial X9/MA/data/nonstat/lens-collections.csv |
/Volumes/Crucial X9/MA/data/nonstat/._lens-export (1).csv |
/Volumes/Crucial X9/MA/data/nonstat/lens-export.csv |
/Volumes/Crucial X9/MA/data/nonstat/._lens-export (2).csv |
/Volumes/Crucial X9/MA/data/nonstat/lens-export (1).csv |
/Volumes/Crucial X9/MA/data/nonstat/._lens-export (3).csv |
/Volumes/Crucial X9/MA/data/nonstat/lens-export (2).csv |
/Volumes/Crucial X9/MA/data/nonstat/lens-export (3).csv


In [None]:
# Merge CSVs in a folder without duplicating header rows
from pathlib import Path
import pandas as pd

def merge_csvs(folder='.', pattern='*.csv', output='merged.csv'):
    p = Path(folder)
    files = sorted([f for f in p.glob(pattern) if f.is_file()])
    if not files:
        print('No files found for pattern', pattern, 'in', folder)
        return

    dfs = []
    for f in files:
        # Read as strings to avoid dtype mismatches; change dtype or add converters if needed
        df = pd.read_csv(f, dtype=str)
        dfs.append(df)

    merged = pd.concat(dfs, ignore_index=True)

    # Remove rows that equal the header values (handles files that include header rows again)
    header_row = list(merged.columns.astype(str))
    def is_header_row(row):
        return all(str(val) == col for val, col in zip(row, header_row))

    mask = merged.apply(is_header_row, axis=1)
    if mask.any():
        merged = merged.loc[~mask].reset_index(drop=True)

    merged.to_csv(output, index=False)
    print(f'Merged {len(files)} files into {output}. Rows: {len(merged)}')

# Example usage:
# merge_csvs(folder='path/to/csvs', pattern='*.csv', output='all_merged.csv')