In [1]:
import csv
from collections import defaultdict

def combine_csv_files_merge_rows(output_file, *input_files, encoding="latin-1"):
    """
    Combine multiple CSV files into one, merging rows that share a key.

    The key of a row is its first column with surrounding whitespace removed.
    All rows (across all input files) with the same key collapse into a single
    output row. For every column, the distinct non-empty, stripped values seen
    for that key are sorted and joined with ``;``.

    The header row of each input file is consumed and not treated as data; the
    first non-empty header encountered is written as the output header.
    Columns are matched by position only, never by header name.

    Args:
        output_file: Path of the CSV file to write (overwritten if present).
        *input_files: Paths of the CSV files to read, processed in order.
        encoding: Text encoding used for both reading and writing. Defaults to
            ``"latin-1"``, the value that was previously hard-coded.

    Returns:
        None. The merged data is written to ``output_file`` and a confirmation
        line is printed.

    Raises:
        OSError: If an input file cannot be opened for reading or the output
            file cannot be opened for writing.
    """

    # key -> column index -> set of distinct non-empty values for that cell.
    # Plain dict ordering means keys come out in first-seen order.
    merged_data = defaultdict(lambda: defaultdict(set))
    header = None

    # Read input files
    for file in input_files:
        with open(file, newline='', encoding=encoding) as infile:
            reader = csv.reader(infile)
            file_header = next(reader, None)

            # Use the first non-empty header we come across
            if header is None and file_header:
                header = file_header

            for row in reader:
                if not row:
                    # Blank line in the file
                    continue
                key = row[0].strip()

                for col_index, value in enumerate(row):
                    cleaned = value.strip()
                    if cleaned:
                        merged_data[key][col_index].add(cleaned)

    # Rows are padded to at least this many columns so the output is not
    # ragged when a key has no value in its trailing columns.
    header_width = len(header) if header else 0

    # Write combined file
    with open(output_file, "w", newline='', encoding=encoding) as outfile:
        writer = csv.writer(outfile)

        if header:
            writer.writerow(header)

        for cols in merged_data.values():
            # An entry only exists once at least one value was added, so
            # `cols` is never empty here and max() is safe.
            # Never truncate: a row wider than the header keeps all its cells.
            width = max(header_width, max(cols) + 1)
            writer.writerow(
                # join unique values with semicolon; missing cells become ""
                [";".join(sorted(cols.get(col_index, ()))) for col_index in range(width)]
            )

    print(f"✅ Merged CSV saved as {output_file}")




In [2]:
# Merge the combined NGSL list with the two "unique" word files into one CSV.
# Paths are relative to the notebook's working directory.
# NOTE(review): the output name spells "NGLS" while the input spells "NGSL" -
# confirm which is intended before renaming; other code may read this file.
ngsl_input_files = (
    r"4_NGSL_lists_combined.csv",
    r"unique_file1.csv",
    r"unique_file2.csv",
)
combine_csv_files_merge_rows("All_words_from_NGLS.csv", *ngsl_input_files)

✅ Merged CSV saved as All_words_from_NGLS.csv
