In [1]:
import csv
from itertools import zip_longest

In [2]:
def compare_csv_files(file1, file2, output_file):
    """Compare two CSV files row by row and save the differing rows.

    Rows are paired by position (row 1 with row 1, and so on). A pair is
    reported when the parsed rows are not equal, or when only one of the
    files still has a row at that position.

    Args:
        file1: Path of the first CSV file.
        file2: Path of the second CSV file.
        output_file: Path of the CSV report. It is only written when at
            least one difference is found. Each report line holds the
            1-based row number followed by the row from ``file1`` and the
            row from ``file2`` (each written as the text of the parsed
            list, or left empty when that file has no such row).

    Returns:
        The number of differing rows, or -1 when a ``FileNotFoundError``
        is raised (for example because an input file is missing).
    """
    differing_rows = []

    try:
        with open(file1, 'r', newline='') as f1, open(file2, 'r', newline='') as f2:
            # zip_longest pads the shorter file with None instead of stopping
            # early. Plain zip() silently discards one row of file1 when file2
            # runs out first, and leaves the row counter undefined when file1
            # is empty. csv.reader only yields lists, so None is a safe filler.
            paired_rows = zip_longest(csv.reader(f1), csv.reader(f2))
            for row_num, (row1, row2) in enumerate(paired_rows, start=1):
                if row1 != row2:
                    differing_rows.append([row_num, row1, row2])

        difference_count = len(differing_rows)

        # Output the differing rows to a new CSV file
        if differing_rows:
            with open(output_file, 'w', newline='') as f_out:
                csv.writer(f_out).writerows(differing_rows)

        if difference_count == 0:
            print("The CSV files are exactly the same.")
        else:
            print(f"Found {difference_count} differing rows. Differences saved in {output_file}.")

        return difference_count

    except FileNotFoundError as e:
        print(f"Error: {e}")
        return -1


In [7]:
# Paths of the two CSV files to compare and of the report to write.
file1, file2 = '2023Q4_stat_mine.csv', '2023Q4_stat.csv'
output_file = '2023Q4_check.csv'

# Kept as the last expression so the returned count is shown as the cell output.
compare_csv_files(file1=file1, file2=file2, output_file=output_file)

Found 1 differing rows. Differences saved in 2023Q4_check.csv.


1