In [2]:
import csv
from collections import OrderedDict

def remove_duplicates(input_file, output_file):
    """Copy a CSV file, keeping only the first row seen for each title.

    Rows are compared on the exact string in their ``title`` column. The
    first occurrence wins and surviving rows keep their original order.
    A short summary of the row counts is printed to standard output.

    Args:
        input_file: Path of the CSV file to read (UTF-8, with a header row).
        output_file: Path of the CSV file to write; overwritten if it exists.

    Raises:
        KeyError: If a data row is read but there is no ``title`` column.
        ValueError: If a kept row has more fields than the header
            (raised by ``csv.DictWriter``).
    """
    # OrderedDict keeps first-seen order, which is the output order.
    unique_entries = OrderedDict()
    total_rows = 0

    with open(input_file, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)

        for row in reader:
            total_rows += 1
            # setdefault stores the row only if this title is new.
            unique_entries.setdefault(row['title'], row)

        # Take the header from the reader rather than from the first stored
        # row, so an input without data rows still has a header to write.
        # fieldnames is None for a completely empty file.
        fieldnames = reader.fieldnames or []

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        # With no header at all (empty input) leave the output empty too.
        if fieldnames:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(unique_entries.values())

    removed_rows = total_rows - len(unique_entries)

    print(f"Total rows in original file: {total_rows}")
    print(f"Unique rows: {len(unique_entries)}")
    print(f"Rows removed: {removed_rows}")
    print(f"Unique entries saved to {output_file}")

# Example run: source CSV and the destination for its de-duplicated copy
input_file = 'combined_all_data.csv'
output_file = 'combined_all_data_no_duplicates.csv'

# Arguments are passed by keyword so the read/write roles are explicit
remove_duplicates(input_file=input_file, output_file=output_file)

Total rows in original file: 295
Unique rows: 252
Rows removed: 43
Unique entries saved to combined_all_data_no_duplicates.csv
