In [6]:
import csv
import os
from collections import defaultdict

def _safe_file_stem(org):
    """Return `org` with every path separator replaced by an underscore."""
    stem = org
    # '/' is always replaced; os.sep / os.altsep add the backslash on Windows.
    for separator in {'/', os.sep, os.altsep} - {None}:
        stem = stem.replace(separator, '_')
    return stem


def _unique_file_name(stem, used_names):
    """Return '<stem>_data.csv', adding a numeric suffix if that name is already taken."""
    candidate = f"{stem}_data.csv"
    counter = 2
    # normcase lowercases on Windows so names differing only by case also count as taken.
    while os.path.normcase(candidate) in used_names:
        candidate = f"{stem}_{counter}_data.csv"
        counter += 1
    used_names.add(os.path.normcase(candidate))
    return candidate


def separate_csv_by_organization(input_file, output_directory, organization_column, encoding=None):
    """Split a CSV file into one CSV file per distinct value of a column.

    Every row of `input_file` is grouped by its value in `organization_column`,
    and each group is written to '<value>_data.csv' inside `output_directory`
    with the same header as the input. Path separators in the value are
    replaced by '_'. If two different values would produce the same file name,
    the later one gets a numeric suffix ('<value>_2_data.csv', ...) instead of
    overwriting the earlier file.

    Args:
        input_file: Path of the CSV file to read; its first row is the header.
        output_directory: Directory for the output files; created if missing.
        organization_column: Header name of the column to group by.
        encoding: Text encoding used for reading and writing. None (the
            default) keeps the platform default used by open().

    Raises:
        ValueError: If `organization_column` is not in the header, including
            when the input file is empty and therefore has no header.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # Rows collected per organization, in first-seen order
    org_data = defaultdict(list)

    with open(input_file, 'r', newline='', encoding=encoding) as csvfile:
        reader = csv.DictReader(csvfile)

        # Capture the header while the file is open; it is None for an empty file.
        fieldnames = reader.fieldnames
        if not fieldnames or organization_column not in fieldnames:
            raise ValueError(f"Column '{organization_column}' not found in the CSV file.")

        for row in reader:
            # A row shorter than the header yields None here; group it under ''.
            org = row[organization_column] or ''
            org_data[org].append(row)

    # Track names already handed out so no group overwrites another group's file
    used_names = set()
    for org, data in org_data.items():
        file_name = _unique_file_name(_safe_file_stem(org), used_names)
        output_file = os.path.join(output_directory, file_name)
        with open(output_file, 'w', newline='', encoding=encoding) as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)

    print(f"Separated data into {len(org_data)} files in the '{output_directory}' directory.")

In [7]:
# Example usage: adjust the three values below to match your own data.

# Path of the CSV file to split.
input_file = "sldb.csv"

# Directory that receives the per-organization CSV files.
output_directory = "Scheme Loan Database"

# Header name of the column whose values define the groups.
organization_column = "employer"


In [8]:
# Run the split using the values configured in the example-usage cell.
separate_csv_by_organization(
    input_file=input_file,
    output_directory=output_directory,
    organization_column=organization_column,
)

Separated data into 284 files in the 'Scheme Loan Database' directory.
