<a href="https://colab.research.google.com/github/CSUC/RDR-scripts/blob/main/change_CSV_delimiter/csv_delimiter_converter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Change semicolon delimiter to comma
### OBSERVATION:
If you have any questions about the code, please contact rdr-contacte@csuc.cat
### PURPOSE OF THE SCRIPT
The main purpose of this script is to convert semicolon-delimited CSV files into comma-delimited CSV files.

In [None]:
# @title First click the ▶ button to execute the script. </p> Then click the <strong>'Upload'</strong> button to upload the CSV files. </p>
import os
import csv
import shutil
import chardet
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from io import StringIO

# Detect encoding using chardet
def detect_encoding(byte_content):
    """Guess the text encoding of raw file bytes using chardet.

    Args:
        byte_content: Raw bytes of the uploaded file.

    Returns:
        The encoding name reported by ``chardet.detect``, or ``'utf-8'``
        when chardet reports no encoding (a falsy value).
    """
    guessed = chardet.detect(byte_content)['encoding']
    if guessed:
        return guessed
    # chardet could not decide; fall back to UTF-8.
    return 'utf-8'

# Process CSV content: convert ; → , and quote fields with commas
def process_csv(byte_content, original_filename):
    """Convert semicolon-delimited CSV bytes into comma-delimited CSV text.

    The input is decoded with the detected encoding and parsed with the
    standard ``csv`` module, so quoted fields are honoured: a semicolon or a
    line break inside ``"..."`` stays part of the field, and the surrounding
    quotes are not copied into the data. Each field is stripped of leading
    and trailing whitespace. On output, fields are quoted only when needed
    (they contain a comma, a double quote, or a line break), with embedded
    double quotes doubled.

    Args:
        byte_content: Raw content of the uploaded file (any bytes-like
            object).
        original_filename: Name of the uploaded file. Currently unused; kept
            so existing callers keep working.

    Returns:
        The converted CSV text, rows separated by ``'\\n'`` with no trailing
        newline. An empty string is returned for empty input.

    Raises:
        csv.Error: If the ``csv`` module cannot parse the input (for example
            a field exceeding the module's field size limit).
    """
    # Accept memoryview/bytearray as well as bytes.
    raw_bytes = bytes(byte_content)
    encoding = detect_encoding(raw_bytes)
    try:
        decoded_text = raw_bytes.decode(encoding, errors='replace')
    except LookupError:
        # The detector named a codec this Python build does not provide.
        decoded_text = raw_bytes.decode('utf-8', errors='replace')

    # keepends=True lets csv.reader see the line breaks, so a quoted field
    # spanning several lines is reassembled instead of being cut into rows.
    reader = csv.reader(
        decoded_text.splitlines(keepends=True),
        delimiter=';',
        skipinitialspace=True,  # allow spaces between ';' and an opening quote
    )

    buffer = StringIO()
    writer = csv.writer(buffer, delimiter=',', lineterminator='\n')

    for row in reader:
        cells = [cell.strip() for cell in row]
        if cells == ['']:
            # A whitespace-only line stays a blank line rather than
            # becoming a quoted empty field ("").
            cells = []
        writer.writerow(cells)

    converted = buffer.getvalue()
    # Rows are newline-separated, not newline-terminated.
    if converted.endswith('\n'):
        converted = converted[:-1]
    return converted

# Handle file uploads
def on_file_upload(change):
    """Convert every uploaded CSV and offer the results as a zip download.

    Observer callback for the ``value`` trait of the upload widget. Each
    uploaded file is passed to ``process_csv``; non-empty results are written
    as UTF-8 to ``processed_csv_files/<name>_converted.csv``. The directory
    is then zipped and handed to ``files.download``.

    Args:
        change: Trait-change object whose ``new`` attribute holds the
            uploaded files. Both payload shapes are accepted: a dict of
            ``{'metadata': {'name': ...}, 'content': ...}`` entries
            (ipywidgets 7) and a tuple of ``{'name': ..., 'content': ...}``
            entries (ipywidgets 8).

    Returns:
        None. Progress and warnings are printed to the cell output.
    """
    clear_output(wait=True)

    if not change.new:
        print("⚠️ Please upload at least one file.")
        return

    new_value = change.new
    # ipywidgets 7 delivers a dict keyed by filename; ipywidgets 8 a tuple.
    uploaded_files = new_value.values() if isinstance(new_value, dict) else new_value

    # Output directory
    directory_name = 'processed_csv_files'
    # Start from an empty directory so files converted by an earlier upload
    # in the same session are not zipped again with this batch.
    shutil.rmtree(directory_name, ignore_errors=True)
    os.makedirs(directory_name, exist_ok=True)

    processed_count = 0

    for uploaded_file in uploaded_files:
        # ipywidgets 8 provides a memoryview; normalise to bytes.
        byte_content = bytes(uploaded_file['content'])
        if 'metadata' in uploaded_file:
            original_filename = uploaded_file['metadata']['name']
        else:
            original_filename = uploaded_file['name']

        # Drop any directory part so the output always lands inside
        # directory_name.
        base_name = os.path.basename(original_filename)
        new_filename = os.path.splitext(base_name)[0] + '_converted.csv'

        processed_content = process_csv(byte_content, original_filename)

        # Empty results (e.g. an empty upload) are skipped, not written.
        if processed_content:
            file_path = os.path.join(directory_name, new_filename)
            # newline='' keeps the '\n' separators exactly as produced.
            with open(file_path, 'w', encoding='utf-8', newline='') as f:
                f.write(processed_content)
            processed_count += 1

    if processed_count == 0:
        print("⚠️ No files were successfully processed.")
        return

    # Zip processed files
    zip_file = shutil.make_archive(directory_name, 'zip', directory_name)

    print(f"✅ Successfully processed {processed_count} file(s). Preparing download...")
    files.download(zip_file)

# Show the usage instructions above the upload control
instructions_html = """
<p style='font-size:14px;'><b>
Please upload your CSV files using semicolon (;) delimiters.<br>
They will be converted to UTF-8, comma-delimited CSV files.<br>
Fields with commas will be quoted. Resulting files will be zipped for download.
</b></p>
"""
display(HTML(instructions_html))

# Multi-file CSV upload control; conversion starts as soon as its value changes
upload_button = widgets.FileUpload(accept='.csv', multiple=True)
upload_button.observe(on_file_upload, names='value')
display(upload_button)

