In [6]:
from pathlib import Path
import pandas as pd

def _safe_sheet_name(sheet_name):
    """Return ``str(sheet_name)`` with file-name-unfriendly characters replaced by ``_``."""
    cleaned = "".join(
        c if c.isalnum() or c in (" ", "_", "-") else "_" for c in str(sheet_name)
    )
    return cleaned.strip()


def xlsx_folder_to_csv(
    src_dir,
    out_dir=None,
    sheets='all',
    delimiter=',',
    index=False,
    overwrite=False
):
    """Convert the Excel workbooks directly inside ``src_dir`` to CSV files.

    Only files matching ``*.xlsx`` or ``*.xls`` at the top level of ``src_dir``
    are considered (the glob is not recursive). Excel owner/lock files, whose
    names start with ``~$``, are ignored because they are not workbooks.

    Parameters
    ----------
    src_dir : str or Path
        Folder containing the workbooks.
    out_dir : str or Path, optional
        Destination folder; defaults to ``<src_dir>/csvs``. Created if missing.
    sheets : 'all', int, or other pandas ``sheet_name`` value
        ``'all'`` writes one CSV per sheet named ``<stem>_<sanitized sheet>.csv``.
        An ``int`` writes ``<stem>_sheet<N>.csv``. Any other value is passed to
        pandas as ``sheet_name`` and its ``str()`` form is sanitized for the
        output file name.
    delimiter : str
        Passed to ``DataFrame.to_csv`` as ``sep``.
    index : bool
        Passed to ``DataFrame.to_csv`` as ``index``.
    overwrite : bool
        When false, an output file that already exists is left untouched and
        reported as ``'skipped_exists'``.

    Returns
    -------
    list of tuple
        ``(source_file, sheet, status, out_path_or_error)`` rows where status is
        ``'written'``, ``'skipped_exists'`` or ``'error'``. Any exception raised
        while handling a workbook yields a single ``'error'`` row (sheet is
        ``None``, last field is the message) and the rest of that workbook is
        abandoned; rows already recorded for it are kept.

    Notes
    -----
    Output names are built from the file stem only, so ``a.xlsx`` and ``a.xls``
    map to the same CSV path.
    """
    src = Path(src_dir)
    out = src / "csvs" if out_dir is None else Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    # .xlsx files first, then .xls, each group sorted; "~$..." files are the
    # temporary lock files Excel leaves next to an open workbook.
    files = [
        f
        for pattern in ("*.xlsx", "*.xls")
        for f in sorted(src.glob(pattern))
        if not f.name.startswith("~$")
    ]
    results = []

    for f in files:
        try:
            # Open each workbook once and make sure its handle is released,
            # instead of re-opening the file for every sheet.
            with pd.ExcelFile(f) as workbook:
                if sheets == 'all':
                    targets = [
                        (name, name, f"{f.stem}_{_safe_sheet_name(name)}.csv")
                        for name in workbook.sheet_names
                    ]
                elif isinstance(sheets, int):
                    targets = [(sheets, f"index:{sheets}", f"{f.stem}_sheet{sheets}.csv")]
                else:
                    targets = [(sheets, sheets, f"{f.stem}_{_safe_sheet_name(sheets)}.csv")]

                for selector, sheet_label, file_name in targets:
                    out_file = out / file_name
                    # Check before parsing so skipped sheets cost nothing.
                    if not overwrite and out_file.exists():
                        results.append((str(f), sheet_label, 'skipped_exists', str(out_file)))
                        continue
                    df = pd.read_excel(workbook, sheet_name=selector)
                    df.to_csv(out_file, index=index, sep=delimiter)
                    results.append((str(f), sheet_label, 'written', str(out_file)))
        except Exception as e:
            # Best-effort batch conversion: record the failure and move on.
            results.append((str(f), None, 'error', str(e)))
    return results

In [2]:
# results = xlsx_folder_to_csv("files", out_dir="files/csvs", sheets='all', delimiter=',', index=False)
# df_results = pd.DataFrame(results, columns=["source_file", "sheet", "status", "out_path_or_error"])
# df_results


In [None]:
# Convert only the first worksheet (index 0) of every workbook under "files",
# leaving any CSV that already exists untouched, then show the per-file report.
results_first_sheet = xlsx_folder_to_csv(
    "files",
    out_dir="files/csvs",
    sheets=0,
    delimiter=',',
    overwrite=False,
)
pd.DataFrame(
    results_first_sheet,
    columns=["source_file", "sheet", "status", "out_path_or_error"],
)


Unnamed: 0,source_file,sheet,status,out_path_or_error
0,files\Anzac Biscuit.xlsx,index:0,written,files\csvs\Anzac Biscuit_sheet0.csv
1,files\Asparagus bundles.xlsx,index:0,written,files\csvs\Asparagus bundles_sheet0.csv
2,files\Baguettes Summer %.xlsx,index:0,written,files\csvs\Baguettes Summer %_sheet0.csv
3,files\Baked Mac _ Cheese.xlsx,index:0,written,files\csvs\Baked Mac _ Cheese_sheet0.csv
4,files\Baked Walleye.xlsx,index:0,written,files\csvs\Baked Walleye_sheet0.csv
...,...,...,...,...
98,files\Creamy Tomato Soup.xls,index:0,written,files\csvs\Creamy Tomato Soup_sheet0.csv
99,files\Fry Bread.xls,index:0,written,files\csvs\Fry Bread_sheet0.csv
100,files\Grandma's Fancy Meatloaf.xls,index:0,written,files\csvs\Grandma's Fancy Meatloaf_sheet0.csv
101,files\May 17th Coffee Cake.xls,index:0,written,files\csvs\May 17th Coffee Cake_sheet0.csv


: 

Collecting xlrd
  Downloading xlrd-2.0.2-py2.py3-none-any.whl.metadata (3.5 kB)
Downloading xlrd-2.0.2-py2.py3-none-any.whl (96 kB)
Installing collected packages: xlrd
Successfully installed xlrd-2.0.2


In [1]:
!pip install pygame

