# Preprocessing Notebook

This notebook converts `.sav` (SPSS) and `.xlsx` (Excel) files from the `working datasets` directory into standard `.csv` format for analysis.

In [None]:
import pandas as pd
import os
import glob

# Locate the data directory. The path is expected to resolve relative to the
# Notebooks directory, i.e. one level up. It is built with os.path.join so the
# separator is correct on every platform (a literal backslash only works on
# Windows).
WORKING_DIR = os.path.join(os.pardir, "working datasets")
if not os.path.exists(WORKING_DIR):
    # Fallback if running from the project root.
    WORKING_DIR = "working datasets"

def convert_sav_to_csv(working_dir=None):
    """Convert every SPSS ``.sav`` file in a directory to CSV.

    Each matching file is read with ``pd.read_spss`` and written next to the
    original under the same base name with a ``.csv`` extension, without the
    DataFrame index. Progress and failures are reported with ``print``; a
    failure on one file does not stop the remaining files from being
    processed.

    Args:
        working_dir: Directory to scan. Defaults to the module-level
            ``WORKING_DIR`` when omitted.
    """
    if working_dir is None:
        working_dir = WORKING_DIR
    # Escape the directory part so glob metacharacters in its name ("[", "*",
    # "?") are treated literally; only the "*.sav" suffix is a pattern.
    pattern = os.path.join(glob.escape(os.fspath(working_dir)), "*.sav")
    for f in glob.glob(pattern):
        try:
            print(f"Converting {f}...")
            df = pd.read_spss(f)
            # Swap only the final extension. str.replace would rewrite every
            # ".sav" occurring anywhere in the path, and would leave a
            # differently-cased extension (e.g. ".SAV", which glob matches on
            # case-insensitive platforms) untouched, so the CSV would be
            # written over the source file.
            new_name = os.path.splitext(f)[0] + ".csv"
            df.to_csv(new_name, index=False)
            print(f"Saved {new_name}")
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going so
            # one unreadable file does not abort the whole batch.
            print(f"Error converting {f}: {e}")

def convert_xlsx_to_csv(working_dir=None):
    """Convert every Excel ``.xlsx`` file in a directory to CSV.

    Each matching file is read with ``pd.read_excel`` using its default
    arguments and written next to the original under the same base name with
    a ``.csv`` extension, without the DataFrame index. Progress and failures
    are reported with ``print``; a failure on one file does not stop the
    remaining files from being processed.

    Args:
        working_dir: Directory to scan. Defaults to the module-level
            ``WORKING_DIR`` when omitted.
    """
    if working_dir is None:
        working_dir = WORKING_DIR
    # Escape the directory part so glob metacharacters in its name ("[", "*",
    # "?") are treated literally; only the "*.xlsx" suffix is a pattern.
    pattern = os.path.join(glob.escape(os.fspath(working_dir)), "*.xlsx")
    for f in glob.glob(pattern):
        try:
            print(f"Converting {f}...")
            df = pd.read_excel(f)
            # Swap only the final extension. str.replace would rewrite every
            # ".xlsx" occurring anywhere in the path, and would leave a
            # differently-cased extension (e.g. ".XLSX", which glob matches on
            # case-insensitive platforms) untouched, so the CSV would be
            # written over the source workbook.
            new_name = os.path.splitext(f)[0] + ".csv"
            df.to_csv(new_name, index=False)
            print(f"Saved {new_name}")
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going so
            # one unreadable workbook does not abort the whole batch.
            print(f"Error converting {f}: {e}")

# Entry point: run the SPSS conversion first, then the Excel conversion, and
# print a completion message once both passes have finished.
if __name__ == "__main__":
    for run_conversion in (convert_sav_to_csv, convert_xlsx_to_csv):
        run_conversion()
    print("Done.")