# **GraphPad Prism `.pzfx` File to `.csv` Converter**

## **Description**
This notebook is designed to parse and convert GraphPad Prism `.pzfx` files into multiple `.csv` files, each representing a table in the original file. After conversion, all `.csv` files are zipped into a single `.zip` archive, which can be manually downloaded from the Colab file browser.

## **Features**
- Parses `.pzfx` files to extract tables as `pandas` DataFrames.
- Converts each table to a `.csv` file.
- Combines all generated `.csv` files into a `.zip` archive for convenience.
- Allows manual download of the `.zip` file via the Colab file browser.

## **How to Use**
1. **Upload `.pzfx` File**: Run the notebook and upload your `.pzfx` file when prompted.
2. **Processing**: The notebook will parse the file and extract all tables.
3. **Download**: After processing, the resulting `.zip` file will be available in the **Files** pane (on the left side of Colab). Manually download it by:
   - Clicking the **Files** icon on the left sidebar.
   - Navigating to `converted_tables.zip`.
   - Right-clicking the file and selecting **Download**.

## **Output**
- A `.zip` archive named `converted_tables.zip` containing all the `.csv` files.

## **Note**
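Ensure your `.pzfx` files are valid and use GraphPad Prism XML version 5.00; files with any other XML version are rejected. Only `XY`, `OneWay`, and `TwoWay` tables can be converted; any other table type raises a parsing error.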

## **Example Workflow**
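1. Run the first cell (**Run to install**) to install the dependencies and define the parser, then run the second cell (**Run to process**) and upload your `.pzfx` file when prompted.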
2. Let the notebook process the file and generate the `.csv` files.
3. Locate `converted_tables.zip` in the Colab **Files** pane and download it manually.

## **Troubleshooting**
- If a table fails to parse, ensure the `.pzfx` file format matches the expected XML structure.
- Check for missing or invalid data in your `.pzfx` file if the output seems incomplete.
- The download does not start automatically; use the manual download instructions above to retrieve `converted_tables.zip`.


In [None]:
# @title Run to install


!pip install -q pandas openpyxl

# Step 2: Define the Prism File Parser

import xml.etree.ElementTree as ET
import pandas as pd
from itertools import count, chain, cycle
import numpy as np


class PrismFileLoadError(Exception):
    """Raised when a file cannot be loaded as a supported Prism document."""

def _get_all_text(element):
    """Return every piece of text contained in `element` as one string.

    The element's own text, the text of all nested elements, and the tail
    text that follows each nested element's closing tag are concatenated in
    document order, so a title such as ``Dose <B>mg</B> per kg`` comes back
    as ``'Dose mg per kg'`` rather than losing the part after the markup.

    Args:
        element: An ElementTree element, or None (e.g. the result of a
            ``find`` call that matched nothing).

    Returns:
        The concatenated text, or ``''`` when `element` is None or holds no
        text at all.
    """
    if element is None:
        return ''
    # itertext() also yields the tail text of nested elements, which a walk
    # over `.text` alone would silently drop.
    return ''.join(element.itertext())

def _subcolumn_to_numpy(subcolumn, ns):
    """Convert the ``<d>`` cells of a subcolumn element into a float array.

    Blank cells and cells flagged ``Excluded="1"`` both become NaN, so the
    result always has a floating-point dtype instead of an object array that
    mixes ``None`` with floats.

    Args:
        subcolumn: The ``Subcolumn`` element whose ``d`` children are read.
        ns: Namespace mapping passed through to ``findall`` (may be None).

    Returns:
        A 1-D float array with one entry per ``d`` cell.  If any cell holds
        text that is not a number, a message is printed and a 0-d array
        wrapping ``None`` (``shape == ()``) is returned so the caller can
        recognise the column as unreadable.
    """
    values = []
    try:
        for cell in subcolumn.findall('d', ns):
            # Collect the cell text once, including text inside nested markup.
            text = ''.join(cell.itertext())
            if cell.attrib.get('Excluded') == '1' or text == '':
                values.append(np.nan)
            else:
                values.append(float(text))
    except ValueError as a:
        # Only a failed float() conversion is expected here; anything else
        # is a genuine bug and should surface instead of being swallowed.
        print(f"Couldn't Read a column in the file because: {a}")
        return np.array(None)

    return np.array(values, dtype=float)

def _parse_xy_table(table, ns):
    """Build a DataFrame from a Prism ``Table`` element.

    Every ``Subcolumn`` of the table's X and Y columns becomes one DataFrame
    column named ``<column title>_<suffix>``.  X suffixes are a running
    counter shared by all X subcolumns.  Y suffixes depend on the table's
    ``YFormat`` attribute: ``Mean``/``SEM``/``N`` for ``'SEN'``,
    ``Mean``/``Lower``/``Upper`` for ``'upper-lower-limits'``, and a running
    counter otherwise.  Row titles, when present, become the index.

    Columns shorter than the longest one are padded with NaN, unreadable
    columns (0-d arrays) are replaced by all-NaN columns, and the row titles
    are padded with ``''`` so that all lengths agree.

    Args:
        table: The ``Table`` element to convert.
        ns: Namespace mapping passed through to ``find``/``findall``
            (may be None).

    Returns:
        A ``pandas.DataFrame`` with one column per subcolumn.
    """
    # Only YFormat influences parsing; a missing attribute means "no format".
    yformat = table.attrib.get('YFormat')

    xsuffixes = map(str, count())
    if yformat == 'SEN':
        ysuffixes = cycle(['Mean', 'SEM', 'N'])
    elif yformat == 'upper-lower-limits':
        ysuffixes = cycle(['Mean', 'Lower', 'Upper'])
    else:
        ysuffixes = map(str, count())

    # If several row-title subcolumns exist, the last one wins.
    titles = None
    for row_titles in table.findall('RowTitlesColumn', ns):
        for subcolumn in row_titles.findall('Subcolumn', ns):
            titles = [_get_all_text(d) for d in subcolumn.findall('d', ns)]

    columns = {}
    for xcolumn in chain(table.findall('XColumn', ns), table.findall('XAdvancedColumn', ns)):
        xcolumn_name = _get_all_text(xcolumn.find('Title', ns))
        for subcolumn in xcolumn.findall('Subcolumn', ns):
            subcolumn_name = xcolumn_name + '_' + next(xsuffixes)
            columns[subcolumn_name] = _subcolumn_to_numpy(subcolumn, ns)
    for ycolumn in chain(table.findall('YColumn', ns), table.findall('YAdvancedColumn', ns)):
        ycolumn_name = _get_all_text(ycolumn.find('Title', ns))
        for subcolumn in ycolumn.findall('Subcolumn', ns):
            subcolumn_name = ycolumn_name + '_' + next(ysuffixes)
            columns[subcolumn_name] = _subcolumn_to_numpy(subcolumn, ns)

    # The row count is the longest of all readable columns and the row
    # titles; default=0 keeps a table without any columns from raising.
    lengths = [v.shape[0] for v in columns.values() if v.shape != ()]
    if titles is not None:
        lengths.append(len(titles))
    maxlength = max(lengths, default=0)

    for k, v in list(columns.items()):
        if v.shape == ():
            # Unreadable column: np.pad cannot grow a 0-d array, so build
            # the all-NaN replacement directly.
            columns[k] = np.full(maxlength, np.nan)
        elif v.shape[0] < maxlength:
            columns[k] = np.pad(v, (0, maxlength - v.shape[0]), mode='constant', constant_values=np.nan)

    index = None
    if titles is not None:
        # Pad missing row titles so the index length matches the data.
        index = pd.Index(titles + [''] * (maxlength - len(titles)))

    return pd.DataFrame(columns, index=index)

def _parse_table_to_dataframe(table, ns):
    """Convert a ``Table`` element to a DataFrame, based on its ``TableType``.

    ``XY``, ``TwoWay`` and ``OneWay`` tables are all handed to
    ``_parse_xy_table``; any other table type raises ``PrismFileLoadError``.
    """
    kind = table.attrib['TableType']

    # Guard clause: reject unsupported table types up front.
    if kind not in ('XY', 'TwoWay', 'OneWay'):
        raise PrismFileLoadError(f'Cannot parse {kind} tables for now!')

    return _parse_xy_table(table, ns)

def read_pzfx(filename):
    """Open and parse the Prism pzfx file given in `filename`.

    Args:
        filename: Path (or file object) accepted by ``ElementTree.parse``.

    Returns:
        A dict mapping each table title to the DataFrame parsed from that
        table.  When several tables share a title, later ones are stored
        under ``'<title> (2)'``, ``'<title> (3)'``, ... instead of replacing
        the earlier table.

    Raises:
        PrismFileLoadError: If the root element is not a Prism file, the
            ``PrismXMLVersion`` attribute is missing or not ``'5.00'``, or a
            table has an unsupported type.
    """
    tree = ET.parse(filename)
    root = tree.getroot()
    if root.tag == 'GraphPadPrismFile':
        ns = None
    elif root.tag == '{http://graphpad.com/prism/Prism.htm}GraphPadPrismFile':
        ns = {'': 'http://graphpad.com/prism/Prism.htm'}
    else:
        raise PrismFileLoadError('Not a Prism file!')
    # .get() turns a missing version attribute into the same load error
    # instead of an unrelated KeyError.
    if root.attrib.get('PrismXMLVersion') != '5.00':
        raise PrismFileLoadError('Can only load Prism files with XML version 5.00!')

    tables = {}
    for table in root.findall('Table', ns):
        title = _get_all_text(table.find('Title', ns))
        # Disambiguate repeated titles so no table is silently overwritten.
        key = title
        duplicate_number = 2
        while key in tables:
            key = f'{title} ({duplicate_number})'
            duplicate_number += 1
        tables[key] = _parse_table_to_dataframe(table, ns)

    return tables


In [None]:
# @title Run to process

import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import zipfile
import os
from itertools import count, chain, cycle
from IPython.display import FileLink  # Import FileLink to provide download links
from google.colab import files
from IPython.display import HTML

# Upload your .pzfx file; the keys of `uploaded` are the uploaded file names.
uploaded = files.upload()

# Create a directory for the output CSVs
output_dir = "converted_csvs"
os.makedirs(output_dir, exist_ok=True)

# Remember exactly which CSVs this run writes, so that files left in
# `output_dir` by an earlier run are not swept into the archive.
written_csvs = []

# Parse and save CSVs
for filename in uploaded:
    # Parse the .pzfx file
    tables = read_pzfx(filename)

    # Save each table as a CSV file
    for table_name, df in tables.items():
        csv_name = os.path.join(output_dir, f"{table_name}.csv".replace('/', '_'))
        # Row titles live in the DataFrame index; write them out when the
        # table actually has any instead of always discarding them.
        has_row_titles = (not isinstance(df.index, pd.RangeIndex)
                          and any(str(title) != '' for title in df.index))
        df.to_csv(csv_name, index=has_row_titles)
        written_csvs.append(csv_name)
        print(f"Saved table '{table_name}' to '{csv_name}'")

#  Zip All CSV Files
zip_filename = "converted_tables.zip"
with zipfile.ZipFile(zip_filename, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    # dict.fromkeys drops repeated paths while keeping the write order.
    # NOTE: tables with the same title in different uploaded files still
    # map to the same CSV path, so the later one replaces the earlier one.
    for csv_path in dict.fromkeys(written_csvs):
        zipf.write(csv_path, arcname=os.path.basename(csv_path))
